Further compression and png work.

This commit is contained in:
James Grogan 2022-11-23 15:41:33 +00:00
parent 318b481ccc
commit 9c8faa534b
34 changed files with 1164 additions and 203 deletions

View file

@ -0,0 +1,23 @@
#pragma once
class BitStream;

/// Interface shared by the stream-to-stream codecs in this library.
/// The streams are borrowed: this class stores the pointers and never deletes them.
class AbstractEncoder
{
public:
    /// @param inputStream  stream the codec reads from.
    /// @param outputStream stream the codec writes to.
    AbstractEncoder(BitStream* inputStream, BitStream* outputStream)
        : mInputStream{inputStream}, mOutputStream{outputStream}
    {
    }

    virtual ~AbstractEncoder() = default;

    /// Runs the encode direction; the meaning of the bool is defined by the
    /// concrete codec (presumably true on success).
    virtual bool encode() = 0;

    /// Runs the decode direction; same return convention as encode().
    virtual bool decode() = 0;

protected:
    BitStream* mInputStream{nullptr};
    BitStream* mOutputStream{nullptr};
};

View file

@ -4,6 +4,10 @@ list(APPEND compression_LIB_INCLUDES
HuffmanEncoder.cpp
RunLengthEncoder.cpp
ZlibData.cpp
ZlibEncoder.cpp
DeflateEncoder.cpp
DeflateBlock.cpp
Lz77Encoder.cpp
)
add_library(compression SHARED ${compression_LIB_INCLUDES})

View file

@ -0,0 +1,262 @@
#include "DeflateBlock.h"
#include "ByteUtils.h"
#include <algorithm>
#include <iostream>
DeflateBlock::DeflateBlock(BitStream* inputStream, BitStream* outputStream)
: mInputStream(inputStream),
mOutputStream(outputStream)
{
}
/// Decodes the next symbol of the code-length alphabet from the input stream.
///
/// The mapping built by buildCodeLengthMapping() is walked in order of
/// increasing code length. Bits are pulled from the stream one at a time and
/// appended on the low side of the accumulator, so the first bit read ends up
/// as the most significant bit of the code (Huffman codes are packed MSB-first
/// in DEFLATE, unlike the other header fields).
///
/// @param final_symbol receives the decoded symbol; untouched when decoding fails.
/// @return true if a code matched, false if the mapping is empty, the stream
///         could not supply a bit, or no code of any known length matched.
bool DeflateBlock::readNextCodeLengthSymbol(unsigned char& final_symbol)
{
    unsigned char working_bits{0};
    unsigned bits_read{0};

    for (const auto& length_group : mCodeLengthMapping)
    {
        // Top the accumulator up to the code length of this group.
        const unsigned code_length = length_group.first;
        while (bits_read < code_length)
        {
            unsigned char bit{0};
            // NOTE(review): assumes readNextNBits returns a falsy value when the
            // stream cannot deliver the requested bits - confirm against BitStream.
            if (!mInputStream->readNextNBits(1, bit))
            {
                std::cout << "SYMBOL NOT FOUND " << " with bits " << ByteUtils::toString(working_bits) << std::endl;
                return false;
            }
            working_bits = static_cast<unsigned char>((working_bits << 1) | bit);
            bits_read++;
        }

        for (const auto& entry : length_group.second)
        {
            if (entry.first != working_bits)
            {
                continue;
            }
            const unsigned working_symbol = entry.second;
            final_symbol = static_cast<unsigned char>(working_symbol);
            std::cout << "Found symbol " << working_symbol << " with bits " << ByteUtils::toString(working_bits) << std::endl;
            std::cout << "At Byte offset " << mInputStream->getCurrentByteOffset() << " and bit offset " << mInputStream->getCurrentBitOffset() << std::endl;
            return true;
        }
    }

    // Either the mapping was empty or every known code length was exhausted.
    std::cout << "SYMBOL NOT FOUND " << " with bits " << ByteUtils::toString(working_bits) << std::endl;
    return false;
}
/// Replaces the stored code lengths of the code-length alphabet with a copy of `lengths`.
void DeflateBlock::setCodeLengthAlphabetLengths(const std::vector<unsigned char>& lengths)
{
    mCodeLengthAlphabetLengths = lengths;
}
/// Stores the number of code-length codes (the value kept in mHclen).
void DeflateBlock::setCodeLengthLength(unsigned length)
{
    mHclen = length;
}
/// Stores the number of literal/length codes (the value kept in mHlit).
void DeflateBlock::setLiteralsTableLength(unsigned length)
{
    mHlit = length;
}
/// Stores the number of distance codes (the value kept in mHdist).
void DeflateBlock::setDistanceTableLength(unsigned length)
{
    mHdist = length;
}
void DeflateBlock::setIsFinalBlock(bool isFinal)
{
}
// Placeholder: the body is empty, so calling this currently writes nothing
// to the output stream.
void DeflateBlock::flushToStream()
{
}
/// Reads the run-length-coded code lengths that follow the code-length table.
///
/// Symbols 0-15 are literal code lengths; 16 repeats the previous length 3-6
/// times (2 extra bits); 17 emits 3-10 zeros (3 extra bits); 18 emits 11-138
/// zeros (7 extra bits). Reading stops once at least mHlit lengths have been
/// collected or a symbol cannot be decoded.
///
/// NOTE(review): the collected lengths are local and discarded when the
/// function returns - the class has no member to keep them in yet.
void DeflateBlock::readLiteralCodeLengths()
{
    std::vector<unsigned> lengths;
    unsigned char symbol{0};

    while (lengths.size() < mHlit)
    {
        if (!readNextCodeLengthSymbol(symbol))
        {
            std::cout << "Hit unknown symbol - bailing out" << std::endl;
            break;
        }

        if (symbol < 16)
        {
            lengths.push_back(symbol);
        }
        else if (symbol == 16)
        {
            // "Repeat previous" is meaningless as the very first symbol; the
            // original indexed lengths[size()-1] on an empty vector here.
            if (lengths.empty())
            {
                std::cout << "Got val 16 with no previous length - bailing out" << std::endl;
                break;
            }
            unsigned char num_reps{0};
            mInputStream->readNextNBits(2, num_reps);
            const unsigned repeat_count = 3u + num_reps;
            const unsigned previous_length = lengths.back();
            std::cout << "Got val 16 doing " << repeat_count << std::endl;
            lengths.insert(lengths.end(), repeat_count, previous_length);
        }
        else if (symbol == 17)
        {
            unsigned char num_reps{0};
            mInputStream->readNextNBits(3, num_reps);
            const unsigned repeat_count = 3u + num_reps;
            std::cout << "Got val 17 doing " << repeat_count << std::endl;
            lengths.insert(lengths.end(), repeat_count, 0u);
        }
        else if (symbol == 18)
        {
            unsigned char num_reps{0};
            mInputStream->readNextNBits(7, num_reps);
            const unsigned repeat_count = 11u + num_reps;
            std::cout << "Got val 18 doing " << repeat_count << std::endl;
            lengths.insert(lengths.end(), repeat_count, 0u);
        }
    }
}
/// Builds the Huffman codes of the code-length alphabet from
/// mCodeLengthAlphabetLengths and stores them, grouped by code length in
/// increasing order, in mCodeLengthMapping.
///
/// Codes are assigned canonically: count the codes per length, derive the
/// smallest code of each length from the counts of the shorter lengths, then
/// hand out consecutive codes to the symbols of that length in ascending
/// symbol order. The previous scheme, (1 << (len - 1)) + n, did not yield a
/// prefix code. Symbols with length 0 take no part. Any earlier mapping is
/// discarded first so repeated calls do not accumulate duplicates.
void DeflateBlock::buildCodeLengthMapping()
{
    // Code lengths of this alphabet are transmitted in 3 bits, so 7 is the maximum.
    constexpr unsigned max_code_length = 7;

    mCodeLengthMapping.clear();

    // Step 1: number of codes for each length (length 0 is deliberately not counted).
    unsigned length_counts[max_code_length + 1] = {};
    for (const auto length : mCodeLengthAlphabetLengths)
    {
        if (length > 0 && length <= max_code_length)
        {
            length_counts[length]++;
        }
    }

    // Step 2: numerically smallest code for each length.
    unsigned next_code[max_code_length + 1] = {};
    unsigned code{0};
    for (unsigned bits = 1; bits <= max_code_length; bits++)
    {
        code = (code + length_counts[bits - 1]) << 1;
        next_code[bits] = code;
    }

    // Step 3: consecutive codes per length, symbols visited in ascending order.
    for (unsigned bits = 1; bits <= max_code_length; bits++)
    {
        if (length_counts[bits] == 0)
        {
            continue;
        }
        CodeLengthCountEntry count_entry{bits, {}};
        for (unsigned symbol = 0; symbol < mCodeLengthAlphabetLengths.size(); symbol++)
        {
            if (mCodeLengthAlphabetLengths[symbol] == bits)
            {
                count_entry.second.push_back(CodeLengthEntry{static_cast<unsigned char>(next_code[bits]), symbol});
                next_code[bits]++;
            }
        }
        mCodeLengthMapping.push_back(count_entry);
    }

    for (const auto& map_data : mCodeLengthMapping)
    {
        std::cout << "Map entry " << map_data.first << " has vals: " << std::endl;
        for (const auto& entry : map_data.second)
        {
            std::cout << "Key " << ByteUtils::toString(entry.first) << " val: " << entry.second << std::endl;
        }
    }
}
void DeflateBlock::readDynamicHuffmanTable()
{
unsigned char h_lit{0};
mInputStream->readNextNBits(5, h_lit);
mHlit = h_lit + 257;
std::cout << "Got HLIT " << mHlit << std::endl;
unsigned char h_dist{0};
mInputStream->readNextNBits(5, h_dist);
mHdist = h_dist + 1;
std::cout << "Got HDIST " << mHdist << std::endl;
unsigned char h_clen{0};
mInputStream->readNextNBits(4, h_clen);
mHclen = h_clen + 4;
std::cout << "Got HCLEN " << mHclen << std::endl;
mCodeLengthAlphabetLengths = std::vector<unsigned char>(19, 0);
unsigned char buffer{0};
for(unsigned idx = 0; idx< mHclen; idx++)
{
mInputStream->readNextNBits(3, buffer);
mCodeLengthAlphabetLengths[CODE_LENGTH_ALPHABET_PERMUTATION[idx]] = buffer;
std::cout << "Got code length for " << CODE_LENGTH_ALPHABET_PERMUTATION[idx] << " of " << static_cast<unsigned>(buffer) << std::endl;
}
buildCodeLengthMapping();
readLiteralCodeLengths();
}
void DeflateBlock::readHeader()
{
auto working_byte = mInputStream->getCurrentByte();
std::cout << "Into process data "<< std::endl;
std::cout << mInputStream->logNextNBytes(9);
unsigned char final_block{0};
mInputStream->readNextNBits(1, final_block);
mInFinalBlock = bool(final_block);
if (mInFinalBlock)
{
std::cout << "Got final block" << std::endl;
}
mInputStream->readNextNBits(2, mCompressionType);
std::cout << "Compress type byte is: " << static_cast<unsigned>(mCompressionType) << std::endl;
if (mCompressionType == NO_COMPRESSION)
{
std::cout << "Got NO_COMPRESSION" << std::endl;
}
else if (mCompressionType == FIXED_HUFFMAN)
{
std::cout << "Got FIXED_HUFFMAN" << std::endl;
}
else if (mCompressionType == DYNAMIC_HUFFMAN)
{
std::cout << "Got DYNAMIC_HUFFMAN" << std::endl;
readDynamicHuffmanTable();
}
else if (mCompressionType == ERROR)
{
std::cout << "Got ERROR" << std::endl;
}
}

View file

@ -0,0 +1,60 @@
#pragma once
#include "BitStream.h"
/// One block of a DEFLATE stream: parses the block header and, for
/// dynamic-Huffman blocks, the code-length table that precedes the data.
/// The streams are borrowed pointers supplied at construction.
class DeflateBlock
{
public:
    DeflateBlock(BitStream* inputStream, BitStream* outputStream);

    // --- Reading -----------------------------------------------------------
    void readHeader();
    void readDynamicHuffmanTable();
    void buildCodeLengthMapping();
    void readLiteralCodeLengths();

    /// Decodes one code-length-alphabet symbol into `buffer`; returns false if none matched.
    bool readNextCodeLengthSymbol(unsigned char& buffer);

    bool isFinalBlock() const
    {
        return mInFinalBlock;
    }

    // --- Writing / configuration -------------------------------------------
    void setCodeLengthAlphabetLengths(const std::vector<unsigned char>& lengths);
    void setCodeLengthLength(unsigned length);
    void setLiteralsTableLength(unsigned length);
    void setDistanceTableLength(unsigned length);
    void setIsFinalBlock(bool isFinal);
    void flushToStream();

private:
    BitStream* mInputStream;
    BitStream* mOutputStream;

    // Table sizes taken from the dynamic-Huffman preamble.
    unsigned mHlit{0};
    unsigned mHdist{0};
    unsigned mHclen{0};

    // (code bits, symbol) pairs, grouped by (code length, entries).
    using CodeLengthEntry = std::pair<unsigned char, unsigned>;
    using CodeLengthCountEntry = std::pair<unsigned, std::vector<CodeLengthEntry>>;
    std::vector<CodeLengthCountEntry> mCodeLengthMapping;
    std::vector<unsigned char> mCodeLengthAlphabetLengths;

    // Order in which the code-length alphabet lengths appear in the stream.
    static constexpr unsigned CODE_LENGTH_ALPHABET_PERMUTATION[19]{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};

    unsigned char mCompressionType{0};
    bool mInFinalBlock{false};

    // Values of the 2-bit compression type field.
    static constexpr unsigned char NO_COMPRESSION = 0x00;
    static constexpr unsigned char FIXED_HUFFMAN = 0x01;
    static constexpr unsigned char DYNAMIC_HUFFMAN = 0x02;
    static constexpr unsigned char ERROR = 0x03;
};

View file

@ -0,0 +1,38 @@
#include "DeflateEncoder.h"
#include "BitStream.h"
#include "DeflateBlock.h"
#include <iostream>
DeflateEncoder::DeflateEncoder(BitStream* inputStream, BitStream* outputStream)
: AbstractEncoder(inputStream, outputStream)
{
}
// Defined out of line so it is emitted in this translation unit, which
// includes DeflateBlock.h and therefore sees the complete DeflateBlock type.
DeflateEncoder::~DeflateEncoder() = default;
// Encoding is not implemented: this unconditionally returns false and does
// not touch either stream.
bool DeflateEncoder::encode()
{
return false;
}
/// Parses the header of the first block from the input stream.
/// Block payloads are not decoded yet, so the result is always false.
bool DeflateEncoder::decode()
{
    const auto block = std::make_unique<DeflateBlock>(mInputStream, mOutputStream);
    block->readHeader();

    if (!block->isFinalBlock())
    {
        // Further blocks would be processed here; nothing is done for now.
    }
    return false;
}

View file

@ -0,0 +1,25 @@
#pragma once
#include "AbstractEncoder.h"
#include <vector>
#include <memory>
class DeflateBlock;
class DeflateEncoder : public AbstractEncoder
{
public:
DeflateEncoder(BitStream* inputStream, BitStream* outputStream);
~DeflateEncoder();
bool encode() override;
bool decode() override;
private:
std::vector<std::unique_ptr<DeflateBlock > > mBlocks;
};

View file

@ -0,0 +1,170 @@
#include "Lz77Encoder.h"
#include "StringUtils.h"
#include "BitStream.h"
Lz77Encoder::Lz77Encoder(BitStream* inputStream, BitStream* outputStream)
: AbstractEncoder(inputStream, outputStream)
{
}
/// Extends a one-character match between the search buffer and the stream.
///
/// The caller has already established that mSearchBuffer[searchIndex] equals
/// stream[streamLoc]; this counts how many further characters agree.
/// When the candidate runs past the end of the search buffer it continues into
/// the data being encoded (an overlapping match), i.e. into `stream` starting
/// at `streamLoc`. The previous code read stream[jdx - hitOffset] there, which
/// ignored streamLoc and was one position off.
///
/// NOTE(review): this relies on the search buffer holding exactly the stream
/// contents that precede streamLoc - confirm against the encode loop.
///
/// @param matchBuffer receives every additional matching character.
/// @param searchIndex index in mSearchBuffer of the first matching character.
/// @param hitOffset   unused; kept so the signature stays unchanged.
/// @param stream      the data being encoded.
/// @param streamLoc   position in `stream` of the first matching character.
/// @return total match length, including the first character (so at least 1).
unsigned Lz77Encoder::lookAheadForMatchingChars(std::vector<char>& matchBuffer, unsigned searchIndex, unsigned hitOffset, const std::string& stream, unsigned streamLoc)
{
    static_cast<void>(hitOffset);

    const auto buffer_size = mSearchBuffer.size();
    // Guard the subtraction: a location at or past the end leaves nothing to compare.
    const auto remaining_size = streamLoc < stream.size() ? stream.size() - streamLoc : 0;

    unsigned num_hits{1};
    for (unsigned jdx = 1; jdx < remaining_size; jdx++)
    {
        const auto candidate_index = buffer_size - buffer_size + searchIndex + jdx;
        const char buffer_char = candidate_index < buffer_size
            ? static_cast<char>(mSearchBuffer[candidate_index])
            : stream[streamLoc + (candidate_index - buffer_size)];

        if (stream[streamLoc + jdx] != buffer_char)
        {
            break;
        }
        matchBuffer.push_back(buffer_char);
        num_hits++;
    }
    return num_hits;
}
/// Scans the search buffer from its newest entry backwards for the longest
/// run that matches the stream at `streamLoc`.
///
/// Buffer elements are converted to `char` before comparing with `searchChar`;
/// comparing an `unsigned char` element directly against a signed `char`
/// promotes both to int and can never be equal for bytes >= 0x80.
///
/// @param searchChar character at stream[streamLoc].
/// @param hitLength  in/out: best match length so far; updated when a match of
///                   at least that length is found.
/// @param hitOffset  out: distance back from the end of the search buffer
///                   (1 = newest entry) of the recorded match.
/// @param stream     the data being encoded.
/// @param streamLoc  current position in `stream`.
void Lz77Encoder::lookThroughSearchBuffer(char searchChar, unsigned& hitLength, unsigned& hitOffset, const std::string& stream, unsigned streamLoc)
{
    const auto buffer_size = mSearchBuffer.size();
    for (unsigned idx = 0; idx < buffer_size; idx++)
    {
        const auto search_index = buffer_size - idx - 1;
        const auto buffer_char = static_cast<char>(mSearchBuffer[search_index]);
        if (buffer_char != searchChar)
        {
            continue;
        }

        std::vector<char> match_buffer{buffer_char};
        const auto num_hits = lookAheadForMatchingChars(match_buffer, search_index, idx, stream, streamLoc);
        // ">=" means that among equally long matches the one furthest back wins.
        if (num_hits >= hitLength)
        {
            hitLength = num_hits;
            hitOffset = idx + 1;
        }
    }
}
// Encoding is currently disabled: the function returns false without reading
// or writing anything. The commented-out body below is the earlier
// string-based implementation (it emitted "@<offset>L<length>" for a match
// and the raw character otherwise); it still refers to `stream` and returns a
// string, so it does not fit this signature as-is.
bool Lz77Encoder::encode()
{
/*
unsigned loc{0};
std::string ret;
while(!mInputStream->isFinished())
{
auto search_char = stream[loc];
unsigned hit_length{0};
unsigned hit_offset{0};
lookThroughSearchBuffer(search_char, hit_length, hit_offset, stream, loc);
if (hit_length > 0)
{
ret += "@" + std::to_string(hit_offset) + "L" + std::to_string(hit_length);
loc+=hit_length;
auto hit_loc = mSearchBuffer.size() - hit_offset;
for(unsigned idx=hit_loc; idx<hit_loc + hit_length; idx++)
{
mSearchBuffer.push_back(mSearchBuffer[idx]);
}
}
else
{
ret += search_char;
mSearchBuffer.push_back(search_char);
loc++;
}
}
return ret;
*/
return false;
}
// Decoding is currently disabled: the function returns false without reading
// or writing anything. The commented-out body below is the earlier
// string-based implementation, which expanded "@<offset>L<length>" markers by
// copying from the already decoded output; it refers to `stream` and returns
// a string, so it does not fit this signature as-is.
bool Lz77Encoder::decode()
{
/*
std::string ret;
unsigned loc{0};
while(loc < stream.size())
{
auto working_char = stream[loc];
if (working_char == '@')
{
unsigned loc_working = loc;
auto remainder = stream.size() - loc;
std::string offset;
unsigned length_loc{0};
for(unsigned jdx=0; jdx< remainder; jdx++)
{
loc++;
auto offset_char = stream[loc];
if (offset_char == 'L')
{
loc++;
break;
}
else
{
offset += offset_char;
}
}
unsigned offset_amount = std::stoul(offset);
std::string length;
remainder = stream.size() - loc;
for(unsigned jdx=0; jdx< remainder; jdx++)
{
auto length_char = stream[loc];
if (StringUtils::IsAlphabetical(length_char) || length_char == '@')
{
break;
}
else
{
loc++;
length += length_char;
}
}
unsigned length_amount = std::stoul(length);
auto buffer_index = ret.size() - offset_amount;
for(unsigned jdx=buffer_index;jdx<buffer_index+length_amount; jdx++)
{
ret += ret[jdx];
}
}
else
{
loc++;
ret += working_char;
}
}
return ret;
*/
return false;
}

View file

@ -1,168 +1,33 @@
#pragma once
#include "StringUtils.h"
#include "AbstractEncoder.h"
#include <string>
#include <vector>
class Lz77Encoder
class Lz77Encoder : public AbstractEncoder
{
public:
using DataStream = std::vector<char>;
using Buffer = std::vector<unsigned char>;
unsigned lookAheadForMatchingChars(std::vector<char>& matchBuffer, unsigned searchIndex, unsigned hitOffset, const std::string& stream, unsigned streamLoc)
{
auto remaining_size = stream.size() - streamLoc;
Lz77Encoder(BitStream* inputStream, BitStream* outputStream);
unsigned num_hits{1};
for (unsigned jdx=1; jdx< remaining_size; jdx++)
{
char buffer_char{0};
if (searchIndex + jdx < mSearchBuffer.size())
{
buffer_char = mSearchBuffer[searchIndex + jdx];
}
else
{
buffer_char = stream[jdx - hitOffset];
}
unsigned lookAheadForMatchingChars(std::vector<char>& matchBuffer, unsigned searchIndex, unsigned hitOffset, const std::string& stream, unsigned streamLoc);
auto lookahead_char = stream[streamLoc + jdx];
if (lookahead_char == buffer_char)
{
matchBuffer.push_back(buffer_char);
num_hits++;
}
else
{
break;
}
}
return num_hits;
}
void lookThroughSearchBuffer(char searchChar, unsigned& hitLength, unsigned& hitOffset, const std::string& stream, unsigned streamLoc);
void lookThroughSearchBuffer(char searchChar, unsigned& hitLength, unsigned& hitOffset, const std::string& stream, unsigned streamLoc)
{
for(unsigned idx=0; idx<mSearchBuffer.size(); idx++)
{
auto search_index = mSearchBuffer.size() - idx - 1;
bool encode() override;
if (auto buffer_char = mSearchBuffer[search_index]; buffer_char == searchChar)
{
std::vector<char> match_buffer{buffer_char};
auto num_hits = lookAheadForMatchingChars(match_buffer, search_index, idx, stream, streamLoc);
bool decode() override;
if (num_hits >= hitLength)
{
hitLength = num_hits;
hitOffset = idx + 1;
}
}
}
}
void setSearchBufferSize(unsigned size);
std::string encode(const std::string& stream)
{
unsigned loc{0};
std::string ret;
void setLookAheadBufferSize(unsigned size);
while(loc < stream.size())
{
auto search_char = stream[loc];
private:
unsigned mSearchBufferSize{32000};
Buffer mSearchBuffer;
unsigned hit_length{0};
unsigned hit_offset{0};
lookThroughSearchBuffer(search_char, hit_length, hit_offset, stream, loc);
if (hit_length > 0)
{
ret += "@" + std::to_string(hit_offset) + "L" + std::to_string(hit_length);
loc+=hit_length;
auto hit_loc = mSearchBuffer.size() - hit_offset;
for(unsigned idx=hit_loc; idx<hit_loc + hit_length; idx++)
{
mSearchBuffer.push_back(mSearchBuffer[idx]);
}
}
else
{
ret += search_char;
mSearchBuffer.push_back(search_char);
loc++;
}
}
return ret;
}
std::string decode(const std::string& stream)
{
std::string ret;
unsigned loc{0};
while(loc < stream.size())
{
auto working_char = stream[loc];
if (working_char == '@')
{
unsigned loc_working = loc;
auto remainder = stream.size() - loc;
std::string offset;
unsigned length_loc{0};
for(unsigned jdx=0; jdx< remainder; jdx++)
{
loc++;
auto offset_char = stream[loc];
if (offset_char == 'L')
{
loc++;
break;
}
else
{
offset += offset_char;
}
}
unsigned offset_amount = std::stoul(offset);
std::string length;
remainder = stream.size() - loc;
for(unsigned jdx=0; jdx< remainder; jdx++)
{
auto length_char = stream[loc];
if (StringUtils::IsAlphabetical(length_char) || length_char == '@')
{
break;
}
else
{
loc++;
length += length_char;
}
}
unsigned length_amount = std::stoul(length);
auto buffer_index = ret.size() - offset_amount;
for(unsigned jdx=buffer_index;jdx<buffer_index+length_amount; jdx++)
{
ret += ret[jdx];
}
}
else
{
loc++;
ret += working_char;
}
}
return ret;
}
DataStream mSearchBuffer;
DataStream mLookaheadBuffer;
unsigned mLookAheadBufferSize{256};
Buffer mLookaheadBuffer;
};

View file

@ -0,0 +1,74 @@
#include "ZlibEncoder.h"
#include "ByteUtils.h"
#include "DeflateEncoder.h"
#include "FileLogger.h"
#include <iostream>
ZlibEncoder::ZlibEncoder(BitStream* inputStream, BitStream* outputStream)
: AbstractEncoder(inputStream, outputStream)
{
}
// Out-of-line so the owned encoder is destroyed from this translation unit.
ZlibEncoder::~ZlibEncoder() = default;
/// Stores the raw CMF byte and splits it into its two 4-bit fields via
/// ByteUtils: the lower four bits as compression method, the higher four as
/// compression info.
void ZlibEncoder::setCompressionMethod(unsigned char method)
{
    std::cout << "Got compression input " << static_cast<int>(method) << std::endl;

    mCmf = method;
    mCompressionMethod = ByteUtils::getLowerNBits(method, 4);
    mCompressionInfo = ByteUtils::getHigherNBits(method, 4);

    std::cout << "Got compression method " << static_cast<int>(mCompressionMethod)
              << " and info " << static_cast<int>(mCompressionInfo)
              << std::endl;
}
/// Stores the raw FLG byte and splits it via ByteUtils into the check value
/// (lower five bits), the dictionary flag (bit 5) and the level (higher two bits).
/// NOTE(review): the exact bit-numbering convention of ByteUtils::getBitN is
/// not visible here - confirm that index 5 is the sixth least-significant bit.
void ZlibEncoder::setExtraFlags(unsigned char extraFlags)
{
    std::cout << "Got flags " << static_cast<int>(extraFlags) << std::endl;

    mFlg = extraFlags;
    mFlagCheck = ByteUtils::getLowerNBits(extraFlags, 5);
    mFlagDict = ByteUtils::getBitN(extraFlags, 5);
    mFlagLevel = ByteUtils::getHigherNBits(extraFlags, 2);

    std::cout << "Got flag check " << static_cast<int>(mFlagCheck)
              << " and dict " << static_cast<int>(mFlagDict)
              << " and level " << static_cast<int>(mFlagLevel)
              << std::endl;
}
/// Encodes via the codec selected by the compression method, creating it on
/// first use. Only method 8 (deflate) is recognised.
///
/// @return the delegate's result, or false when the method is not supported.
bool ZlibEncoder::encode()
{
    if (!mWorkingEncoder)
    {
        if (mCompressionMethod != 8)
        {
            // Cast so the method is logged as a number; an unsigned char would be
            // streamed as a (usually unprintable) character.
            MLOG_ERROR("Zlib requested encoder not recognized: " << static_cast<int>(mCompressionMethod) << " aborting encode");
            return false;
        }
        mWorkingEncoder = std::make_unique<DeflateEncoder>(mInputStream, mOutputStream);
    }
    return mWorkingEncoder->encode();
}
/// Decodes via the codec selected by the compression method, creating it on
/// first use. Only method 8 (deflate) is recognised.
///
/// @return the delegate's result, or false when the method is not supported.
bool ZlibEncoder::decode()
{
    if (!mWorkingEncoder)
    {
        if (mCompressionMethod != 8)
        {
            // Cast so the method is logged as a number; an unsigned char would be
            // streamed as a (usually unprintable) character.
            MLOG_ERROR("Zlib requested decoder not recognized: " << static_cast<int>(mCompressionMethod) << " aborting decode");
            return false;
        }
        mWorkingEncoder = std::make_unique<DeflateEncoder>(mInputStream, mOutputStream);
    }
    return mWorkingEncoder->decode();
}

View file

@ -0,0 +1,34 @@
#pragma once
#include "AbstractEncoder.h"
#include <memory>
#include <vector>
class ZlibEncoder : public AbstractEncoder
{
public:
ZlibEncoder(BitStream* inputStream, BitStream* outputStream);
~ZlibEncoder();
void setCompressionMethod(unsigned char method);
void setExtraFlags(unsigned char extraFlags);
bool encode() override;
bool decode() override;
private:
unsigned char mCmf{0};
unsigned char mFlg{0};
unsigned char mCompressionMethod{8};
unsigned char mCompressionInfo{0};
unsigned char mFlagCheck{0};
unsigned char mFlagDict{0};
unsigned char mFlagLevel{0};
unsigned char mCheckValue{0};
std::unique_ptr<AbstractEncoder> mWorkingEncoder;
};