Initial fixed huffman coding for png.

This commit is contained in:
James Grogan 2022-11-28 10:16:04 +00:00
parent e4f9393ee7
commit 7f5009fb5e
39 changed files with 1294 additions and 440 deletions

View file

@ -1,7 +1,10 @@
list(APPEND compression_LIB_INCLUDES
StreamCompressor.cpp
HuffmanEncoder.cpp
huffman/HuffmanEncoder.cpp
huffman/HuffmanStream.cpp
huffman/HuffmanCodeLengthTable.cpp
huffman/HuffmanTree.cpp
RunLengthEncoder.cpp
ZlibEncoder.cpp
deflate/DeflateEncoder.cpp
@ -15,6 +18,7 @@ add_library(compression SHARED ${compression_LIB_INCLUDES})
target_include_directories(compression PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/deflate
${CMAKE_CURRENT_SOURCE_DIR}/huffman
)
target_link_libraries(compression PUBLIC core)

View file

@ -1,21 +0,0 @@
#pragma once
#include "RawTree.h"
#include <vector>
#include <unordered_map>
class HuffmanEncoder
{
using DataStream = std::vector<unsigned char>;
using CountPair = std::pair<unsigned char, unsigned>;
public:
void encode(const DataStream& stream);
void encode(const std::unordered_map<unsigned char, unsigned>& counts);
private:
void dumpTree(const RawTree<CountPair>& tree) const;
void dumpNode(RawNode<CountPair>* node, unsigned depth) const;
};

View file

@ -2,6 +2,10 @@
#include "StringUtils.h"
#include "BitStream.h"
#include "ByteUtils.h"
#include "HuffmanEncoder.h"
#include <iostream>
Lz77Encoder::Lz77Encoder(BitStream* inputStream, BitStream* outputStream)
: AbstractEncoder(inputStream, outputStream)
@ -61,8 +65,39 @@ void Lz77Encoder::lookThroughSearchBuffer(char searchChar, unsigned& hitLength,
}
}
void Lz77Encoder::setPrefixCodeGenerator(std::unique_ptr<PrefixCodeGenerator> generator)
{
mCodeGenerator = std::move(generator);
}
bool Lz77Encoder::encode()
{
if (!mCodeGenerator)
{
auto code_generator = std::make_unique<HuffmanEncoder>();
auto huffman_encoder = code_generator.get();
mCodeGenerator = std::move(code_generator);
huffman_encoder->setUseFixedCode(true);
huffman_encoder->initializeLiteralLengthTable();
}
while(auto byte = mInputStream->readNextByte())
{
const auto code = mCodeGenerator->getLiteralValue(*byte);
std::cout << "Writing value " << static_cast<int>(*byte) << " with code " << ByteUtils::toString(code.getData(), code.getLength()) << "\n";
mOutputStream->writeNBits(code.getData(), code.getLength());
}
auto eos_code = mCodeGenerator->getEndOfStreamValue();
std::cout << "Writing EOS value with code " << ByteUtils::toString(eos_code.getData(), eos_code.getLength()) << "\n";
mOutputStream->writeNBits(eos_code.getData(), eos_code.getLength());
/*
unsigned loc{0};
std::string ret;

View file

@ -1,9 +1,13 @@
#pragma once
#include "AbstractEncoder.h"
#include "HuffmanEncoder.h"
#include <string>
#include <vector>
#include <memory>
class PrefixCodeGenerator;
class Lz77Encoder : public AbstractEncoder
{
@ -24,10 +28,14 @@ public:
void setLookAheadBufferSize(unsigned size);
void setPrefixCodeGenerator(std::unique_ptr<PrefixCodeGenerator> generator);
private:
unsigned mSearchBufferSize{32000};
Buffer mSearchBuffer;
unsigned mLookAheadBufferSize{256};
Buffer mLookaheadBuffer;
std::unique_ptr<PrefixCodeGenerator> mCodeGenerator;
};

View file

@ -0,0 +1,54 @@
#include "RunLengthEncoder.h"
std::vector<RunLengthEncoder::Hit> RunLengthEncoder::encode(const std::vector<unsigned char>& input)
{
std::vector<RunLengthEncoder::Hit> ret;
if (input.empty())
{
return ret;
}
char working_char{0};
unsigned count = 1;
for(unsigned idx=0; idx<input.size(); idx++)
{
auto c = input[idx];
if (idx == 0)
{
working_char = c;
continue;
}
if (c == working_char)
{
count++;
}
else
{
ret.push_back({working_char, count});
working_char = c;
count = 1;
}
}
ret.push_back({working_char, count});
return ret;
}
std::vector<unsigned char> RunLengthEncoder::decode(const std::vector<RunLengthEncoder::Hit>& input)
{
std::vector<unsigned char> ret;
if (input.empty())
{
return ret;
}
for (const auto& hit : input)
{
for(unsigned idx=0; idx< hit.second; idx++)
{
ret.push_back(hit.first);
}
}
return ret;
}

View file

@ -1,110 +1,15 @@
#pragma once
#include "StringUtils.h"
#include <vector>
#include <string>
class RunLengthEncoder
{
public:
std::string encode(const std::string& string)
{
std::string ret;
if (string.empty())
{
return ret;
}
using Hit = std::pair<unsigned char, unsigned>;
char working_char{0};
unsigned count = 1;
for(unsigned idx=0; idx<string.size(); idx++)
{
auto c = string[idx];
if (idx == 0)
{
working_char = c;
continue;
}
std::vector<Hit> encode(const std::vector<unsigned char>& input);
if (c == working_char)
{
count++;
}
else
{
insertCharacter(ret, working_char, count);
working_char = c;
count = 1;
}
}
insertCharacter(ret, working_char, count);
return ret;
}
std::string decode(const std::string& string)
{
std::string ret;
if (string.empty())
{
return ret;
}
unsigned count{0};
while(count < string.size())
{
auto c = string[count];
if (c == mDelimiter)
{
count++;
std::string reps;
char working_char{0};
while(count < string.size())
{
auto rep_char = string[count];
count++;
if (StringUtils::IsAlphabetical(rep_char))
{
working_char = rep_char;
break;
}
else
{
reps += rep_char;
}
}
for (unsigned idx=0; idx<std::stoul(reps); idx++)
{
ret += working_char;
}
}
else
{
ret += c;
count++;
}
}
return ret;
}
std::vector<unsigned char> decode(const std::vector<Hit>& input);
private:
void insertCharacter(std::string& output, char c, unsigned count)
{
if (count >= 3)
{
output += mDelimiter + std::to_string(count) + c;
}
else
{
for (unsigned jdx=0;jdx<count; jdx++)
{
output += c;
}
}
}
char mDelimiter {'@'};
};

View file

@ -30,6 +30,10 @@ public:
~ZlibEncoder();
void setWindowSize(unsigned size);
void setDeflateCompressionMethod(Deflate::CompressionMethod method)
{
mDeflateCompressionMethod = method;
}
bool encode() override;
bool decode() override;

View file

@ -14,218 +14,6 @@ DeflateBlock::DeflateBlock(BitStream* inputStream, BitStream* outputStream)
}
bool DeflateBlock::readNextCodeLengthSymbol(unsigned char& final_symbol)
{
unsigned working_index{0};
auto count = mCodeLengthMapping[working_index].first;
auto delta = count;
bool found{false};
unsigned char buffer{0};
unsigned char working_bits{0};
unsigned working_symbol{0};
while(!found)
{
auto valid = mInputStream->readNextNBits(delta, buffer);
working_bits = (working_bits << delta) | buffer;
for(const auto& entry : mCodeLengthMapping[working_index].second)
{
if (entry.first == working_bits)
{
found = true;
working_symbol = entry.second;
break;
}
}
if (!found)
{
working_index++;
if (working_index >= mCodeLengthMapping.size())
{
break;
}
auto new_count = mCodeLengthMapping[working_index].first;
delta = new_count - count;
count = new_count;
}
}
if (found)
{
final_symbol = working_symbol;
std::cout << "Found symbol " << working_symbol << " with bits " << ByteUtils::toString(working_bits) << std::endl;
std::cout << "At Byte offset " << mInputStream->getCurrentByteOffset() << " and bit offset " << mInputStream->getCurrentBitOffset() << std::endl;
return true;
}
else
{
std::cout << "SYMBOL NOT FOUND " << " with bits " << ByteUtils::toString(working_bits) << std::endl;
return false;
}
}
void DeflateBlock::setCodeLengthAlphabetLengths(const std::vector<unsigned char>& lengths)
{
mCodeLengthAlphabetLengths = lengths;
}
void DeflateBlock::setCodeLengthLength(unsigned length)
{
mHclen = length;
}
void DeflateBlock::setLiteralsTableLength(unsigned length)
{
mHlit = length;
}
void DeflateBlock::setDistanceTableLength(unsigned length)
{
mHdist = length;
}
void DeflateBlock::setIsFinalBlock(bool isFinal)
{
mInFinalBlock = isFinal;
}
void DeflateBlock::flushToStream()
{
}
void DeflateBlock::readLiteralCodeLengths()
{
std::vector<unsigned> lengths;
unsigned char symbol{0};
while(lengths.size() < mHlit)
{
bool valid = readNextCodeLengthSymbol(symbol);
if (!valid)
{
std::cout << "Hit unknown symbol - bailing out" << std::endl;
break;
}
if (symbol < 16)
{
lengths.push_back(symbol);
}
else if(symbol == 16)
{
unsigned char num_reps{0};
mInputStream->readNextNBits(2, num_reps);
auto last_val = lengths[lengths.size()-1];
std::cout << "Got val 16 doing " << 3 + num_reps << std::endl;
for(unsigned idx=0; idx< 3 + num_reps; idx++)
{
lengths.push_back(last_val);
}
}
else if(symbol == 17)
{
unsigned char num_reps{0};
mInputStream->readNextNBits(3, num_reps);
std::cout << "Got val 17 doing " << 3 + num_reps << std::endl;
for(unsigned idx=0; idx< 3 + num_reps; idx++)
{
lengths.push_back(0);
}
}
else if(symbol == 18)
{
unsigned char num_reps{0};
mInputStream->readNextNBits(7, num_reps);
std::cout << "Got val 18 doing " << 11 + num_reps << std::endl;
for(unsigned idx=0; idx< 11 + num_reps; idx++)
{
lengths.push_back(0);
}
}
}
}
void DeflateBlock::buildCodeLengthMapping()
{
for(unsigned idx=1; idx<8; idx++)
{
std::vector<unsigned> entries;
for(unsigned jdx=0; jdx<mCodeLengthAlphabetLengths.size(); jdx++)
{
if (mCodeLengthAlphabetLengths[jdx] == idx)
{
entries.push_back(jdx);
}
}
if (entries.empty())
{
continue;
}
CodeLengthCountEntry count_entry{idx, {}};
std::sort(entries.begin(), entries.end());
unsigned char offset = 0x01 << idx - 1;
unsigned char count{0};
for (auto entry : entries)
{
count_entry.second.push_back(CodeLengthEntry{offset + count, entry});
count++;
}
mCodeLengthMapping.push_back(count_entry);
}
for (const auto& map_data : mCodeLengthMapping)
{
std::cout << "Map entry " << map_data.first << " has vals: " << std::endl;
for (const auto& entry : map_data.second)
{
std::cout << "Key " << ByteUtils::toString(entry.first) << " val: " << entry.second << std::endl;
}
}
}
void DeflateBlock::readDynamicHuffmanTable()
{
unsigned char h_lit{0};
mInputStream->readNextNBits(5, h_lit);
mHlit = h_lit + 257;
std::cout << "Got HLIT " << mHlit << std::endl;
unsigned char h_dist{0};
mInputStream->readNextNBits(5, h_dist);
mHdist = h_dist + 1;
std::cout << "Got HDIST " << mHdist << std::endl;
unsigned char h_clen{0};
mInputStream->readNextNBits(4, h_clen);
mHclen = h_clen + 4;
std::cout << "Got HCLEN " << mHclen << std::endl;
mCodeLengthAlphabetLengths = std::vector<unsigned char>(19, 0);
unsigned char buffer{0};
for(unsigned idx = 0; idx< mHclen; idx++)
{
mInputStream->readNextNBits(3, buffer);
mCodeLengthAlphabetLengths[CODE_LENGTH_ALPHABET_PERMUTATION[idx]] = buffer;
std::cout << "Got code length for " << CODE_LENGTH_ALPHABET_PERMUTATION[idx] << " of " << static_cast<unsigned>(buffer) << std::endl;
}
buildCodeLengthMapping();
readLiteralCodeLengths();
}
std::string DeflateBlock::getMetaData() const
{
std::stringstream sstr;
@ -237,6 +25,11 @@ std::string DeflateBlock::getMetaData() const
return sstr.str();
}
void DeflateBlock::setIsFinalBlock(bool isFinal)
{
mInFinalBlock = isFinal;
}
bool DeflateBlock::isFinalBlock() const
{
return mInFinalBlock;
@ -245,7 +38,9 @@ bool DeflateBlock::isFinalBlock() const
bool DeflateBlock::read()
{
auto working_byte = *mInputStream->readNextByte();
std::cout << "Into process data, starts with: "<< ByteUtils::toString(working_byte) << std::endl;
std::cout << mInputStream->logNextNBytes(11);
std::cout << "DeflateBlock::read location " << mInputStream->logLocation();
unsigned char final_block{0};
mInputStream->readNextNBits(1, final_block);
@ -257,35 +52,64 @@ bool DeflateBlock::read()
if (mCompressionMethod == Deflate::CompressionMethod::NONE)
{
auto byte0 = *mInputStream->readNextByte();
auto byte1 = *mInputStream->readNextByte();
mUncompressedBlockLength = (byte0 << 8) | byte1;
std::cout << "Check block 0: " << ByteUtils::toString(byte0) << std::endl;
std::cout << "Check block 1: " << ByteUtils::toString(byte1) << std::endl;
auto byte2 = *mInputStream->readNextByte();
auto byte3 = *mInputStream->readNextByte();
uint16_t len_check = (byte2 << 8) | byte3;
std::cout << "Check block 2: " << ByteUtils::toString(byte2) << std::endl;
std::cout << "Check block 3: " << ByteUtils::toString(byte3) << std::endl;
//if (!(byte0 ==(~byte2) && byte1 ==(~byte3)))
//{
//std::cout << "Uncompressed block length check failed - aborting." << std::endl;
//return false;
//}
//else
//{
for(unsigned idx=0; idx<mUncompressedBlockLength;idx++)
{
mOutputStream->writeByte(*mInputStream->readNextByte());
}
//}
return readUncompressedStream();
}
else if(mCompressionMethod == Deflate::CompressionMethod::FIXED_HUFFMAN)
{
return readFixedHuffmanStream();
}
else if(mCompressionMethod == Deflate::CompressionMethod::DYNAMIC_HUFFMAN)
{
return readDynamicHuffmanStream();
}
return false;
}
bool DeflateBlock::readUncompressedStream()
{
auto byte0 = *mInputStream->readNextByte();
auto byte1 = *mInputStream->readNextByte();
mUncompressedBlockLength = (byte0 << 8) | byte1;
std::cout << "Check block 0: " << ByteUtils::toString(byte0) << std::endl;
std::cout << "Check block 1: " << ByteUtils::toString(byte1) << std::endl;
auto byte2 = *mInputStream->readNextByte();
auto byte3 = *mInputStream->readNextByte();
uint16_t len_check = (byte2 << 8) | byte3;
std::cout << "Check block 2: " << ByteUtils::toString(byte2) << std::endl;
std::cout << "Check block 3: " << ByteUtils::toString(byte3) << std::endl;
//if (!(byte0 ==(~byte2) && byte1 ==(~byte3)))
//{
//std::cout << "Uncompressed block length check failed - aborting." << std::endl;
//return false;
//}
//else
//{
for(unsigned idx=0; idx<mUncompressedBlockLength;idx++)
{
mOutputStream->writeByte(*mInputStream->readNextByte());
}
//}
return true;
}
bool DeflateBlock::readFixedHuffmanStream()
{
std::cout << "Reading fixed huffman stream" << std::endl;
mHuffmanStream = std::make_unique<HuffmanStream>(mInputStream, mOutputStream);
mHuffmanStream->generateFixedCodeMapping();
return mHuffmanStream->decode();
}
bool DeflateBlock::readDynamicHuffmanStream()
{
mHuffmanStream = std::make_unique<HuffmanStream>(mInputStream, mOutputStream);
return mHuffmanStream->decode();
}
void DeflateBlock::write(uint16_t datalength)
{
mUncompressedBlockLength = datalength;
@ -296,20 +120,38 @@ void DeflateBlock::write(uint16_t datalength)
if (mCompressionMethod == Deflate::CompressionMethod::NONE)
{
std::cout << "Writing compression block header " << ByteUtils::toString(working_block) << std::endl;
mOutputStream->writeByte(working_block);
std::cout << "Writing data length " << mUncompressedBlockLength << " " << ByteUtils::toString(mUncompressedBlockLength) << std::endl;
mOutputStream->writeWord(datalength);
std::cout << "Writing iverse data length " << ~mUncompressedBlockLength << " " << ByteUtils::toString(~mUncompressedBlockLength) << std::endl;
mOutputStream->writeWord(static_cast<uint16_t>(~mUncompressedBlockLength));
for(unsigned idx=0; idx<mUncompressedBlockLength;idx++)
writeUncompressedStream(working_block, datalength);
}
else if (mCompressionMethod == Deflate::CompressionMethod::FIXED_HUFFMAN)
{
mOutputStream->writeNBits(working_block, 3);
while(auto byte = mInputStream->readNextByte())
{
auto byte = *mInputStream->readNextByte();
//std::cout << "Writing next byte " << static_cast<int>(byte) << std::endl;
mOutputStream->writeByte(byte);
mOutputStream->writeByte(*byte);
}
if (const auto& remaining_bits = mInputStream->getRemainingBits(); remaining_bits.second > 0)
{
mOutputStream->writeNBits(remaining_bits.first, remaining_bits.second);
}
}
}
void DeflateBlock::writeUncompressedStream(unsigned char working_byte, uint16_t datalength)
{
std::cout << "Writing compression block header " << ByteUtils::toString(working_byte) << std::endl;
mOutputStream->writeByte(working_byte);
std::cout << "Writing data length " << mUncompressedBlockLength << " " << ByteUtils::toString(mUncompressedBlockLength) << std::endl;
mOutputStream->writeWord(datalength);
std::cout << "Writing iverse data length " << ~mUncompressedBlockLength << " " << ByteUtils::toString(~mUncompressedBlockLength) << std::endl;
mOutputStream->writeWord(static_cast<uint16_t>(~mUncompressedBlockLength));
for(unsigned idx=0; idx<mUncompressedBlockLength;idx++)
{
auto byte = *mInputStream->readNextByte();
//std::cout << "Writing next byte " << static_cast<int>(byte) << std::endl;
mOutputStream->writeByte(byte);
}
}

View file

@ -1,9 +1,12 @@
#pragma once
#include "DeflateElements.h"
#include "HuffmanStream.h"
#include "BitStream.h"
#include <memory>
class AbstractChecksumCalculator;
class DeflateBlock
@ -11,51 +14,33 @@ class DeflateBlock
public:
DeflateBlock(BitStream* inputStream, BitStream* outputStream);
void buildCodeLengthMapping();
std::string getMetaData() const;
void flushToStream();
bool isFinalBlock() const;
bool read();
void readDynamicHuffmanTable();
void readLiteralCodeLengths();
bool readNextCodeLengthSymbol(unsigned char& buffer);
void setCodeLengthAlphabetLengths(const std::vector<unsigned char>& lengths);
void setCodeLengthLength(unsigned length);
void setLiteralsTableLength(unsigned length);
void setDistanceTableLength(unsigned length);
void setIsFinalBlock(bool isFinal);
void setCompressionMethod(Deflate::CompressionMethod method)
{
mCompressionMethod = method;
}
void write(uint16_t datalength);
private:
bool readUncompressedStream();
bool readFixedHuffmanStream();
bool readDynamicHuffmanStream();
void writeUncompressedStream(unsigned char working_byte, uint16_t datalength);
BitStream* mInputStream;
BitStream* mOutputStream;
unsigned mHlit{0};
unsigned mHdist{0};
unsigned mHclen{0};
std::unique_ptr<HuffmanStream> mHuffmanStream;
uint16_t mUncompressedBlockLength{0};
using CodeLengthEntry = std::pair<unsigned char, unsigned>;
using CodeLengthCountEntry = std::pair<unsigned, std::vector<CodeLengthEntry> >;
std::vector<CodeLengthCountEntry> mCodeLengthMapping;
std::vector<unsigned char> mCodeLengthAlphabetLengths;
static constexpr unsigned CODE_LENGTH_ALPHABET_PERMUTATION[19]{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
bool mInFinalBlock{false};
Deflate::CompressionMethod mCompressionMethod{Deflate::CompressionMethod::NONE};
};

View file

@ -1,6 +1,7 @@
#include "DeflateEncoder.h"
#include "BitStream.h"
#include "ByteUtils.h"
#include "DeflateBlock.h"
#include "BufferBitStream.h"
@ -22,6 +23,7 @@ bool DeflateEncoder::encode()
uint16_t count = 0;
BufferBitStream stream;
std::unique_ptr<DeflateBlock> working_block = std::make_unique<DeflateBlock>(&stream, mOutputStream);
working_block->setCompressionMethod(mCompressionMethod);
AbstractChecksumCalculator* checksum_calc;
if (mChecksumCalculators.size() > 0)
@ -38,15 +40,22 @@ bool DeflateEncoder::encode()
working_block->write(count);
working_block = std::make_unique<DeflateBlock>(&stream, mOutputStream);
working_block->setCompressionMethod(mCompressionMethod);
stream.reset();
}
if (auto byte = mInputStream->readNextByte())
{
std::cout << "Adding byte " << ByteUtils::toString(*byte) << " to deflate block input" << std::endl;
stream.writeByte(*byte);
}
else
{
if (const auto& remaining_bits = mInputStream->getRemainingBits(); remaining_bits.second > 0)
{
stream.writeNBits(remaining_bits.first, remaining_bits.second);
}
stream.resetOffsets();
working_block->setIsFinalBlock(true);
@ -56,7 +65,7 @@ bool DeflateEncoder::encode()
}
count++;
}
mOutputStream->flushRemainingBits();
mOutputStream->clearChecksumCalculator();
return true;
}

View file

@ -0,0 +1,175 @@
#include "HuffmanCodeLengthTable.h"
#include "ByteUtils.h"
#include "RunLengthEncoder.h"
#include <algorithm>
#include <sstream>
#include <iostream>
void HuffmanCodeLengthTable::buildCompressedLengthSequence()
{
RunLengthEncoder rl_encoder;
auto rle_encoded = rl_encoder.encode(mInputLengthSequence);
for (const auto& entry : rle_encoded)
{
std::cout << "Got rle " << static_cast<int>(entry.first) << " | " << entry.second << std::endl;
}
mCompressedLengthSequence.clear();
for (const auto& entry : rle_encoded)
{
const auto length = entry.first;
const auto count = entry.second;
if (count < 3)
{
for(unsigned idx=0; idx<3; idx++)
{
mCompressedLengthSequence.push_back({length, 0});
}
}
else if (length == 0)
{
if(count >=3 && count <=10)
{
mCompressedLengthSequence.push_back({17, count-3});
}
else
{
mCompressedLengthSequence.push_back({18, count-11});
}
}
else
{
mCompressedLengthSequence.push_back({length, 0});
auto num_blocks_of_six = (count-1)/6;
for(unsigned idx=0; idx<num_blocks_of_six; idx++)
{
mCompressedLengthSequence.push_back({16, 3});
}
auto remaining_counts = (count-1) % 6;
if (remaining_counts >= 3)
{
mCompressedLengthSequence.push_back({16, remaining_counts - 3});
}
else
{
for(unsigned idx=0; idx<remaining_counts; idx++)
{
mCompressedLengthSequence.push_back({length, 0});
}
}
}
}
mCompressedLengthCounts = std::vector<unsigned>(19, 0);
for (const auto& entry : mCompressedLengthSequence)
{
mCompressedLengthCounts[entry.first]++;
}
}
const std::vector<HuffmanCodeLengthTable::CompressedSequenceEntry>& HuffmanCodeLengthTable::getCompressedLengthSequence() const
{
return mCompressedLengthSequence;
}
const std::vector<unsigned> HuffmanCodeLengthTable::getCompressedLengthCounts() const
{
return mCompressedLengthCounts;
}
void HuffmanCodeLengthTable::buildPrefixCodes()
{
if(mInputLengthSequence.empty())
{
return;
}
unsigned char max_length = *std::max_element(mInputLengthSequence.begin(), mInputLengthSequence.end());
std::vector<unsigned> counts(max_length+1, 0);
for (const auto length : mInputLengthSequence)
{
counts[length]++;
}
counts[0] = 0;
uint32_t code{0};
std::vector<uint32_t> next_code(max_length + 1, 0);
for (unsigned bits = 1; bits <= max_length; bits++)
{
code = (code + counts[bits-1]) << 1;
next_code[bits] = code;
}
for(std::size_t idx=0; idx<mInputLengthSequence.size(); idx++)
{
if (const auto length = mInputLengthSequence[idx]; length != 0)
{
const auto code = next_code[length];
next_code[length]++;
auto prefix_code = PrefixCode(code, length);
mTree.addCodeLengthEntry(length, {PrefixCode(code, length), idx});
mCodes.push_back(prefix_code);
}
}
mTree.sortTable();
//std::cout << dumpPrefixCodes();
}
const PrefixCode& HuffmanCodeLengthTable::getCode(std::size_t index) const
{
return mCodes[index];
}
std::string HuffmanCodeLengthTable::dumpPrefixCodes() const
{
return mTree.dump();
}
unsigned HuffmanCodeLengthTable::mapToDeflateIndex(unsigned index) const
{
if (index>= DEFLATE_PERMUTATION_SIZE)
{
return 0;
}
else
{
return DEFLATE_PERMUTATION[index];
}
}
unsigned HuffmanCodeLengthTable::getNumCodeLengths() const
{
return mTree.getNumCodeLengths();
}
std::optional<HuffmanTree::Symbol> HuffmanCodeLengthTable::findMatch(std::size_t treeIndex, uint32_t code) const
{
return mTree.findMatch(treeIndex, code);
}
unsigned HuffmanCodeLengthTable::getCodeLength(std::size_t index) const
{
return mTree.getCodeLength(index);
}
void HuffmanCodeLengthTable::setInputLengthSequence(const std::vector<unsigned char>& sequence, bool targetDeflate)
{
mTargetDeflate = targetDeflate;
if (targetDeflate)
{
mInputLengthSequence = std::vector<unsigned char>(DEFLATE_PERMUTATION_SIZE, 0);
for(unsigned idx=0; idx<sequence.size(); idx++)
{
mInputLengthSequence[mapToDeflateIndex(idx)] = sequence[idx];
//std::cout << "Got code length for " << mapToDeflateIndex(idx) << " of " << static_cast<unsigned>(sequence[idx]) << std::endl;
}
}
else
{
mInputLengthSequence = sequence;
}
}

View file

@ -0,0 +1,50 @@
#pragma once
#include "HuffmanTree.h"
#include <vector>
#include <string>
#include <optional>
class HuffmanCodeLengthTable
{
public:
void buildPrefixCodes();
void buildCompressedLengthSequence();
std::string dumpPrefixCodes() const;
std::optional<HuffmanTree::Symbol> findMatch(std::size_t treeIndex, uint32_t code) const;
const HuffmanTree& getTree() const;
const PrefixCode& getCode(std::size_t index) const;
using CompressedSequenceEntry = std::pair<unsigned, unsigned>;
const std::vector<CompressedSequenceEntry>& getCompressedLengthSequence() const;
const std::vector<unsigned> getCompressedLengthCounts() const;
unsigned getNumCodeLengths() const;
unsigned getCodeLength(std::size_t treeIndex) const;
unsigned mapToDeflateIndex(unsigned index) const;
void setInputLengthSequence(const std::vector<unsigned char>& sequence, bool targetDeflate = true);
private:
HuffmanTree mTree;
bool mTargetDeflate{true};
std::vector<unsigned char> mInputLengthSequence;
std::vector<PrefixCode> mCodes;
std::vector<CompressedSequenceEntry> mCompressedLengthSequence;
std::vector<unsigned> mCompressedLengthCounts;
static constexpr unsigned DEFLATE_PERMUTATION_SIZE{19};
static constexpr unsigned DEFLATE_PERMUTATION[DEFLATE_PERMUTATION_SIZE]{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
};

View file

@ -0,0 +1,67 @@
#pragma once
#include "RawTree.h"
#include "HuffmanCodeLengthTable.h"
#include "HuffmanFixedCodes.h"
#include <vector>
#include <unordered_map>
class PrefixCodeGenerator
{
public:
virtual ~PrefixCodeGenerator() = default;
virtual const PrefixCode& getLiteralValue(unsigned char value) const = 0;
virtual const PrefixCode& getEndOfStreamValue() const = 0;
};
class HuffmanEncoder : public PrefixCodeGenerator
{
using DataStream = std::vector<unsigned char>;
using CountPair = std::pair<unsigned char, unsigned>;
public:
void encode(const DataStream& stream);
void encode(const std::unordered_map<unsigned char, unsigned>& counts);
void setUseFixedCode(bool useFixed)
{
mUseFixedCode = useFixed;
}
uint32_t getLengthValue(unsigned length)
{
return 0;
}
const PrefixCode& getLiteralValue(unsigned char value) const override
{
return mLiteralLengthTable.getCode(value);
}
const PrefixCode& getEndOfStreamValue() const override
{
return mLiteralLengthTable.getCode(256);
}
void initializeLiteralLengthTable()
{
if(mUseFixedCode)
{
mLiteralLengthTable.setInputLengthSequence(HuffmanFixedCodes::getDeflateFixedHuffmanCodes(), false);
mLiteralLengthTable.buildPrefixCodes();
}
}
private:
void dumpTree(const RawTree<CountPair>& tree) const;
void dumpNode(RawNode<CountPair>* node, unsigned depth) const;
bool mUseFixedCode{false};
bool mTableIsInitialized{false};
HuffmanCodeLengthTable mLiteralLengthTable;
HuffmanCodeLengthTable mDistanceTable;
};

View file

@ -0,0 +1,20 @@
#pragma once
#include <vector>
namespace HuffmanFixedCodes
{
inline std::vector<unsigned char> getDeflateFixedHuffmanCodes()
{
std::vector<std::pair<unsigned, unsigned char> > mappings {{144, 8}, {112, 9}, {24, 7}, {8 ,8}};
std::vector<unsigned char> sequence;
for(const auto& entry : mappings)
{
for(unsigned idx=0;idx<entry.first;idx++)
{
sequence.push_back(entry.second);
}
}
return sequence;
}
}

View file

@ -0,0 +1,202 @@
#include "HuffmanStream.h"
#include "ByteUtils.h"
#include "HuffmanFixedCodes.h"
#include <iostream>
#include <algorithm>
#include <unordered_map>
#include <sstream>
HuffmanStream::HuffmanStream(BitStream* inputStream, BitStream* outputStream)
: mInputStream(inputStream),
mOutputStream(outputStream)
{
}
void HuffmanStream::generateFixedCodeMapping()
{
mUsingFixedCodes = true;
mCodeLengthTable.setInputLengthSequence(HuffmanFixedCodes::getDeflateFixedHuffmanCodes(), false);
mCodeLengthTable.buildPrefixCodes();
}
bool HuffmanStream::readNextCodeLengthSymbol(unsigned& final_symbol)
{
if (mCodeLengthTable.getNumCodeLengths() == 0)
{
return false;
}
unsigned working_index{0};
auto length = mCodeLengthTable.getCodeLength(working_index);
auto delta = length;
bool found{false};
unsigned char buffer{0};
uint32_t working_bits{0};
unsigned working_symbol{0};
while(!found)
{
auto valid = mInputStream->readNextNBits(delta, buffer);
//std::cout << "Got buffer " << ByteUtils::toString(buffer) << std::endl;;
working_bits = working_bits | (buffer << (length - delta));
std::cout << "Read " << delta << " bits with length " << length << " and value " << ByteUtils::toString(working_bits) << std::endl;
if (const auto symbol = mCodeLengthTable.findMatch(working_index, working_bits))
{
found = true;
working_symbol = *symbol;
}
else
{
working_index++;
if (working_index >= mCodeLengthTable.getNumCodeLengths())
{
break;
}
auto new_length = mCodeLengthTable.getCodeLength(working_index);
delta = new_length - length;
length = new_length;
}
}
if (found)
{
final_symbol = working_symbol;
std::cout << "Found symbol " << working_symbol << " with bits " << ByteUtils::toString(working_bits) << std::endl;
std::cout << "At Byte offset " << mInputStream->getCurrentByteOffset() << " and bit offset " << mInputStream->getCurrentBitOffset() << std::endl;
return true;
}
else
{
std::cout << "SYMBOL NOT FOUND " << " with bits " << ByteUtils::toString(working_bits) << std::endl;
return false;
}
}
void HuffmanStream::readLiteralCodeLengths()
{
std::vector<unsigned> lengths;
unsigned symbol{0};
while(lengths.size() < mNumLiterals)
{
bool valid = readNextCodeLengthSymbol(symbol);
if (!valid)
{
std::cout << "Hit unknown symbol - bailing out" << std::endl;
break;
}
if (symbol < 16)
{
lengths.push_back(symbol);
}
else if(symbol == 16)
{
unsigned char num_reps{0};
mInputStream->readNextNBits(2, num_reps);
auto last_val = lengths[lengths.size()-1];
std::cout << "Got val 16 doing " << 3 + num_reps << std::endl;
for(unsigned idx=0; idx< 3 + num_reps; idx++)
{
lengths.push_back(last_val);
}
}
else if(symbol == 17)
{
unsigned char num_reps{0};
mInputStream->readNextNBits(3, num_reps);
std::cout << "Got val 17 doing " << 3 + num_reps << std::endl;
for(unsigned idx=0; idx< 3 + num_reps; idx++)
{
lengths.push_back(0);
}
}
else if(symbol == 18)
{
unsigned char num_reps{0};
mInputStream->readNextNBits(7, num_reps);
std::cout << "Got val 18 doing " << 11 + num_reps << std::endl;
for(unsigned idx=0; idx< 11 + num_reps; idx++)
{
lengths.push_back(0);
}
}
}
}
bool HuffmanStream::decode()
{
if (!mUsingFixedCodes)
{
readCodingsTable();
}
else
{
bool found_end_seq{false};
unsigned symbol{0};
while(!found_end_seq)
{
bool valid = readNextCodeLengthSymbol(symbol);
if (!valid)
{
std::cout << "Hit unknown symbol - bailing out" << std::endl;
break;
}
if (symbol == 256)
{
found_end_seq = true;
break;
}
}
}
return false;
}
void HuffmanStream::readCodingsTable()
{
unsigned char h_lit{0};
mInputStream->readNextNBits(5, h_lit);
mNumLiterals = h_lit + 257;
std::cout << "Got HLIT " << mNumLiterals << std::endl;
unsigned char h_dist{0};
mInputStream->readNextNBits(5, h_dist);
mNumDistances = h_dist + 1;
std::cout << "Got HDIST " << mNumDistances << std::endl;
unsigned char h_clen{0};
mInputStream->readNextNBits(4, h_clen);
auto num_code_lengths = h_clen + 4;
std::cout << "Got HCLEN " << num_code_lengths << std::endl;
auto sequence = std::vector<unsigned char>(num_code_lengths, 0);
unsigned char buffer{0};
for(unsigned idx = 0; idx< num_code_lengths; idx++)
{
std::cout << "After codings " << mInputStream->logLocation();
mInputStream->readNextNBits(3, buffer);
sequence[idx] = buffer;
}
mCodeLengthTable.setInputLengthSequence(sequence, true);
mCodeLengthTable.buildPrefixCodes();
readLiteralCodeLengths();
}

View file

@ -0,0 +1,38 @@
#pragma once
#include "BitStream.h"
#include "HuffmanCodeLengthTable.h"
#include <vector>
#include <string>
class HuffmanStream
{
public:
HuffmanStream(BitStream* inputStream, BitStream* outputStream);
bool decode();
void generateFixedCodeMapping();
void setCodeLengthAlphabetLengths(const std::vector<unsigned char>& lengths);
private:
void readCodingsTable();
void readLiteralCodeLengths();
bool readNextCodeLengthSymbol(unsigned& buffer);
BitStream* mInputStream;
BitStream* mOutputStream;
unsigned mNumLiterals{0}; // HLIT + 257
unsigned mNumDistances{0}; // HDIST + 1
bool mUsingFixedCodes{false};
HuffmanCodeLengthTable mCodeLengthTable;
};

View file

@ -0,0 +1,106 @@
#include "HuffmanTree.h"
#include "ByteUtils.h"
#include <sstream>
#include <algorithm>
#include <iostream>
PrefixCode::PrefixCode(uint32_t data, unsigned length)
: mLength(length)
{
mData = ByteUtils::mirror(data, length);
}
bool PrefixCode::matches(unsigned length, uint32_t code) const
{
return (mLength == length) && (mData == code);
}
std::string PrefixCode::toString(bool bitsAsRightToLeft) const
{
if (bitsAsRightToLeft)
{
if (mLength <=8 )
{
return ByteUtils::toString(mData).substr(8 - mLength, mLength);
}
else
{
return ByteUtils::toString(mData, mLength);
}
}
else
{
if (mLength <=8 )
{
return ByteUtils::toString(ByteUtils::mirror(mData, mLength)).substr(0, mLength);
}
else
{
return ByteUtils::toString(mData, mLength);
}
}
}
void HuffmanTree::addCodeLengthEntry(unsigned length, const CodeSymbolPair& data)
{
bool found{false};
for (auto& entry : mTable)
{
if (entry.first == length)
{
entry.second.push_back(data);
found = true;
break;
}
}
if (!found)
{
mTable.push_back({length, {data}});
}
}
void HuffmanTree::sortTable()
{
std::sort(mTable.begin(), mTable.end(), [](CodeLengthData a, CodeLengthData b){return a.first < b.first;});
}
std::optional<HuffmanTree::Symbol> HuffmanTree::findMatch(std::size_t treeIndex, uint32_t code) const
{
const auto& legth_data = mTable[treeIndex];
for(const auto& entry : legth_data.second)
{
//std::cout << "Checking if " << entry.second << " matches code " << ByteUtils::toString(code) << std::endl;;
if (entry.first.matches(legth_data.first, code))
{
return entry.second;
}
}
return std::nullopt;
}
std::size_t HuffmanTree::getNumCodeLengths() const
{
return mTable.size();
}
unsigned HuffmanTree::getCodeLength(std::size_t idx) const
{
return mTable[idx].first;
}
std::string HuffmanTree::dump(bool bitsAsRightToLeft) const
{
std::stringstream sstr;
for (const auto& code_length_data : mTable)
{
sstr << "Prefix table for Code Length " << code_length_data.first << " has vals: \n";
for (const auto& entry : code_length_data.second)
{
sstr << "Code " << entry.first.toString(bitsAsRightToLeft) << " Symbol: " << entry.second << '\n';
}
}
return sstr.str();
}

View file

@ -0,0 +1,52 @@
#pragma once
#include <vector>
#include <string>
#include <optional>
class PrefixCode
{
public:
PrefixCode(uint32_t data, unsigned length);
std::string toString(bool bitsAsRightToLeft = true) const;
bool matches(unsigned length, uint32_t code) const;
uint32_t getData() const
{
return mData;
}
unsigned getLength() const
{
return mLength;
}
private:
unsigned mLength{0};
uint32_t mData{0};
};
class HuffmanTree
{
public:
using Symbol = unsigned;
using CodeLength = unsigned;
using CodeSymbolPair = std::pair<PrefixCode, Symbol>;
using CodeLengthData = std::pair<CodeLength, std::vector<CodeSymbolPair> >;
void addCodeLengthEntry(unsigned length, const CodeSymbolPair& data);
std::string dump(bool bitsAsRightToLeft = true) const;
std::optional<HuffmanTree::Symbol> findMatch(std::size_t treeIndex, uint32_t code) const;
std::size_t getNumCodeLengths() const;
unsigned getCodeLength(std::size_t idx) const;
void sortTable();
private:
std::vector<CodeLengthData> mTable;
};