Clean project structure.

This commit is contained in:
jmsgrogan 2023-01-17 10:13:25 +00:00
parent 78a4fa99ff
commit 947bf937fd
496 changed files with 206 additions and 137 deletions

View file

@ -0,0 +1,34 @@
#pragma once
#include "AbstractChecksumCalculator.h"
#include <vector>
class BitStream;
class AbstractEncoder
{
public:
AbstractEncoder(BitStream* inputStream, BitStream* outputStream)
: mInputStream(inputStream),
mOutputStream(outputStream)
{
}
virtual ~AbstractEncoder() = default;
void addChecksumCalculator(AbstractChecksumCalculator* calculator)
{
mChecksumCalculators.push_back(calculator);
}
virtual bool encode() = 0;
virtual bool decode() = 0;
protected:
std::vector<AbstractChecksumCalculator*> mChecksumCalculators;
BitStream* mInputStream{nullptr};
BitStream* mOutputStream{nullptr};
};

View file

@ -0,0 +1,29 @@
#pragma once
#include "AbstractChecksumCalculator.h"
/**
 * Adler-32 checksum accumulated one byte at a time.
 *
 * Two running sums are kept: mSum1 is the sum of all bytes plus one and
 * mSum2 is the sum of the successive mSum1 values, both reduced modulo
 * MOD_ADLER32. The checksum packs mSum2 into the upper 16 bits and mSum1
 * into the lower 16 bits.
 */
class Adler32Checksum : public AbstractChecksumCalculator
{
public:
    /// Folds one byte into both running sums.
    void addValue(unsigned char val) override
    {
        mSum1 = (mSum1 + val) % MOD_ADLER32;
        mSum2 = (mSum2 + mSum1) % MOD_ADLER32;
    }

    /// Returns (mSum2 << 16) | mSum1 for the bytes added since the last reset.
    uint32_t getChecksum() const override
    {
        return (mSum2 << 16) | mSum1;
    }

    /// Restores the initial state (mSum1 = 1, mSum2 = 0).
    void reset() override
    {
        mSum1 = 1;
        mSum2 = 0;
    }

private:
    // Adler-32 is defined modulo 65521, the largest prime below 2^16.
    // The previous value of 65536 produced checksums that other zlib
    // implementations reject.
    static constexpr unsigned MOD_ADLER32{65521};
    uint32_t mSum1{1};
    uint32_t mSum2{0};
};

View file

@ -0,0 +1,27 @@
# Builds the 'compression' shared library from the sources listed below.
set(MODULE_NAME compression)
# Source paths are relative to this directory.
list(APPEND SOURCES
StreamCompressor.cpp
huffman/HuffmanEncoder.cpp
huffman/HuffmanStream.cpp
huffman/HuffmanCodeLengthTable.cpp
huffman/HuffmanTree.cpp
RunLengthEncoder.cpp
ZlibEncoder.cpp
deflate/DeflateEncoder.cpp
deflate/DeflateBlock.cpp
Lz77Encoder.cpp
CyclicRedundancyChecker.cpp
)
add_library(${MODULE_NAME} SHARED ${SOURCES})
# PUBLIC: targets linking against this library inherit these include paths,
# so headers in the sub-directories can be included without a path prefix.
target_include_directories(${MODULE_NAME} PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/deflate
${CMAKE_CURRENT_SOURCE_DIR}/huffman
)
target_link_libraries(${MODULE_NAME} PUBLIC core)
# Groups the target under src/base in IDE project views.
set_property(TARGET ${MODULE_NAME} PROPERTY FOLDER src/base)
# Export all symbols from the shared library on Windows.
set_target_properties( ${MODULE_NAME} PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON )

View file

@ -0,0 +1,43 @@
#include "CyclicRedundancyChecker.h"
// Fills mTable with TABLE_SIZE entries. Entry n is obtained by taking n and
// performing eight shift-right steps, XOR-ing in 0xedb88320 whenever the bit
// shifted out was set. Marks the table as computed when done.
void CyclicRedundancyChecker::createTable()
{
    mTable.assign(TABLE_SIZE, 0);
    for (std::size_t entry = 0; entry < TABLE_SIZE; ++entry)
    {
        auto remainder = static_cast<unsigned long>(entry);
        for (int bit = 0; bit < 8; ++bit)
        {
            const bool low_bit_set = (remainder & 1) != 0;
            remainder >>= 1;
            if (low_bit_set)
            {
                remainder ^= 0xedb88320L;
            }
        }
        mTable[entry] = remainder;
    }
    mTableComputed = true;
}
// Folds one byte into the running value, building the lookup table first if
// it has not been computed yet.
void CyclicRedundancyChecker::addValue(unsigned char val)
{
    if (!mTableComputed)
    {
        createTable();
    }
    // The low byte of (running value XOR input) selects the table entry.
    const auto table_index = (mLastValue ^ val) & 0xff;
    const auto shifted = mLastValue >> 8;
    mLastValue = mTable[table_index] ^ shifted;
}
// Returns the running value with all 32 bits inverted.
uint32_t CyclicRedundancyChecker::getChecksum() const
{
    return ~mLastValue;
}
// Restores the running value to all ones; the lookup table is left as is.
void CyclicRedundancyChecker::reset()
{
    mLastValue = ~uint32_t{0};
}

View file

@ -0,0 +1,24 @@
#pragma once
#include "AbstractChecksumCalculator.h"
#include <vector>
/**
 * Table-driven cyclic redundancy check fed one byte at a time.
 * The lookup table is created on the first call to addValue().
 */
class CyclicRedundancyChecker : public AbstractChecksumCalculator
{
public:
    /// Folds one byte into the running value.
    void addValue(unsigned char val) override;

    /// Returns the running value XOR-ed with 0xffffffff.
    uint32_t getChecksum() const override;

    /// Sets the running value back to 0xffffffff.
    void reset() override;

private:
    /// Builds the TABLE_SIZE-entry lookup table used by addValue().
    void createTable();

    bool mTableComputed{false};
    uint32_t mLastValue{0xffffffffL};
    static const std::size_t TABLE_SIZE{ 256 };
    std::vector<unsigned long> mTable;
};

View file

@ -0,0 +1,330 @@
#include "Lz77Encoder.h"
#include "StringUtils.h"
#include "BitStream.h"
#include "ByteUtils.h"
#include "HuffmanEncoder.h"
#include <iostream>
// Forwards both streams to the base class and sizes the search and
// look-ahead buffers from the corresponding size members.
Lz77Encoder::Lz77Encoder(BitStream* inputStream, BitStream* outputStream)
    : AbstractEncoder(inputStream, outputStream)
    , mSearchBuffer(mSearchBufferSize)
    , mLookaheadBuffer(mLookAheadBufferSize)
{
}
// Takes ownership of the supplied generator, replacing any previous one.
void Lz77Encoder::setPrefixCodeGenerator(std::unique_ptr<PrefixCodeGenerator> generator)
{
    mCodeGenerator = std::move(generator);
}
// True when the number of stored hits equals the configured maximum.
bool Lz77Encoder::hitBufferFull() const
{
    const auto stored_hits = mHitBuffer.size();
    return stored_hits == mMaxHitBufferSize;
}
void Lz77Encoder::populateSearchBuffer(const Hit& hit)
{
const auto& [length, distance, next_char] = hit;
if (length == 0)
{
mSearchBuffer.addItem(next_char);
}
else
{
std::vector<unsigned char> new_items(distance, 0);
for(unsigned idx=0 ;idx<distance; idx++)
{
new_items[idx] = getSearchBufferItem(idx);
}
for(auto item : new_items)
{
mSearchBuffer.addItem(item);
}
int difference = int(length) - distance;
if (difference > 0)
{
for(unsigned idx=0; idx<unsigned(difference); idx++)
{
mSearchBuffer.addItem(mLookaheadBuffer.getItem(idx));
}
}
}
}
// Returns the search-buffer item `index` positions before the last one
// (index 0 is the item at position getNumItems() - 1).
unsigned char Lz77Encoder::getSearchBufferItem(unsigned index) const
{
    const auto last_position = mSearchBuffer.getNumItems() - 1;
    return mSearchBuffer.getItem(last_position - index);
}
// Counts how many leading look-ahead bytes match the data starting `distance`
// bytes back in the search buffer.
//
// When the comparison runs past the end of the search buffer (match longer
// than `distance`) the reference byte is taken from the look-ahead buffer
// itself, which lets a match overlap the bytes it produces.
//
// Only indices up to mMaxLookAheadBufferIndex are compared. Returns the
// number of matching bytes, or 0 if that number is below
// mMinLengthMatchSize.
//
// The previous implementation returned idx + 1 when the bytes at idx
// differed, i.e. it counted the first mismatching byte as part of the match.
unsigned Lz77Encoder::lookAheadForMatchingChars(unsigned distance)
{
    if (mMaxLookAheadBufferIndex < 0)
    {
        return 0;
    }
    const auto last_valid_index = static_cast<unsigned>(mMaxLookAheadBufferIndex);

    unsigned matched{0};
    while (matched <= last_valid_index)
    {
        unsigned char reference{0};
        if (matched < distance)
        {
            reference = getSearchBufferItem(distance - 1 - matched);
        }
        else
        {
            // Past the search buffer: continue inside the look-ahead window.
            reference = mLookaheadBuffer.getItem(matched - distance);
        }

        if (mLookaheadBuffer.getItem(matched) != reference)
        {
            break;
        }
        ++matched;
    }
    return matched >= mMinLengthMatchSize ? matched : 0;
}
void Lz77Encoder::lookForMatches(unsigned char searchChar, unsigned& hitLength, unsigned& hitOffset)
{
for (unsigned idx = 0; idx< mSearchBuffer.getNumItems(); idx++)
{
if (mSearchBuffer.getItem(mSearchBuffer.getNumItems() - 1 - idx) == searchChar)
{
auto num_hits = lookAheadForMatchingChars(idx + 1);
if (num_hits > 0 && num_hits >= hitLength)
{
hitLength = num_hits;
hitOffset = idx + 1;
}
}
}
}
// True when the look-ahead buffer holds fewer items than its configured size
// or when the last valid index lies before the final buffer slot.
bool Lz77Encoder::lookAheadSourceEmpty() const
{
    const bool buffer_not_full = mLookaheadBuffer.getNumItems() < mLookAheadBufferSize;
    return buffer_not_full
        || mMaxLookAheadBufferIndex < int(mLookAheadBufferSize) - 1;
}
void Lz77Encoder::populateLookaheadBuffer(unsigned size, bool firstPass)
{
if (!firstPass && lookAheadSourceEmpty())
{
for(unsigned idx=0; idx<size; idx++)
{
mLookaheadBuffer.addItem(0);
mMaxLookAheadBufferIndex--;
}
return;
}
bool stream_finished{false};
unsigned stream_end_id{0};
for(unsigned idx=0; idx<size; idx++)
{
if (!stream_finished)
{
auto byte = mInputStream->readNextByte();
if (!byte)
{
stream_finished = true;
stream_end_id = idx -1;
mLookaheadBuffer.addItem(0);
mMaxLookAheadBufferIndex--;
continue;
}
else
{
mLookaheadBuffer.addItem(*byte);
}
}
else
{
mLookaheadBuffer.addItem(0);
mMaxLookAheadBufferIndex--;
}
}
if (stream_finished && firstPass)
{
mMaxLookAheadBufferIndex = stream_end_id;
}
}
// Fills the hit buffer from the input stream.
//
// Installs a HuffmanEncoder as code generator if none was set, primes the
// look-ahead buffer, then repeatedly records a hit for the byte at the front
// of the look-ahead buffer and advances both buffers.
//
// Returns true when the look-ahead buffer ran out of valid data (input
// consumed) and false when the loop stopped because the hit buffer is full.
bool Lz77Encoder::encode()
{
    if (!mCodeGenerator)
    {
        mCodeGenerator = std::make_unique<HuffmanEncoder>();
    }

    // Prime the look-ahead window with the first bytes of the input.
    mMaxLookAheadBufferIndex = mLookAheadBufferSize - 1;
    populateLookaheadBuffer(mLookAheadBufferSize, true);
    if (mMaxLookAheadBufferIndex < 0)
    {
        return true;
    }

    while (!hitBufferFull())
    {
        if (mMaxLookAheadBufferIndex < 0)
        {
            return true;
        }

        const auto current_byte = mLookaheadBuffer.getItem(0);
        unsigned match_length{0};
        unsigned match_distance{0};
        lookForMatches(current_byte, match_length, match_distance);

        const Hit hit{match_length, match_distance, current_byte};
        mHitBuffer.push_back(hit);
        populateSearchBuffer(hit);

        // A literal (no match) still consumes one byte of look-ahead.
        populateLookaheadBuffer(match_length == 0 ? 1 : match_length);
    }
    return false;
}
// Read-only access to the hits recorded so far.
const std::vector<Lz77Encoder::Hit>& Lz77Encoder::getHitBuffer() const
{
    return mHitBuffer;
}
/*
void Lz77Encoder::flushHitBuffer()
{
// If dynamic huffman build trees
if (!mCodeGenerator)
{
mCodeGenerator = std::make_unique<HuffmanEncoder>();
}
// Convert hit buffer to prefix codes and write to output stream
for (const auto& hit : mHitBuffer)
{
const auto& [length, distance, next_char] = hit;
PrefixCode code;
if (length == 0)
{
code = *mCodeGenerator->getLiteralValue(next_char);
std::cout << "Writing symbol " << static_cast<int>(next_char) << " with code " << ByteUtils::toString(code.getData(), code.getLength()) << "\n";
mOutputStream->writeNBits(code.getData(), code.getLength());
}
else
{
code = *mCodeGenerator->getLengthValue(length);
const auto distance_code = mCodeGenerator->getDistanceValue(distance);
std::cout << "Writing length " << length << " with code " << ByteUtils::toString(code.getData(), code.getLength()) << "\n";
mOutputStream->writeNBits(code.getData(), code.getLength());
std::cout << "Writing distance " << distance << " with code " << ByteUtils::toString(distance_code.getData(), distance_code.getLength()) << "\n";
mOutputStream->writeNBits(distance_code.getData(), distance_code.getLength());
}
}
auto eos_code = mCodeGenerator->getEndOfStreamValue();
std::cout << "Writing EOS value with code " << ByteUtils::toString(eos_code->getData(), eos_code->getLength()) << "\n";
mOutputStream->writeNBits(eos_code->getData(), eos_code->getLength());
}
*/
// Decoding is not implemented: the function always returns false.
// The block comment below keeps an earlier string-based decoder that parsed
// '@<offset>L<length>' markers; it refers to a `stream` variable that does
// not exist in this function.
bool Lz77Encoder::decode()
{
/*
std::string ret;
unsigned loc{0};
while(loc < stream.size())
{
auto working_char = stream[loc];
if (working_char == '@')
{
unsigned loc_working = loc;
auto remainder = stream.size() - loc;
std::string offset;
unsigned length_loc{0};
for(unsigned jdx=0; jdx< remainder; jdx++)
{
loc++;
auto offset_char = stream[loc];
if (offset_char == 'L')
{
loc++;
break;
}
else
{
offset += offset_char;
}
}
unsigned offset_amount = std::stoul(offset);
std::string length;
remainder = stream.size() - loc;
for(unsigned jdx=0; jdx< remainder; jdx++)
{
auto length_char = stream[loc];
if (StringUtils::IsAlphabetical(length_char) || length_char == '@')
{
break;
}
else
{
loc++;
length += length_char;
}
}
unsigned length_amount = std::stoul(length);
auto buffer_index = ret.size() - offset_amount;
for(unsigned jdx=buffer_index;jdx<buffer_index+length_amount; jdx++)
{
ret += ret[jdx];
}
}
else
{
loc++;
ret += working_char;
}
}
return ret;
*/
// Not implemented yet.
return false;
}

View file

@ -0,0 +1,60 @@
#pragma once
#include "AbstractEncoder.h"
#include "HuffmanEncoder.h"
#include "CircleBuffer.h"
#include <string>
#include <vector>
#include <memory>
#include <tuple>
class PrefixCodeGenerator;
/**
 * Sliding-window matcher that turns the input stream into a list of hits.
 *
 * A search buffer holds bytes already processed and a look-ahead buffer
 * holds the bytes still to be processed; encode() records one Hit per step.
 */
class Lz77Encoder : public AbstractEncoder
{
public:
    /// (match length, match distance, byte at the front of the look-ahead
    /// buffer). A length of zero denotes a literal byte.
    using Hit = std::tuple<unsigned, unsigned, unsigned char>;

    Lz77Encoder(BitStream* inputStream, BitStream* outputStream);

    /// Fills the hit buffer; true when the input was consumed, false when
    /// the hit buffer filled up first.
    bool encode() override;

    /// Not implemented; always returns false.
    bool decode() override;

    const std::vector<Hit>& getHitBuffer() const;

    void setSearchBufferSize(unsigned size);

    void setLookAheadBufferSize(unsigned size);

    /// Takes ownership of the generator.
    void setPrefixCodeGenerator(std::unique_ptr<PrefixCodeGenerator> generator);

    bool hitBufferFull() const;

private:
    bool lookAheadSourceEmpty() const;

    unsigned char getSearchBufferItem(unsigned index) const;

    unsigned lookAheadForMatchingChars(unsigned distance);

    void lookForMatches(unsigned char searchChar, unsigned& hitLength, unsigned& hitOffset);

    void populateLookaheadBuffer(unsigned size, bool firstPass = false);

    void populateSearchBuffer(const Hit& hit);

    unsigned mMaxHitBufferSize{32000};
    std::vector<Hit> mHitBuffer;

    // Each size member is declared before the buffer it sizes: the
    // constructor initializes the buffers from these values and members are
    // initialized in declaration order.
    unsigned mSearchBufferSize{32000};
    CircleBuffer<unsigned char> mSearchBuffer;

    unsigned mLookAheadBufferSize{256};
    int mMaxLookAheadBufferIndex{0}; // last valid look-ahead index; negative when none is valid
    unsigned mMinLengthMatchSize{1};
    CircleBuffer<unsigned char> mLookaheadBuffer;

    std::unique_ptr<PrefixCodeGenerator> mCodeGenerator;
};

View file

@ -0,0 +1,54 @@
#include "RunLengthEncoder.h"
// Collapses consecutive equal bytes of `input` into (value, count) pairs.
// Returns an empty vector for empty input.
//
// The current run value is kept as unsigned char. It used to be a plain
// `char`, so on platforms where char is signed a byte >= 128 never compared
// equal to the stored value and every such byte became its own run.
std::vector<RunLengthEncoder::Hit> RunLengthEncoder::encode(const std::vector<unsigned char>& input)
{
    std::vector<RunLengthEncoder::Hit> ret;
    if (input.empty())
    {
        return ret;
    }

    unsigned char working_char = input.front();
    unsigned count = 1;
    for (std::size_t idx = 1; idx < input.size(); ++idx)
    {
        const auto c = input[idx];
        if (c == working_char)
        {
            ++count;
        }
        else
        {
            ret.push_back({working_char, count});
            working_char = c;
            count = 1;
        }
    }
    // Flush the run that was still open when the input ended.
    ret.push_back({working_char, count});
    return ret;
}
// Expands (value, count) pairs back into a flat byte sequence: each pair
// contributes `count` copies of `value`, in input order.
std::vector<unsigned char> RunLengthEncoder::decode(const std::vector<RunLengthEncoder::Hit>& input)
{
    std::vector<unsigned char> expanded;
    for (const auto& run : input)
    {
        expanded.insert(expanded.end(), run.second, run.first);
    }
    return expanded;
}

View file

@ -0,0 +1,15 @@
#pragma once
#include <vector>
/// Run-length codec over byte sequences.
class RunLengthEncoder
{
public:
    /// One run: the byte value (first) and its repeat count (second).
    using Hit = std::pair<unsigned char, unsigned>;

    /// Converts a byte sequence into a list of runs.
    std::vector<Hit> encode(const std::vector<unsigned char>& input);

    /// Converts a list of runs back into a byte sequence.
    std::vector<unsigned char> decode(const std::vector<Hit>& input);
};

View file

@ -0,0 +1,6 @@
#pragma once
// Placeholder type: no members are declared yet.
class StreamCompressor
{
};

View file

@ -0,0 +1,170 @@
#include "ZlibEncoder.h"
#include "ByteUtils.h"
#include "DeflateEncoder.h"
#include "FileLogger.h"
#include "BitStream.h"
#include "Adler32Checksum.h"
#include <math.h>
#include <iostream>
#include <sstream>
// Forwards both streams to the base class and installs an Adler-32
// calculator as this encoder's checksum calculator.
ZlibEncoder::ZlibEncoder(BitStream* inputStream, BitStream* outputStream)
    : AbstractEncoder(inputStream, outputStream)
    , mChecksumCalculator(std::make_unique<Adler32Checksum>())
{
}
// Defined out of line; the owned members release themselves.
ZlibEncoder::~ZlibEncoder() = default;
// Stores the window size. encode() later writes log2(size) - 8 into the
// header, so the value is used there as a power of two.
void ZlibEncoder::setWindowSize(unsigned size)
{
    mWindowSize = size;
}
// Returns the enumerator name of the level, or "UNKNOWN" for any other value.
std::string ZlibEncoder::toString(CompressionLevel level) const
{
    if (level == CompressionLevel::FASTEST)
    {
        return "FASTEST";
    }
    if (level == CompressionLevel::FAST)
    {
        return "FAST";
    }
    if (level == CompressionLevel::DEFAULT)
    {
        return "DEFAULT";
    }
    if (level == CompressionLevel::MAX_COMPRESSION)
    {
        return "MAX_COMPRESSION";
    }
    return "UNKNOWN";
}
// Returns "DEFLATE" for the DEFLATE method and "UNKNOWN" for anything else.
std::string ZlibEncoder::toString(CompressionMethod method) const
{
    if (method == CompressionMethod::DEFLATE)
    {
        return "DEFLATE";
    }
    return "UNKNOWN";
}
// Splits the first header byte: the low four bits select the compression
// method and the high four bits carry the compression info. For DEFLATE the
// window size is set to 2^(info + 8).
void ZlibEncoder::parseCompressionMethod(unsigned char method)
{
    const auto method_bits = ByteUtils::getLowerNBits(method, 4);
    const auto compression_info = ByteUtils::getHigherNBits(method, 4);

    mCompressionMethod = static_cast<CompressionMethod>(method_bits);
    if (mCompressionMethod != CompressionMethod::DEFLATE)
    {
        return;
    }
    // Integer shift gives the same power of two as pow(2, info + 8).
    mWindowSize = 1u << (compression_info + 8);
}
void ZlibEncoder::parseExtraFlags(unsigned char extraFlags, unsigned char compression_byte)
{
//std::cout << "Got flags " << static_cast<int>(extraFlags) << std::endl;
auto mod = ((static_cast<unsigned>(compression_byte) << 8) | extraFlags) % 31;
if (mod != 0)
{
//std::cout << "Invalid header. Mod is " << mod << std::endl;
}
mFlagCheck = ByteUtils::getLowerNBits(extraFlags, 5);
mUseDictionary = bool(ByteUtils::getBitN(extraFlags, 5));
mFlagLevel = static_cast<CompressionLevel>(ByteUtils::getHigherNBits(extraFlags, 2));
}
// Builds a multi-line, human-readable summary of the header fields currently
// held by this encoder.
std::string ZlibEncoder::getData() const
{
    std::ostringstream summary;
    summary << "ZlibEncoder data \n"
            << "Compression method: " << toString(mCompressionMethod) << '\n'
            << "Window size: " << mWindowSize << '\n'
            << "Flag check: " << static_cast<int>(mFlagCheck) << '\n'
            << "Use dictionary: " << mUseDictionary << '\n'
            << "Flag level: " << toString(mFlagLevel) << '\n';
    return summary.str();
}
bool ZlibEncoder::encode()
{
DeflateEncoder* deflate_encoder{nullptr};
if (!mWorkingEncoder)
{
if (mCompressionMethod == CompressionMethod::DEFLATE)
{
auto uq_deflate_encoder = std::make_unique<DeflateEncoder>(mInputStream, mOutputStream);
deflate_encoder = uq_deflate_encoder.get();
mWorkingEncoder = std::move(uq_deflate_encoder);
mWorkingEncoder->addChecksumCalculator(mChecksumCalculator.get());
}
else
{
MLOG_ERROR("Zib requested decoder not recognized: " << static_cast<int>(mCompressionMethod) << " aborting encode");
return false;
}
}
deflate_encoder->setCompressionMethod(mDeflateCompressionMethod);
auto compression_info = static_cast<unsigned char>(log2(mWindowSize) - 8);
const unsigned char compression_byte = (compression_info << 4) | static_cast<unsigned char>(mCompressionMethod);
//std::cout << "ZlibEncoder Writing compression byte " << static_cast<int>(compression_byte) << " with info " << static_cast<int>(compression_info) << std::endl;
mOutputStream->writeByte(compression_byte);
unsigned char flag_byte{0};
flag_byte |= (static_cast<unsigned char>(mUseDictionary) << 5);
flag_byte |= (static_cast<unsigned char>(mFlagLevel) << 6);
const auto mod = (unsigned(compression_byte)*256 + flag_byte) % 31;
flag_byte += (31 - mod);
//std::cout << "ZlibEncoder Writing Flag byte " << static_cast<int>(flag_byte) << std::endl;
mOutputStream->writeByte(flag_byte);
if(!mWorkingEncoder->encode())
{
MLOG_ERROR("Sub-Encoder failed - aborting zlib encode");
//return false;
}
const auto checksum = mChecksumCalculator->getChecksum();
//std::cout << "ZlibEncoder Writing Adler32 Checksum " << checksum << std::endl;
mOutputStream->write(checksum);
return true;
}
// Reads a zlib stream: two header bytes, the payload (handed to the working
// DEFLATE encoder's decode()), and a four-byte trailer.
//
// Every byte read is now checked before it is dereferenced; a stream that
// ends early is logged and reported as a failure instead of dereferencing an
// empty result. The trailer is assembled most-significant byte first using
// unsigned arithmetic, but it is NOT compared against a computed checksum.
//
// Returns the result of the working encoder's decode(), or false when the
// method is unsupported or the stream ends early.
bool ZlibEncoder::decode()
{
    const auto compression_byte = mInputStream->readNextByte();
    if (!compression_byte)
    {
        MLOG_ERROR("Zlib stream ended before the compression byte - aborting decode");
        return false;
    }
    parseCompressionMethod(*compression_byte);

    const auto flag_byte = mInputStream->readNextByte();
    if (!flag_byte)
    {
        MLOG_ERROR("Zlib stream ended before the flag byte - aborting decode");
        return false;
    }
    parseExtraFlags(*flag_byte, *compression_byte);

    if (!mWorkingEncoder)
    {
        if (mCompressionMethod == CompressionMethod::DEFLATE)
        {
            mWorkingEncoder = std::make_unique<DeflateEncoder>(mInputStream, mOutputStream);
        }
        else
        {
            MLOG_ERROR("Zib requested decoder not recognized: " << static_cast<int>(mCompressionMethod) << " aborting decode");
            return false;
        }
    }

    const auto valid = mWorkingEncoder->decode();

    uint32_t adler32{0};
    for (int idx = 0; idx < 4; ++idx)
    {
        const auto trailer_byte = mInputStream->readNextByte();
        if (!trailer_byte)
        {
            MLOG_ERROR("Zlib stream ended inside the checksum trailer - aborting decode");
            return false;
        }
        adler32 = (adler32 << 8) | static_cast<uint32_t>(*trailer_byte);
    }
    // TODO: verify the trailer against the checksum of the decoded data.
    static_cast<void>(adler32);

    return valid;
}

View file

@ -0,0 +1,59 @@
#pragma once
#include "AbstractEncoder.h"
#include "DeflateElements.h"
#include <memory>
#include <vector>
class AbstractChecksumCalculator;
/**
 * Reads and writes the zlib container: a two-byte header, a payload handled
 * by a nested encoder, and a trailing checksum.
 */
class ZlibEncoder : public AbstractEncoder
{
public:
    /// Value stored in the low four bits of the first header byte.
    enum class CompressionMethod : unsigned char
    {
        DEFLATE = 8,
    };

    /// Value stored in the top two bits of the second header byte.
    enum class CompressionLevel : unsigned char
    {
        FASTEST,
        FAST,
        DEFAULT,
        MAX_COMPRESSION
    };

    ZlibEncoder(BitStream* inputStream, BitStream* outputStream);

    ~ZlibEncoder();

    void setWindowSize(unsigned size);

    /// Selects the block type the nested DEFLATE encoder is asked to use.
    void setDeflateCompressionMethod(Deflate::CompressionMethod method)
    {
        mDeflateCompressionMethod = method;
    }

    bool encode() override;

    bool decode() override;

    /// Human-readable dump of the header fields.
    std::string getData() const;

    std::string toString(CompressionLevel level) const;

    std::string toString(CompressionMethod method) const;

private:
    void parseCompressionMethod(unsigned char method);

    void parseExtraFlags(unsigned char extraFlags, unsigned char compression_byte);

    CompressionMethod mCompressionMethod{CompressionMethod::DEFLATE};
    Deflate::CompressionMethod mDeflateCompressionMethod{Deflate::CompressionMethod::NONE};
    unsigned mWindowSize{32768}; // Window size in bytes; the header stores n where size = 2^(n+8)
    unsigned char mFlagCheck{0};
    bool mUseDictionary{false};
    CompressionLevel mFlagLevel{CompressionLevel::DEFAULT};
    std::unique_ptr<AbstractChecksumCalculator> mChecksumCalculator;
    std::unique_ptr<AbstractEncoder> mWorkingEncoder;
};

View file

@ -0,0 +1,157 @@
#include "DeflateBlock.h"
#include "ByteUtils.h"
#include "AbstractChecksumCalculator.h"
#include <algorithm>
#include <iostream>
#include <sstream>
// Stores the two stream pointers; neither is dereferenced here.
DeflateBlock::DeflateBlock(BitStream* inputStream, BitStream* outputStream)
    : mInputStream(inputStream)
    , mOutputStream(outputStream)
{
}
// Builds a multi-line, human-readable summary of the block header fields.
std::string DeflateBlock::getMetaData() const
{
    std::ostringstream summary;
    summary << "DeflateBlock Metadata \n"
            << "Final block: " << mInFinalBlock << '\n'
            << "Compression method: " << Deflate::toString(mCompressionMethod) << '\n'
            << "Uncompressed block length: " << mUncompressedBlockLength << '\n';
    return summary.str();
}
// Marks (or unmarks) this block as the last block of the stream.
void DeflateBlock::setIsFinalBlock(bool isFinal)
{
    mInFinalBlock = isFinal;
}
// Reports the final-block flag, as set by setIsFinalBlock() or read().
bool DeflateBlock::isFinalBlock() const
{
    return mInFinalBlock;
}
// Reads one block: a 1-bit final-block flag, a 2-bit compression type, and
// then the payload through the reader that matches the type.
//
// Returns false when the input stream has no more bytes (previously the
// empty result was dereferenced unchecked), when the compression type is not
// one of the three handled types, or when the payload reader fails.
bool DeflateBlock::read()
{
    // A byte is fetched from the stream before the header bits are read,
    // exactly as before; only the check for an exhausted stream is new.
    if (!mInputStream->readNextByte())
    {
        return false;
    }

    unsigned char final_block{0};
    mInputStream->readNextNBits(1, final_block);
    mInFinalBlock = bool(final_block);

    unsigned char compression_type{0};
    mInputStream->readNextNBits(2, compression_type);
    mCompressionMethod = static_cast<Deflate::CompressionMethod>(compression_type);

    switch (mCompressionMethod)
    {
    case Deflate::CompressionMethod::NONE:
        return readUncompressedStream();
    case Deflate::CompressionMethod::FIXED_HUFFMAN:
        return readFixedHuffmanStream();
    case Deflate::CompressionMethod::DYNAMIC_HUFFMAN:
        return readDynamicHuffmanStream();
    default:
        return false;
    }
}
// Reads an uncompressed block: two length bytes, two length-check bytes and
// then that many payload bytes, which are copied to the output stream.
//
// Every byte read is checked before use and false is returned if the stream
// ends early (these reads used to be dereferenced unchecked). The two
// leftover debug prints to stdout were removed, matching the other debug
// output in this file, which is commented out.
//
// NOTE(review): the length is assembled with the first byte as the high
// byte, and the length-check value is still read but not verified; both
// should be confirmed against the byte order used by BitStream::writeWord.
bool DeflateBlock::readUncompressedStream()
{
    const auto byte0 = mInputStream->readNextByte();
    if (!byte0)
    {
        return false;
    }
    const auto byte1 = mInputStream->readNextByte();
    if (!byte1)
    {
        return false;
    }
    mUncompressedBlockLength = static_cast<uint16_t>((*byte0 << 8) | *byte1);

    const auto byte2 = mInputStream->readNextByte();
    if (!byte2)
    {
        return false;
    }
    const auto byte3 = mInputStream->readNextByte();
    if (!byte3)
    {
        return false;
    }
    // Read to keep the stream position correct; not validated yet.
    [[maybe_unused]] const auto len_check = static_cast<uint16_t>((*byte2 << 8) | *byte3);

    for (unsigned idx = 0; idx < mUncompressedBlockLength; ++idx)
    {
        const auto payload_byte = mInputStream->readNextByte();
        if (!payload_byte)
        {
            return false;
        }
        mOutputStream->writeByte(*payload_byte);
    }
    return true;
}
// Creates a fresh HuffmanStream over the block's streams, has it generate
// the fixed code mapping and returns the result of its decode().
bool DeflateBlock::readFixedHuffmanStream()
{
    mHuffmanStream = std::make_unique<HuffmanStream>(mInputStream, mOutputStream);
    mHuffmanStream->generateFixedCodeMapping();
    const auto decoded = mHuffmanStream->decode();
    return decoded;
}
// Creates a fresh HuffmanStream over the block's streams and returns the
// result of its decode(); no fixed code mapping is generated here.
bool DeflateBlock::readDynamicHuffmanStream()
{
    mHuffmanStream = std::make_unique<HuffmanStream>(mInputStream, mOutputStream);
    const auto decoded = mHuffmanStream->decode();
    return decoded;
}
// Writes this block to the output stream.
//
// The header value carries the final-block flag in bit 0 and the compression
// method from bit 1 upwards. NONE delegates to writeUncompressedStream();
// FIXED_HUFFMAN writes the three header bits and then copies the input
// stream through unchanged, including any trailing partial byte. Other
// methods write nothing.
void DeflateBlock::write(uint16_t datalength)
{
    mUncompressedBlockLength = datalength;

    const auto final_bit = static_cast<unsigned char>(mInFinalBlock);
    const auto method_bits = static_cast<unsigned char>(mCompressionMethod) << 1;
    const auto header = static_cast<unsigned char>(final_bit | method_bits);

    switch (mCompressionMethod)
    {
    case Deflate::CompressionMethod::NONE:
        writeUncompressedStream(header, datalength);
        break;
    case Deflate::CompressionMethod::FIXED_HUFFMAN:
    {
        mOutputStream->writeNBits(header, 3);
        for (auto next = mInputStream->readNextByte(); next; next = mInputStream->readNextByte())
        {
            mOutputStream->writeByte(*next);
        }
        if (const auto& leftover = mInputStream->getRemainingBits(); leftover.second > 0)
        {
            mOutputStream->writeNBits(leftover.first, leftover.second);
        }
        break;
    }
    default:
        break;
    }
}
// Writes an uncompressed block: the header byte, the data length, the
// bitwise complement of the stored block length, and then that many bytes
// copied from the input stream.
//
// Each input byte is dereferenced without a check, so the input stream must
// hold at least mUncompressedBlockLength bytes.
void DeflateBlock::writeUncompressedStream(unsigned char working_byte, uint16_t datalength)
{
    mOutputStream->writeByte(working_byte);
    mOutputStream->writeWord(datalength);

    const auto complement = static_cast<uint16_t>(~mUncompressedBlockLength);
    mOutputStream->writeWord(complement);

    unsigned remaining = mUncompressedBlockLength;
    while (remaining > 0)
    {
        mOutputStream->writeByte(*mInputStream->readNextByte());
        --remaining;
    }
}

View file

@ -0,0 +1,46 @@
#pragma once
#include "DeflateElements.h"
#include "HuffmanStream.h"
#include "BitStream.h"
#include <memory>
class AbstractChecksumCalculator;
/**
 * A single DEFLATE block that can be read from, or written to, a pair of
 * bit streams. The stream pointers are stored as given and never deleted
 * by this class.
 */
class DeflateBlock
{
public:
    DeflateBlock(BitStream* inputStream, BitStream* outputStream);

    /// Human-readable dump of the block header fields.
    std::string getMetaData() const;

    bool isFinalBlock() const;

    /// Reads the block header and payload; false on failure.
    bool read();

    void setIsFinalBlock(bool isFinal);

    void setCompressionMethod(Deflate::CompressionMethod method)
    {
        mCompressionMethod = method;
    }

    /// Writes the block; datalength is stored as the uncompressed length.
    void write(uint16_t datalength);

private:
    bool readUncompressedStream();

    bool readFixedHuffmanStream();

    bool readDynamicHuffmanStream();

    void writeUncompressedStream(unsigned char working_byte, uint16_t datalength);

    BitStream* mInputStream;
    BitStream* mOutputStream;
    std::unique_ptr<HuffmanStream> mHuffmanStream;
    uint16_t mUncompressedBlockLength{0};
    bool mInFinalBlock{false};
    Deflate::CompressionMethod mCompressionMethod{Deflate::CompressionMethod::NONE};
};

View file

@ -0,0 +1,33 @@
#pragma once
#include <string>
namespace Deflate
{
    /// Block types; the numeric value is what is cast to and from the
    /// two-bit type field of a block header.
    enum class CompressionMethod
    {
        NONE = 0,
        FIXED_HUFFMAN = 1,
        DYNAMIC_HUFFMAN = 2,
        ERROR = 3
    };

    /// Returns the enumerator name, or "UNKNOWN" for any other value.
    inline std::string toString(CompressionMethod method)
    {
        if (method == CompressionMethod::NONE)
        {
            return "NONE";
        }
        if (method == CompressionMethod::FIXED_HUFFMAN)
        {
            return "FIXED_HUFFMAN";
        }
        if (method == CompressionMethod::DYNAMIC_HUFFMAN)
        {
            return "DYNAMIC_HUFFMAN";
        }
        if (method == CompressionMethod::ERROR)
        {
            return "ERROR";
        }
        return "UNKNOWN";
    }
}

View file

@ -0,0 +1,88 @@
#include "DeflateEncoder.h"
#include "BitStream.h"
#include "ByteUtils.h"
#include "DeflateBlock.h"
#include "BufferBitStream.h"
#include <iostream>
// Forwards both streams to the base class; no other state is set up here.
DeflateEncoder::DeflateEncoder(BitStream* inputStream, BitStream* outputStream)
    : AbstractEncoder(inputStream, outputStream)
{
}
// Defined out of line: the header only forward-declares DeflateBlock, which
// must be complete where the owning member is destroyed.
DeflateEncoder::~DeflateEncoder() = default;
// Splits the input stream into blocks of at most mMaxBlockSize bytes and
// writes each block to the output stream.
//
// Input bytes are staged in a local buffer stream. When the staged count
// reaches mMaxBlockSize the block is written and a new one is started; when
// the input runs out, the remaining bytes are written as the final block.
// The first registered checksum calculator, if any, is attached to the
// output stream for the duration of the call. Always returns true.
//
// `count` is now reset after a full block is flushed. Previously it stayed
// at 65535 and wrapped to 0 on the next increment, after one byte had
// already been staged, so every later block length was one byte short.
bool DeflateEncoder::encode()
{
    uint16_t count = 0;
    BufferBitStream stream;
    std::unique_ptr<DeflateBlock> working_block = std::make_unique<DeflateBlock>(&stream, mOutputStream);
    working_block->setCompressionMethod(mCompressionMethod);

    if (!mChecksumCalculators.empty())
    {
        mOutputStream->setChecksumCalculator(mChecksumCalculators[0]);
    }

    while(true)
    {
        if (count == mMaxBlockSize)
        {
            // Staging buffer is full: emit it and start a new block.
            working_block->write(count);
            working_block = std::make_unique<DeflateBlock>(&stream, mOutputStream);
            working_block->setCompressionMethod(mCompressionMethod);
            stream.reset();
            count = 0;
        }

        if (auto byte = mInputStream->readNextByte())
        {
            stream.writeByte(*byte);
        }
        else
        {
            // Input exhausted: carry over any trailing partial byte and
            // emit what was staged as the final block.
            if (const auto& remaining_bits = mInputStream->getRemainingBits(); remaining_bits.second > 0)
            {
                stream.writeNBits(remaining_bits.first, remaining_bits.second);
            }
            stream.resetOffsets();
            working_block->setIsFinalBlock(true);
            working_block->write(count);
            break;
        }
        count++;
    }

    mOutputStream->flushRemainingBits();
    mOutputStream->clearChecksumCalculator();
    return true;
}
bool DeflateEncoder::decode()
{
auto working_block = std::make_unique<DeflateBlock>(mInputStream, mOutputStream);
working_block->read();
//std::cout << working_block->getMetaData() << std::endl;
DeflateBlock* raw_block = working_block.get();
while(!raw_block->isFinalBlock())
{
break;
}
return true;
}

View file

@ -0,0 +1,33 @@
#pragma once
#include "AbstractEncoder.h"
#include "DeflateElements.h"
#include <vector>
#include <memory>
class DeflateBlock;
/**
 * Encoder that splits the input stream into DEFLATE blocks on encode() and
 * reads a block back on decode().
 */
class DeflateEncoder : public AbstractEncoder
{
public:
    DeflateEncoder(BitStream* inputStream, BitStream* outputStream);

    ~DeflateEncoder();

    bool encode() override;

    bool decode() override;

    /// Selects the block type used for blocks written by encode().
    void setCompressionMethod(Deflate::CompressionMethod method)
    {
        mCompressionMethod = method;
    }

private:
    uint16_t mMaxBlockSize{65535}; // maximum number of bytes staged per block
    Deflate::CompressionMethod mCompressionMethod{Deflate::CompressionMethod::NONE};
    std::unique_ptr<DeflateBlock > mLastBlock;
};

View file

@ -0,0 +1,253 @@
#include "HuffmanCodeLengthTable.h"
#include "ByteUtils.h"
#include "RunLengthEncoder.h"
#include "BitStream.h"
#include <algorithm>
#include <sstream>
#include <iostream>
void HuffmanCodeLengthTable::buildCompressedLengthSequence()
{
RunLengthEncoder rl_encoder;
auto rle_encoded = rl_encoder.encode(mInputLengthSequence);
for (const auto& entry : rle_encoded)
{
//std::cout << "Got rle " << static_cast<int>(entry.first) << " | " << entry.second << std::endl;
}
mCompressedLengthSequence.clear();
for (const auto& entry : rle_encoded)
{
const auto length = entry.first;
const auto count = entry.second;
if (count < 3)
{
for(std::size_t idx=0; idx<count; idx++)
{
mCompressedLengthSequence.push_back({length, 0});
}
}
else if (length == 0)
{
std::size_t num_big = count / 138;
for(std::size_t idx=0; idx<num_big; idx++)
{
mCompressedLengthSequence.push_back({18, 127});
}
auto remainder_big = count % 138;
if (remainder_big > 10)
{
mCompressedLengthSequence.push_back({18, remainder_big-11});
}
else if(remainder_big > 2)
{
mCompressedLengthSequence.push_back({17, remainder_big-3});
}
else
{
for(std::size_t idx=0; idx<remainder_big; idx++)
{
mCompressedLengthSequence.push_back({0, 0});
}
}
}
else
{
mCompressedLengthSequence.push_back({length, 0});
auto num_blocks_of_six = (count-1)/6;
for(std::size_t idx=0; idx<num_blocks_of_six; idx++)
{
mCompressedLengthSequence.push_back({16, 3});
}
auto remaining_counts = (count-1) % 6;
if (remaining_counts >= 3)
{
mCompressedLengthSequence.push_back({16, remaining_counts - 3});
}
else
{
for(std::size_t idx=0; idx<remaining_counts; idx++)
{
mCompressedLengthSequence.push_back({length, 0});
}
}
}
}
mCompressedLengthCounts = std::vector<std::size_t>(19, 0);
for (const auto& entry : mCompressedLengthSequence)
{
mCompressedLengthCounts[entry.first]++;
}
}
// Read-only access to the sequence built by buildCompressedLengthSequence().
const std::vector<HuffmanCodeLengthTable::CompressedSequenceEntry>& HuffmanCodeLengthTable::getCompressedLengthSequence() const
{
    return mCompressedLengthSequence;
}
// Returns a copy of the per-symbol counts built by
// buildCompressedLengthSequence().
const std::vector<std::size_t> HuffmanCodeLengthTable::getCompressedLengthCounts() const
{
    return mCompressedLengthCounts;
}
// Looks the symbol up in the tree; the result is whatever the tree returns.
std::optional<PrefixCode> HuffmanCodeLengthTable::getCodeForSymbol(unsigned symbol) const
{
    return mTree.getCode(symbol);
}
bool HuffmanCodeLengthTable::readNextSymbol(unsigned& result, BitStream* stream)
{
if (getNumCodeLengths() == 0)
{
return false;
}
std::size_t working_index{0};
auto length = getCodeLength(working_index);
auto delta = length;
bool found{false};
unsigned char buffer{0};
uint32_t working_bits{0};
unsigned working_symbol{0};
while(!found)
{
auto valid = stream->readNextNBits(delta, buffer);
//std::cout << "Got buffer " << ByteUtils::toString(buffer) << std::endl;;
unsigned hold = buffer;
working_bits = working_bits | (hold << (length - delta));
//std::cout << "Read " << delta << " bits with length " << length << " and value " << ByteUtils::toString(working_bits) << std::endl;
if (const auto symbol = findMatch(working_index, working_bits))
{
found = true;
working_symbol = *symbol;
}
else
{
working_index++;
if (working_index >= getNumCodeLengths())
{
break;
}
auto new_length = getCodeLength(working_index);
delta = new_length - length;
length = new_length;
}
}
if (found)
{
result = working_symbol;
// std::cout << "Found symbol " << working_symbol << " with bits " << ByteUtils::toString(working_bits) << std::endl;
// std::cout << "At Byte offset " << stream->getCurrentByteOffset() << " and bit offset " << stream->getCurrentBitOffset() << std::endl;
return true;
}
else
{
//std::cout << "SYMBOL NOT FOUND " << " with bits " << ByteUtils::toString(working_bits) << " and index " << working_index << std::endl;
return false;
}
}
void HuffmanCodeLengthTable::buildPrefixCodes()
{
if(mInputLengthSequence.empty())
{
return;
}
unsigned char max_length = *std::max_element(mInputLengthSequence.begin(), mInputLengthSequence.end());
std::vector<unsigned> counts(max_length+1, 0);
for (const auto length : mInputLengthSequence)
{
counts[length]++;
}
counts[0] = 0;
uint32_t code{0};
std::vector<uint32_t> next_code(max_length + 1, 0);
for (unsigned bits = 1; bits <= max_length; bits++)
{
code = (code + counts[bits-1]) << 1;
//std::cout << "Start code for bit " << bits << " is " << ByteUtils::toString(code) << " | dec " << code << " count " << counts[bits-1] << std::endl;
next_code[bits] = code;
}
for(std::size_t idx=0; idx<mInputLengthSequence.size(); idx++)
{
if (const auto length = mInputLengthSequence[idx]; length != 0)
{
const auto code = next_code[length];
next_code[length]++;
auto prefix_code = PrefixCode(code, length);
mTree.addCodeLengthEntry(length, {PrefixCode(code, length), static_cast<unsigned>(idx)});
mCodes.push_back(prefix_code);
}
}
mTree.sortTable();
//std::cout << dumpPrefixCodes();
}
// Returns the prefix code stored at position `index` of the generated code
// list. The index is not range-checked.
const PrefixCode& HuffmanCodeLengthTable::getCode(std::size_t index) const
{
    const PrefixCode& stored_code = mCodes[index];
    return stored_code;
}
// Returns the textual listing of all generated codes, as produced by the
// underlying tree's dump() with its default bit ordering.
std::string HuffmanCodeLengthTable::dumpPrefixCodes() const
{
    std::string listing = mTree.dump();
    return listing;
}
// Translates a position in the transmitted code-length sequence into the
// symbol it describes, using the DEFLATE_PERMUTATION table.
// Positions outside the table map to 0.
std::size_t HuffmanCodeLengthTable::mapToDeflateIndex(std::size_t index) const
{
    return (index < DEFLATE_PERMUTATION_SIZE) ? DEFLATE_PERMUTATION[index] : 0;
}
// Returns how many distinct code lengths the underlying tree holds.
std::size_t HuffmanCodeLengthTable::getNumCodeLengths() const
{
    const std::size_t num_lengths = mTree.getNumCodeLengths();
    return num_lengths;
}
// Looks up `code` among the codes of the length group at `treeIndex`.
// Forwards to HuffmanTree::findMatch and returns its result unchanged.
std::optional<HuffmanTree::Symbol> HuffmanCodeLengthTable::findMatch(std::size_t treeIndex, uint32_t code) const
{
    auto matched_symbol = mTree.findMatch(treeIndex, code);
    return matched_symbol;
}
// Returns the code length of the length group at position `index` in the
// underlying tree.
unsigned HuffmanCodeLengthTable::getCodeLength(std::size_t index) const
{
    const unsigned group_length = mTree.getCodeLength(index);
    return group_length;
}
// Stores the per-symbol code lengths that buildPrefixCodes() will consume.
//
// sequence      - code lengths to store.
// targetDeflate - when true, `sequence` is taken to be in transmission order
//                 and each entry is moved to the slot given by
//                 mapToDeflateIndex(); the stored sequence then always has
//                 DEFLATE_PERMUTATION_SIZE entries, missing ones being zero.
//                 When false, `sequence` is stored unchanged.
void HuffmanCodeLengthTable::setInputLengthSequence(const std::vector<unsigned char>& sequence, bool targetDeflate)
{
    mTargetDeflate = targetDeflate;
    if (!targetDeflate)
    {
        mInputLengthSequence = sequence;
        return;
    }

    mInputLengthSequence.assign(DEFLATE_PERMUTATION_SIZE, 0);
    std::size_t position{0};
    for (const auto length : sequence)
    {
        mInputLengthSequence[mapToDeflateIndex(position)] = length;
        ++position;
    }
}

View file

@ -0,0 +1,56 @@
#pragma once
#include "HuffmanTree.h"
#include <vector>
#include <string>
#include <optional>
class BitStream;
// Prefix-code table built from a sequence of per-symbol code lengths.
// Typical use: setInputLengthSequence(), then buildPrefixCodes(), then either
// look codes up or decode symbols from a BitStream with readNextSymbol().
class HuffmanCodeLengthTable
{
public:
// Generates a prefix code for every non-zero entry of the input length
// sequence and stores the codes in the tree and in the code list.
void buildPrefixCodes();
// Defined outside this excerpt; presumably fills the compressed length
// sequence and counts exposed below - TODO confirm.
void buildCompressedLengthSequence();
// Returns the tree's textual dump of all generated codes.
std::string dumpPrefixCodes() const;
// Searches the length group at `treeIndex` for `code`; empty when not found.
std::optional<HuffmanTree::Symbol> findMatch(std::size_t treeIndex, uint32_t code) const;
const HuffmanTree& getTree() const;
// Returns the code at position `index` of the generated code list (unchecked).
const PrefixCode& getCode(std::size_t index) const;
std::optional<PrefixCode> getCodeForSymbol(unsigned symbol) const;
using CompressedSequenceEntry = std::pair<unsigned, unsigned>;
const std::vector<CompressedSequenceEntry>& getCompressedLengthSequence() const;
const std::vector<std::size_t> getCompressedLengthCounts() const;
// Number of distinct code lengths held by the tree.
std::size_t getNumCodeLengths() const;
// Code length of the length group at position `treeIndex`.
unsigned getCodeLength(std::size_t treeIndex) const;
// Maps a transmission-order position to its symbol via DEFLATE_PERMUTATION;
// positions outside the table map to 0.
std::size_t mapToDeflateIndex(std::size_t index) const;
// Stores the code lengths; with targetDeflate the entries are first
// reordered through mapToDeflateIndex().
void setInputLengthSequence(const std::vector<unsigned char>& sequence, bool targetDeflate = true);
// Reads bits from `stream` until they match a code; on success writes the
// symbol to `buffer` and returns true, otherwise returns false.
bool readNextSymbol(unsigned& buffer, BitStream* stream);
private:
HuffmanTree mTree;
bool mTargetDeflate{true};
std::vector<unsigned char> mInputLengthSequence;
std::vector<PrefixCode> mCodes;
std::vector<CompressedSequenceEntry> mCompressedLengthSequence;
std::vector<std::size_t> mCompressedLengthCounts;
// Order in which DEFLATE transmits the lengths of the code-length alphabet
// (RFC 1951 section 3.2.7).
static constexpr unsigned DEFLATE_PERMUTATION_SIZE{19};
static constexpr unsigned DEFLATE_PERMUTATION[DEFLATE_PERMUTATION_SIZE]{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
};

View file

@ -0,0 +1,173 @@
#include "HuffmanEncoder.h"
#include "RawTree.h"
#include "HuffmanFixedCodes.h"
#include <unordered_map>
#include <queue>
#include <tuple>
#include <iostream>
// Debug helper that walks the subtree rooted at `node` depth-first, left
// child before right child. The print statements that used the node data and
// the depth-based prefix are currently disabled, so the walk has no output.
void HuffmanEncoder::dumpNode(RawNode<CountPair>* node, unsigned depth) const
{
    if (!node)
    {
        return;
    }

    // Kept for the disabled debug output: node payload and indentation prefix.
    [[maybe_unused]] auto data = node->getData();
    [[maybe_unused]] std::string prefix(depth, '_');

    if (node->isLeaf())
    {
        return;
    }

    const unsigned child_depth = depth + 1;
    dumpNode(node->getLeftChild(), child_depth);
    dumpNode(node->getRightChild(), child_depth);
}
// Debug helper: walks the whole tree starting from its root at depth 0.
void HuffmanEncoder::dumpTree(const RawTree<CountPair>& tree) const
{
    constexpr unsigned root_depth{0};
    dumpNode(tree.getRootNode(), root_depth);
}
// Builds a Huffman tree from symbol frequencies.
//
// counts - counts[i] is the frequency of symbol i; symbols with a zero count
//          are left out of the tree.
//
// The two least frequent nodes are repeatedly merged under a new parent
// carrying the sum of their counts until one root remains. The tree is local
// to this function and is not stored anywhere yet.
// When no symbol has a non-zero count there is nothing to build and the
// function returns without touching the (empty) queue.
void HuffmanEncoder::encode(const std::vector<unsigned>& counts)
{
    // Orders the queue so that the node with the smallest count is on top.
    auto cmp = [](RawNode<CountPair>* left, RawNode<CountPair>* right)
    {
        return left->getData().second > right->getData().second;
    };
    std::priority_queue<RawNode<CountPair>*, std::vector<RawNode<CountPair>* >, decltype(cmp)> q(cmp);

    unsigned offset{0};
    for (auto count : counts)
    {
        if (count > 0)
        {
            q.push(new RawNode<CountPair>({offset, count}));
        }
        offset++;
    }

    // top() on an empty queue is undefined behaviour; bail out instead.
    if (q.empty())
    {
        return;
    }

    while (q.size() > 1)
    {
        auto node0 = q.top();
        q.pop();
        auto node1 = q.top();
        q.pop();

        const auto sum = node0->getData().second + node1->getData().second;
        auto new_node = new RawNode<CountPair>(CountPair{0, sum});
        new_node->addChild(node0);
        new_node->addChild(node1);
        q.push(new_node);
    }

    auto root = q.top();
    q.pop();

    // NOTE(review): the nodes are allocated with new; presumably the tree
    // takes ownership through addRootNode()/addChild() - confirm.
    RawTree<CountPair> tree;
    tree.addRootNode(root);
}
// Builds a Huffman tree from a byte -> frequency map.
// The bytes are appended to mSymbolMapping and their counts, in the same
// iteration order, are passed to the vector overload of encode().
void HuffmanEncoder::encode(const std::unordered_map<unsigned char, unsigned>& counts)
{
    std::vector<unsigned> frequencies;
    frequencies.reserve(counts.size());
    for (const auto& [symbol, frequency] : counts)
    {
        mSymbolMapping.push_back(symbol);
        frequencies.push_back(frequency);
    }
    encode(frequencies);
}
void HuffmanEncoder::setUseFixedCode(bool useFixed)
{
mUseFixedCode = useFixed;
}
// Placeholder overload: ignores `length` and always returns 0.
// NOTE(review): being non-const, this overload is chosen over the const
// override of the same name when called on a non-const encoder.
uint32_t HuffmanEncoder::getLengthValue([[maybe_unused]] unsigned length)
{
    return uint32_t{0};
}
// Returns the literal/length table's code for the literal byte `value`, or
// whatever empty result the table reports when it has no such symbol.
std::optional<PrefixCode> HuffmanEncoder::getLiteralValue(unsigned char value) const
{
    const unsigned symbol = value;
    return mLiteralLengthTable.getCodeForSymbol(symbol);
}
// Looks `length` up in the literal/length table and returns the result.
// NOTE(review): the argument is used directly as the symbol; confirm whether
// callers pass a length symbol or a raw match length.
std::optional<PrefixCode> HuffmanEncoder::getLengthValue(unsigned length) const
{
    const unsigned symbol = length;
    return mLiteralLengthTable.getCodeForSymbol(symbol);
}
// Looks `distance` up in the distance table and returns the result.
// NOTE(review): no visible code populates mDistanceTable - confirm it is
// built before this is called.
std::optional<PrefixCode> HuffmanEncoder::getDistanceValue(unsigned distance) const
{
    const unsigned symbol = distance;
    return mDistanceTable.getCodeForSymbol(symbol);
}
// Returns the literal/length table's code for symbol 256, the symbol this
// encoder uses to mark the end of the stream.
std::optional<PrefixCode> HuffmanEncoder::getEndOfStreamValue() const
{
    constexpr unsigned end_of_stream_symbol{256};
    return mLiteralLengthTable.getCodeForSymbol(end_of_stream_symbol);
}
void HuffmanEncoder::initializeTrees(const std::vector<Hit>& hits)
{
initializeLiteralLengthTable(hits);
}
// Initialises the literal/length table.
//
// With fixed codes enabled, the fixed code lengths are loaded and the prefix
// codes are built directly. Otherwise the hits are tallied into per-symbol
// counts (a hit is a tuple of match length, distance and next character) and
// the counts are passed to encode().
void HuffmanEncoder::initializeLiteralLengthTable(const std::vector<Hit>& hits)
{
    if (mUseFixedCode)
    {
        mLiteralLengthTable.setInputLengthSequence(HuffmanFixedCodes::getDeflateFixedHuffmanCodes(), false);
        mLiteralLengthTable.buildPrefixCodes();
        return;
    }

    // NOTE(review): 285 slots cover symbols 0..284 only - confirm that
    // getCodeForLength() can never yield 285 or more.
    std::vector<unsigned> counts(285, 0);
    counts[256] = 1; // the end-of-stream symbol always occurs once

    for (const auto& hit : hits)
    {
        const auto match_length = std::get<0>(hit);
        if (match_length > 0)
        {
            // A match: count the code reported for its length.
            const auto code = std::get<0>(HuffmanFixedCodes::getCodeForLength(match_length));
            counts[code]++;
        }
        else
        {
            // No match: count the literal character itself.
            counts[std::get<2>(hit)]++;
        }
    }

    encode(counts);
}

View file

@ -0,0 +1,57 @@
#pragma once
#include "RawTree.h"
#include "HuffmanCodeLengthTable.h"
#include "HuffmanFixedCodes.h"
#include <vector>
#include <tuple>
#include <unordered_map>
// Interface for objects that supply prefix codes for the kinds of value an
// encoder emits. Every query returns an optional, so an implementation can
// report that it has no code for the requested value.
class PrefixCodeGenerator
{
public:
virtual ~PrefixCodeGenerator() = default;
// Code for a literal byte.
virtual std::optional<PrefixCode> getLiteralValue(unsigned char symbol) const = 0;
// Code for a match length.
virtual std::optional<PrefixCode> getLengthValue(unsigned length) const = 0;
// Code for a match distance.
virtual std::optional<PrefixCode> getDistanceValue(unsigned distance) const = 0;
// Code that marks the end of the stream.
virtual std::optional<PrefixCode> getEndOfStreamValue() const = 0;
};
// PrefixCodeGenerator backed by two HuffmanCodeLengthTable instances, one for
// literals/lengths and one for distances.
class HuffmanEncoder : public PrefixCodeGenerator
{
// (symbol, count) payload stored in the nodes of the frequency tree.
using CountPair = std::pair<unsigned, unsigned>;
// (match length, distance, next character) as unpacked in
// initializeLiteralLengthTable().
using Hit = std::tuple<unsigned, unsigned, unsigned char>;
public:
// Builds a frequency tree from counts indexed by symbol.
void encode(const std::vector<unsigned>& counts);
// Same, from a byte -> count map; also records the bytes in mSymbolMapping.
void encode(const std::unordered_map<unsigned char, unsigned>& counts);
// Non-const overload whose definition always returns 0.
// NOTE(review): on a non-const encoder this overload is selected instead of
// the const override below.
uint32_t getLengthValue(unsigned length);
std::optional<PrefixCode> getLiteralValue(unsigned char symbol) const override;
std::optional<PrefixCode> getLengthValue(unsigned length) const override;
std::optional<PrefixCode> getDistanceValue(unsigned distance) const override;
std::optional<PrefixCode> getEndOfStreamValue() const override;
// Prepares the tables for the given hits (currently literal/length only).
void initializeTrees(const std::vector<Hit>& hits);
// Chooses between the fixed code lengths and counts derived from hits.
void setUseFixedCode(bool useFixed);
private:
void initializeLiteralLengthTable(const std::vector<Hit>& hits);
// Debug traversal helpers; their print statements are currently disabled.
void dumpTree(const RawTree<CountPair>& tree) const;
void dumpNode(RawNode<CountPair>* node, unsigned depth) const;
bool mUseFixedCode{false};
// Not referenced by the member definitions visible in this excerpt.
bool mTableIsInitialized{false};
// Bytes in the order their counts were handed to encode().
std::vector<unsigned char> mSymbolMapping;
HuffmanCodeLengthTable mLiteralLengthTable;
HuffmanCodeLengthTable mDistanceTable;
};

View file

@ -0,0 +1,145 @@
#pragma once
#include <vector>
#include <tuple>
namespace HuffmanFixedCodes
{
// Returns the code lengths of the fixed literal/length code, one entry per
// symbol 0..287: 144 symbols of length 8, then 112 of length 9, 24 of
// length 7 and finally 8 of length 8.
inline std::vector<unsigned char> getDeflateFixedHuffmanCodes()
{
    struct Run
    {
        unsigned count;
        unsigned char length;
    };
    constexpr Run runs[] = {{144, 8}, {112, 9}, {24, 7}, {8, 8}};

    std::vector<unsigned char> lengths;
    for (const Run& run : runs)
    {
        lengths.insert(lengths.end(), run.count, run.length);
    }
    return lengths;
}
// Maps a match length to a tuple of (code, extra-bits value, number of extra
// bits).
//
// Lengths up to 10 map to `length - 3` with no extra bits. Longer lengths are
// tried against four successive ranges, the n-th using n extra bits; lengths
// beyond the last range yield {258, 0, 0}.
//
// NOTE(review): the results do not look like the RFC 1951 length table (codes
// are not offset by 257, and lengths below 3 wrap around) - confirm the
// intended contract before relying on the values.
inline std::tuple<unsigned, unsigned char, unsigned char> getCodeForLength(unsigned length)
{
    using LengthCode = std::tuple<unsigned, unsigned char, unsigned char>;

    if (length <= 10)
    {
        return LengthCode{length - 3, 0, 0};
    }

    unsigned range_step = 2;
    unsigned range_start = 10;
    for (unsigned num_extra_bits = 1; num_extra_bits <= 4; ++num_extra_bits)
    {
        const unsigned into_range = length - range_start;
        if (into_range <= 4 * range_step)
        {
            const unsigned quotient = into_range / range_step;
            const unsigned remainder = into_range % range_step;
            return LengthCode{range_start + quotient,
                              static_cast<unsigned char>(quotient + remainder),
                              static_cast<unsigned char>(num_extra_bits)};
        }
        range_start += 4 * num_extra_bits;
        range_step *= 2;
    }
    return LengthCode{258, 0, 0};
}
/*
inline std::pair<unsigned, unsigned char> getCodeForLength(unsigned length)
{
if (length <= 10)
{
return {257 + length - 3, 0};
}
else if(length <= 18)
{
auto offset = length - 10;
auto extra = offset/2 + offset % 2;
return {265 + offset/2, extra};
}
else if(length <= 34)
{
auto offset = length - 19;
auto extra = offset/4 + offset % 4;
return {269 + offset/4, extra};
}
else if(length <= 66)
{
auto offset = length - 35;
auto extra = offset/8 + offset % 8;
return {273 + offset/8, extra};
}
else if(length <= 114)
{
auto offset = length - 67;
auto extra = offset/16 + offset % 16;
return {277 + offset/16, extra};
}
else if(length <= 257)
{
auto offset = length - 115;
auto extra = offset/32 + offset % 32;
return {281 + offset/32, extra};
}
else
{
return {258, 0};
}
}
inline unsigned getLengthForCode(unsigned symbol, unsigned extra)
{
if (symbol <= 264)
{
return 3 + symbol - 257;
}
else if (symbol <= 268)
{
return 11 + 2*(symbol - 265) + extra;
const auto valid_dist = readNextDistanceSymbol(distance);
copyFromBuffer(length, distance);
}
else if (symbol <= 272)
{
unsigned char extra{0};
mInputStream->readNextNBits(2, extra);
auto length = 19 + 4*(symbol - 269) + extra;
const auto valid_dist = readNextDistanceSymbol(distance);
copyFromBuffer(length, distance);
}
else if (symbol <= 276)
{
unsigned char extra{0};
mInputStream->readNextNBits(3, extra);
auto length = 35 + 8*(symbol - 273) + extra;
const auto valid_dist = readNextDistanceSymbol(distance);
copyFromBuffer(length, distance);
}
else if (symbol <= 280)
{
unsigned char extra{0};
mInputStream->readNextNBits(4, extra);
auto length = 67 + 16*(symbol - 277) + extra;
const auto valid_dist = readNextDistanceSymbol(distance);
copyFromBuffer(length, distance);
}
else if (symbol <= 284)
{
unsigned char extra{0};
mInputStream->readNextNBits(5, extra);
auto length = 131 + 32*(symbol - 281) + extra;
const auto valid_dist = readNextDistanceSymbol(distance);
copyFromBuffer(length, distance);
}
else if (symbol == 285)
{
auto length = 258;
const auto valid_dist = readNextDistanceSymbol(distance);
copyFromBuffer(length, distance);
}
}
*/
}

View file

@ -0,0 +1,346 @@
#include "HuffmanStream.h"
#include "ByteUtils.h"
#include "HuffmanFixedCodes.h"
#include <iostream>
#include <algorithm>
#include <unordered_map>
#include <sstream>
// Base distance for each DEFLATE distance symbol from 4 to 29 (RFC 1951
// section 3.2.5); entry i belongs to symbol i + 4. The value read from the
// symbol's extra bits is added to the base to obtain the actual distance.
// Symbols 0-3 encode distances 1-4 directly and need no entry.
std::vector<unsigned> DISTANCE_OFFSETS
{
    5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
    257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097,
    6145, 8193, 12289, 16385, 24577
};
HuffmanStream::HuffmanStream(BitStream* inputStream, BitStream* outputStream)
: mInputStream(inputStream),
mOutputStream(outputStream)
{
}
// Switches the stream to fixed codes: marks the mode and builds the fixed
// literal/length prefix codes. The codes are stored in mCodeLengthTable,
// which is the table decode() reads from in fixed mode.
void HuffmanStream::generateFixedCodeMapping()
{
    mUsingFixedCodes = true;

    const auto fixed_lengths = HuffmanFixedCodes::getDeflateFixedHuffmanCodes();
    mCodeLengthTable.setInputLengthSequence(fixed_lengths, false);
    mCodeLengthTable.buildPrefixCodes();
}
// Decodes the next symbol from the input stream with the code-length table.
// Returns true and writes the symbol to `buffer` on success.
bool HuffmanStream::readNextCodeLengthSymbol(unsigned& buffer)
{
    const bool found = mCodeLengthTable.readNextSymbol(buffer, mInputStream);
    return found;
}
// Decodes the next symbol from the input stream with the literal/length
// table. Returns true and writes the symbol to `buffer` on success.
bool HuffmanStream::readNextLiteralSymbol(unsigned& buffer)
{
    const bool found = mLiteralTable.readNextSymbol(buffer, mInputStream);
    return found;
}
// Decodes the next back-reference distance from the input stream.
//
// buffer - receives the decoded distance on success.
// Returns false when no distance symbol could be decoded, when the symbol
// has no entry in DISTANCE_OFFSETS (symbols 30 and 31 are not valid
// distances), or when the stream runs out while reading the extra bits.
//
// Symbols 0-3 encode distances 1-4 directly. Every later pair of symbols uses
// one more extra bit (symbols 4/5 use 1, ... 28/29 use 13); the extra-bit
// value is added to the symbol's base distance.
bool HuffmanStream::readNextDistanceSymbol(unsigned& buffer)
{
    unsigned base_symbol{0};
    if (!mDistanceTable.readNextSymbol(base_symbol, mInputStream))
    {
        return false;
    }

    if (base_symbol <= 3)
    {
        buffer = 1 + base_symbol;
        return true;
    }

    const std::size_t offset_index = base_symbol - 4;
    if (offset_index >= DISTANCE_OFFSETS.size())
    {
        return false;
    }

    const unsigned num_extra_bits = (base_symbol - 4) / 2 + 1;
    unsigned char extra_bits{0};
    unsigned extra_sum{0};
    if (num_extra_bits > 8)
    {
        // More than one byte of extra bits: the first eight bits read are the
        // low byte of the value and the remaining bits sit above them, so the
        // high part is shifted by a whole byte.
        // NOTE(review): assumes readNextByte() returns the next 8 bits at the
        // current bit position - confirm against BitStream.
        const auto low_byte = mInputStream->readNextByte();
        if (!low_byte)
        {
            return false;
        }
        mInputStream->readNextNBits(num_extra_bits - 8, extra_bits);
        extra_sum = (static_cast<unsigned>(extra_bits) << 8) | static_cast<unsigned>(*low_byte);
    }
    else
    {
        mInputStream->readNextNBits(num_extra_bits, extra_bits);
        extra_sum = extra_bits;
    }

    buffer = DISTANCE_OFFSETS[offset_index] + extra_sum;
    return true;
}
// Appends one decoded code length to the combined literal/distance sequence.
//
// value       - code length to store.
// count       - running position in the combined sequence; incremented.
// lastValue   - updated to `value`, for later "repeat previous" symbols.
// literals    - receives the first `numLiterals` positions.
// numLiterals - number of positions that belong to `literals`.
// distances   - receives every position after the literals.
//
// A position that falls outside its destination vector is not written (a
// repeat code in malformed input can run past the announced sizes), but
// `count` still advances so the caller's loop terminates.
void HuffmanStream::addValue(unsigned value, unsigned& count, unsigned& lastValue, std::vector<unsigned char>& literals, unsigned numLiterals, std::vector<unsigned char>& distances)
{
    if (count < numLiterals)
    {
        if (count < literals.size())
        {
            literals[count] = static_cast<unsigned char>(value);
        }
    }
    else if (const std::size_t slot = count - numLiterals; slot < distances.size())
    {
        distances[slot] = static_cast<unsigned char>(value);
    }
    lastValue = value;
    count++;
}
void HuffmanStream::readCodeLengths()
{
std::vector<unsigned char> literal_lengths(288, 0);
std::vector<unsigned char> distance_lengths(32, 0);
unsigned symbol{0};
unsigned count{0};
unsigned last_value{0};
while(count < mNumLiterals + mNumDistances)
{
bool valid = readNextCodeLengthSymbol(symbol);
if (!valid)
{
//std::cout << "Hit unknown symbol - bailing out" << std::endl;
break;
}
if (symbol < 16)
{
addValue(symbol, count, last_value, literal_lengths, mNumLiterals, distance_lengths);
}
else if(symbol == 16)
{
unsigned char num_reps{0};
mInputStream->readNextNBits(2, num_reps);
//std::cout << "Got val 16 doing " << 3 + num_reps << std::endl;
for(unsigned char idx=0; idx< 3 + num_reps; idx++)
{
addValue(last_value, count, last_value, literal_lengths, mNumLiterals, distance_lengths);
}
}
else if(symbol == 17)
{
unsigned char num_reps{0};
mInputStream->readNextNBits(3, num_reps);
//std::cout << "Got val 17 doing " << 3 + num_reps << std::endl;
for(unsigned char idx=0; idx< 3 + num_reps; idx++)
{
addValue(0, count, last_value, literal_lengths, mNumLiterals, distance_lengths);
}
}
else if(symbol == 18)
{
unsigned char num_reps{0};
mInputStream->readNextNBits(7, num_reps);
//std::cout << "Got val 18 doing " << 11 + num_reps << std::endl;
for(unsigned idx=0; idx< 11 + unsigned(num_reps); idx++)
{
addValue(0, count, last_value, literal_lengths, mNumLiterals, distance_lengths);
}
}
}
//std::cout << "Got final literal length sequence " << std::endl;
for(unsigned idx=0; idx<literal_lengths.size(); idx++)
{
//std::cout << static_cast<int>(literal_lengths[idx]) << "," ;
}
//std::cout << std::endl;
//std::cout << "Got final distance length sequence " << std::endl;
for(unsigned idx=0; idx<distance_lengths.size(); idx++)
{
//std::cout << static_cast<int>(distance_lengths[idx]) << "," ;
}
//std::cout << std::endl;
mLiteralTable.setInputLengthSequence(literal_lengths, false);
mLiteralTable.buildPrefixCodes();
mDistanceTable.setInputLengthSequence(distance_lengths, false);
mDistanceTable.buildPrefixCodes();
}
// Replays `length` already-decoded bytes, starting `distance` bytes back from
// the end of the history buffer, to the output and to the history itself.
//
// A distance of 1 refers to the most recently written byte, so the copy
// starts at index size - distance. The source may overlap the bytes being
// appended (distance < length); copying one byte at a time handles that.
// A back-reference that is zero or reaches before the start of the history
// is invalid and is ignored.
void HuffmanStream::copyFromBuffer(unsigned length, unsigned distance)
{
    if (distance == 0 || distance > mBuffer.size())
    {
        return;
    }

    const std::size_t start = mBuffer.size() - distance;
    for (unsigned idx = 0; idx < length; idx++)
    {
        // Copy the value out first: push_back may reallocate the buffer.
        const auto symbol = mBuffer[start + idx];
        mOutputStream->writeByte(symbol);
        mBuffer.push_back(symbol);
    }
}
// Decodes literal/length symbols until the end-of-block symbol is read.
//
//   0-255   : literal byte, written to the output and the history buffer
//   256     : end of block
//   257-264 : match lengths 3-10, no extra bits
//   265-284 : five groups of four symbols using 1-5 extra bits
//   285     : match length 258, no extra bits
//
// Every length symbol is followed by a distance, and the match is then
// copied from the history buffer. Decoding stops early when a literal/length
// or distance symbol cannot be decoded, or when a symbol above 285 (not a
// valid length code) is read, so a match is never copied with a stale
// distance.
void HuffmanStream::readSymbols()
{
    // Smallest match length of each extra-bit group:
    // symbols 265-268, 269-272, 273-276, 277-280 and 281-284.
    static constexpr unsigned GROUP_BASE_LENGTH[]{11, 19, 35, 67, 131};

    unsigned symbol{0};
    unsigned distance{0};
    while (readNextLiteralSymbol(symbol))
    {
        if (symbol <= 255)
        {
            mOutputStream->writeByte(symbol);
            mBuffer.push_back(symbol);
            continue;
        }
        if (symbol == 256 || symbol > 285)
        {
            break;
        }

        unsigned length{258}; // value for symbol 285
        if (symbol <= 264)
        {
            length = 3 + (symbol - 257);
        }
        else if (symbol <= 284)
        {
            const unsigned group = (symbol - 265) / 4;
            const unsigned position = (symbol - 265) % 4;
            const unsigned num_extra_bits = group + 1;

            unsigned char extra{0};
            mInputStream->readNextNBits(num_extra_bits, extra);
            // Each symbol of a group covers 2^num_extra_bits lengths.
            length = GROUP_BASE_LENGTH[group] + position * (1u << num_extra_bits) + extra;
        }

        if (!readNextDistanceSymbol(distance))
        {
            break;
        }
        copyFromBuffer(length, distance);
    }
}
// Decodes one block from the input stream.
//
// Without fixed codes, the code tables are read from the stream and the
// symbols are then decoded. With fixed codes, symbols are read and discarded
// until symbol 256 is found or a symbol fails to decode.
//
// NOTE(review): the function returns false on every path, including after a
// successful decode - confirm what callers expect.
bool HuffmanStream::decode()
{
    if (mUsingFixedCodes)
    {
        unsigned symbol{0};
        while (readNextCodeLengthSymbol(symbol) && symbol != 256)
        {
            // Nothing is done with the symbols yet.
        }
    }
    else
    {
        readCodingsTable();
        readSymbols();
    }
    return false;
}
void HuffmanStream::readCodingsTable()
{
unsigned char h_lit{0};
mInputStream->readNextNBits(5, h_lit);
mNumLiterals = h_lit + 257;
//std::cout << "Got HLIT " << mNumLiterals << std::endl;
unsigned char h_dist{0};
mInputStream->readNextNBits(5, h_dist);
mNumDistances = h_dist + 1;
//std::cout << "Got HDIST " << mNumDistances << std::endl;
unsigned char h_clen{0};
mInputStream->readNextNBits(4, h_clen);
unsigned num_code_lengths = h_clen + 4;
//std::cout << "Got HCLEN " << num_code_lengths << std::endl;
auto sequence = std::vector<unsigned char>(num_code_lengths, 0);
unsigned char buffer{0};
for(unsigned idx = 0; idx< num_code_lengths; idx++)
{
mInputStream->readNextNBits(3, buffer);
//std::cout << "Got coding table value " << idx << " | " << static_cast<int>(buffer) << " | " << ByteUtils::toString(buffer) << std::endl;
sequence[idx] = buffer;
}
mCodeLengthTable.setInputLengthSequence(sequence, true);
mCodeLengthTable.buildPrefixCodes();
readCodeLengths();
}

View file

@ -0,0 +1,51 @@
#pragma once
#include "BitStream.h"
#include "HuffmanCodeLengthTable.h"
#include <vector>
#include <string>
// Decoder for Huffman-coded data: reads compressed bits from one BitStream
// and writes the decoded bytes to another. The stream pointers are stored as
// given; nothing visible here takes ownership of them.
class HuffmanStream
{
public:
HuffmanStream(BitStream* inputStream, BitStream* outputStream);
// Decodes one block; see the definition for the current return value.
bool decode();
// Switches to fixed codes and builds their prefix codes.
void generateFixedCodeMapping();
// NOTE(review): no definition appears in the visible part of
// HuffmanStream.cpp - confirm it exists elsewhere.
void setCodeLengthAlphabetLengths(const std::vector<unsigned char>& lengths);
private:
// Reads the block header and builds the code-length table.
void readCodingsTable();
// Reads the literal/length and distance code lengths and builds both tables.
void readCodeLengths();
// Decodes literals and matches until the end-of-block symbol.
void readSymbols();
// Replays `length` bytes from `distance` back in the history buffer.
void copyFromBuffer(unsigned length, unsigned distance);
// Each reader decodes one symbol/value into `buffer`; false on failure.
bool readNextLiteralSymbol(unsigned& buffer);
bool readNextDistanceSymbol(unsigned& buffer);
bool readNextCodeLengthSymbol(unsigned& buffer);
// Stores one code length in `literals` or `distances` depending on `count`.
void addValue(unsigned value, unsigned& count, unsigned& lastValue, std::vector<unsigned char>& literals, unsigned numLiterals, std::vector<unsigned char>& distances);
BitStream* mInputStream;
BitStream* mOutputStream;
// History of decoded bytes, used as the source for back-references.
std::vector<unsigned> mBuffer;
unsigned mNumLiterals{0}; // HLIT + 257
unsigned mNumDistances{0}; // HDIST + 1
bool mUsingFixedCodes{false};
HuffmanCodeLengthTable mCodeLengthTable;
HuffmanCodeLengthTable mLiteralTable;
HuffmanCodeLengthTable mDistanceTable;
};

View file

@ -0,0 +1,121 @@
#include "HuffmanTree.h"
#include "ByteUtils.h"
#include <sstream>
#include <algorithm>
#include <iostream>
// Creates a code of `length` bits. The bits of `data` are stored in the
// order produced by ByteUtils::mirror(data, length).
PrefixCode::PrefixCode(uint32_t data, unsigned length)
    : mLength(length),
      mData(ByteUtils::mirror(data, length))
{
}
// Returns true only when both the bit length and the stored bit pattern
// equal the given values.
bool PrefixCode::matches(unsigned length, uint32_t code) const
{
    if (mLength != length)
    {
        return false;
    }
    return mData == code;
}
// Renders the code as text.
//
// Codes longer than 8 bits are formatted by ByteUtils::toString(data, length)
// regardless of the flag. Shorter codes are cut out of the single-value
// ByteUtils::toString() output: the last mLength characters of the stored
// value when bitsAsRightToLeft is set, otherwise the first mLength
// characters of the mirrored value.
std::string PrefixCode::toString(bool bitsAsRightToLeft) const
{
    if (mLength > 8)
    {
        return ByteUtils::toString(mData, mLength);
    }
    if (bitsAsRightToLeft)
    {
        return ByteUtils::toString(mData).substr(8 - mLength, mLength);
    }
    return ByteUtils::toString(ByteUtils::mirror(mData, mLength)).substr(0, mLength);
}
void HuffmanTree::addCodeLengthEntry(unsigned length, const CodeSymbolPair& data)
{
bool found{false};
for (auto& entry : mTable)
{
if (entry.first == length)
{
entry.second.push_back(data);
found = true;
break;
}
}
if (!found)
{
mTable.push_back({length, {data}});
}
}
void HuffmanTree::sortTable()
{
std::sort(mTable.begin(), mTable.end(), [](CodeLengthData a, CodeLengthData b){return a.first < b.first;});
}
// Searches one length group for a code.
//
// treeIndex - position of the length group in the table.
// code      - bit pattern to look for; it is compared together with the
//             group's code length.
// Returns the matching symbol, or std::nullopt when the group has no such
// code or `treeIndex` is outside the table (for example an empty table).
std::optional<HuffmanTree::Symbol> HuffmanTree::findMatch(std::size_t treeIndex, uint32_t code) const
{
    if (treeIndex >= mTable.size())
    {
        return std::nullopt;
    }

    const auto& [length, entries] = mTable[treeIndex];
    for (const auto& [prefix_code, symbol] : entries)
    {
        if (prefix_code.matches(length, code))
        {
            return symbol;
        }
    }
    return std::nullopt;
}
// Returns the code assigned to `symbol`, searching the length groups in
// table order, or std::nullopt when the symbol has no code.
std::optional<PrefixCode> HuffmanTree::getCode(Symbol symbol) const
{
    for (const auto& length_group : mTable)
    {
        const auto& codes = length_group.second;
        const auto hit = std::find_if(codes.begin(), codes.end(),
            [symbol](const CodeSymbolPair& candidate) { return candidate.second == symbol; });
        if (hit != codes.end())
        {
            return hit->first;
        }
    }
    return std::nullopt;
}
// Returns the number of length groups in the table, i.e. how many distinct
// code lengths have been added.
std::size_t HuffmanTree::getNumCodeLengths() const
{
    const std::size_t num_groups = mTable.size();
    return num_groups;
}
unsigned HuffmanTree::getCodeLength(std::size_t idx) const
{
return mTable[idx].first;
}
// Returns a multi-line listing of the table: one heading per code length
// followed by one line per (code, symbol) pair. `bitsAsRightToLeft` is
// forwarded to PrefixCode::toString() to choose the bit order of the codes.
std::string HuffmanTree::dump(bool bitsAsRightToLeft) const
{
    std::ostringstream listing;
    for (const auto& group : mTable)
    {
        listing << "Prefix table for Code Length " << group.first << " has vals: \n";
        for (const auto& code_and_symbol : group.second)
        {
            const std::string code_text = code_and_symbol.first.toString(bitsAsRightToLeft);
            listing << "Code " << code_text << " Symbol: " << code_and_symbol.second << '\n';
        }
    }
    return listing.str();
}

View file

@ -0,0 +1,54 @@
#pragma once
#include <vector>
#include <string>
#include <optional>
// A single prefix code: a bit pattern together with its length in bits.
class PrefixCode
{
public:
// Stores `length` and the bits of `data` after they have been passed
// through ByteUtils::mirror (see the constructor definition).
PrefixCode(uint32_t data, unsigned length);
// Renders the code as text; the flag selects the bit order for short codes.
std::string toString(bool bitsAsRightToLeft = true) const;
// True when both the length and the stored bit pattern are equal.
bool matches(unsigned length, uint32_t code) const;
// Stored bit pattern.
uint32_t getData() const
{
return mData;
}
// Number of bits in the code.
unsigned getLength() const
{
return mLength;
}
private:
unsigned mLength{0};
uint32_t mData{0};
};
// Lookup table of prefix codes grouped by code length. Despite the name, the
// data is held as a flat vector of (length, codes) groups.
class HuffmanTree
{
public:
using Symbol = unsigned;
using CodeLength = unsigned;
// One code together with the symbol it stands for.
using CodeSymbolPair = std::pair<PrefixCode, Symbol>;
// All codes that share one code length.
using CodeLengthData = std::pair<CodeLength, std::vector<CodeSymbolPair> >;
// Adds a code to the group for `length`, creating the group if needed.
void addCodeLengthEntry(unsigned length, const CodeSymbolPair& data);
// Returns a textual listing of every group and its codes.
std::string dump(bool bitsAsRightToLeft = true) const;
// Searches the group at position `treeIndex` for `code`.
std::optional<HuffmanTree::Symbol> findMatch(std::size_t treeIndex, uint32_t code) const;
// Number of groups, i.e. distinct code lengths.
std::size_t getNumCodeLengths() const;
// Code length of the group at position `idx`.
unsigned getCodeLength(std::size_t idx) const;
// Code assigned to `symbol`, if any.
std::optional<PrefixCode> getCode(Symbol symbol) const;
// Orders the groups by ascending code length.
void sortTable();
private:
std::vector<CodeLengthData> mTable;
};