Initial fixed Huffman coding for PNG.

This commit is contained in:
James Grogan 2022-11-28 10:16:04 +00:00
parent e4f9393ee7
commit 7f5009fb5e
39 changed files with 1294 additions and 440 deletions

View file

@ -1,7 +1,10 @@
list(APPEND compression_LIB_INCLUDES list(APPEND compression_LIB_INCLUDES
StreamCompressor.cpp StreamCompressor.cpp
HuffmanEncoder.cpp huffman/HuffmanEncoder.cpp
huffman/HuffmanStream.cpp
huffman/HuffmanCodeLengthTable.cpp
huffman/HuffmanTree.cpp
RunLengthEncoder.cpp RunLengthEncoder.cpp
ZlibEncoder.cpp ZlibEncoder.cpp
deflate/DeflateEncoder.cpp deflate/DeflateEncoder.cpp
@ -15,6 +18,7 @@ add_library(compression SHARED ${compression_LIB_INCLUDES})
target_include_directories(compression PUBLIC target_include_directories(compression PUBLIC
${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/deflate ${CMAKE_CURRENT_SOURCE_DIR}/deflate
${CMAKE_CURRENT_SOURCE_DIR}/huffman
) )
target_link_libraries(compression PUBLIC core) target_link_libraries(compression PUBLIC core)

View file

@ -1,21 +0,0 @@
#pragma once
#include "RawTree.h"
#include <vector>
#include <unordered_map>
class HuffmanEncoder
{
using DataStream = std::vector<unsigned char>;
using CountPair = std::pair<unsigned char, unsigned>;
public:
void encode(const DataStream& stream);
void encode(const std::unordered_map<unsigned char, unsigned>& counts);
private:
void dumpTree(const RawTree<CountPair>& tree) const;
void dumpNode(RawNode<CountPair>* node, unsigned depth) const;
};

View file

@ -2,6 +2,10 @@
#include "StringUtils.h" #include "StringUtils.h"
#include "BitStream.h" #include "BitStream.h"
#include "ByteUtils.h"
#include "HuffmanEncoder.h"
#include <iostream>
Lz77Encoder::Lz77Encoder(BitStream* inputStream, BitStream* outputStream) Lz77Encoder::Lz77Encoder(BitStream* inputStream, BitStream* outputStream)
: AbstractEncoder(inputStream, outputStream) : AbstractEncoder(inputStream, outputStream)
@ -61,8 +65,39 @@ void Lz77Encoder::lookThroughSearchBuffer(char searchChar, unsigned& hitLength,
} }
} }
// Takes ownership of `generator` and stores it as the encoder's prefix-code source.
// encode() only builds its own default generator when none has been stored.
void Lz77Encoder::setPrefixCodeGenerator(std::unique_ptr<PrefixCodeGenerator> generator)
{
mCodeGenerator = std::move(generator);
}
bool Lz77Encoder::encode() bool Lz77Encoder::encode()
{ {
if (!mCodeGenerator)
{
auto code_generator = std::make_unique<HuffmanEncoder>();
auto huffman_encoder = code_generator.get();
mCodeGenerator = std::move(code_generator);
huffman_encoder->setUseFixedCode(true);
huffman_encoder->initializeLiteralLengthTable();
}
while(auto byte = mInputStream->readNextByte())
{
const auto code = mCodeGenerator->getLiteralValue(*byte);
std::cout << "Writing value " << static_cast<int>(*byte) << " with code " << ByteUtils::toString(code.getData(), code.getLength()) << "\n";
mOutputStream->writeNBits(code.getData(), code.getLength());
}
auto eos_code = mCodeGenerator->getEndOfStreamValue();
std::cout << "Writing EOS value with code " << ByteUtils::toString(eos_code.getData(), eos_code.getLength()) << "\n";
mOutputStream->writeNBits(eos_code.getData(), eos_code.getLength());
/* /*
unsigned loc{0}; unsigned loc{0};
std::string ret; std::string ret;

View file

@ -1,9 +1,13 @@
#pragma once #pragma once
#include "AbstractEncoder.h" #include "AbstractEncoder.h"
#include "HuffmanEncoder.h"
#include <string> #include <string>
#include <vector> #include <vector>
#include <memory>
class PrefixCodeGenerator;
class Lz77Encoder : public AbstractEncoder class Lz77Encoder : public AbstractEncoder
{ {
@ -24,10 +28,14 @@ public:
void setLookAheadBufferSize(unsigned size); void setLookAheadBufferSize(unsigned size);
void setPrefixCodeGenerator(std::unique_ptr<PrefixCodeGenerator> generator);
private: private:
unsigned mSearchBufferSize{32000}; unsigned mSearchBufferSize{32000};
Buffer mSearchBuffer; Buffer mSearchBuffer;
unsigned mLookAheadBufferSize{256}; unsigned mLookAheadBufferSize{256};
Buffer mLookaheadBuffer; Buffer mLookaheadBuffer;
std::unique_ptr<PrefixCodeGenerator> mCodeGenerator;
}; };

View file

@ -0,0 +1,54 @@
#include "RunLengthEncoder.h"
/**
 * Collapses consecutive equal bytes of `input` into (value, run length) pairs.
 *
 * @param input Raw byte sequence to encode.
 * @return One Hit per maximal run, in input order; empty when `input` is empty.
 */
std::vector<RunLengthEncoder::Hit> RunLengthEncoder::encode(const std::vector<unsigned char>& input)
{
    std::vector<RunLengthEncoder::Hit> ret;
    if (input.empty())
    {
        return ret;
    }

    // The running value must be unsigned char like the input: with a plain
    // (possibly signed) char, bytes >= 0x80 promote to a negative int and never
    // compare equal to the unsigned input byte, so their runs were never merged.
    unsigned char working_char = input.front();
    unsigned count = 1;

    for (auto it = input.begin() + 1; it != input.end(); ++it)
    {
        const unsigned char c = *it;
        if (c == working_char)
        {
            count++;
        }
        else
        {
            ret.push_back({working_char, count});
            working_char = c;
            count = 1;
        }
    }

    // Flush the run that was still open when the input ended.
    ret.push_back({working_char, count});
    return ret;
}
/**
 * Expands (value, run length) pairs back into the flat byte sequence.
 *
 * @param input Hits as produced by encode().
 * @return Each hit's value repeated `second` times, in order; empty for empty input.
 */
std::vector<unsigned char> RunLengthEncoder::decode(const std::vector<RunLengthEncoder::Hit>& input)
{
    std::vector<unsigned char> expanded;
    for (const auto& run : input)
    {
        // Fill-insert appends `run.second` copies of the byte `run.first`.
        expanded.insert(expanded.end(), run.second, run.first);
    }
    return expanded;
}

View file

@ -1,110 +1,15 @@
#pragma once #pragma once
#include "StringUtils.h"
#include <vector> #include <vector>
#include <string>
class RunLengthEncoder class RunLengthEncoder
{ {
public: public:
std::string encode(const std::string& string) using Hit = std::pair<unsigned char, unsigned>;
{
std::string ret;
if (string.empty())
{
return ret;
}
char working_char{0}; std::vector<Hit> encode(const std::vector<unsigned char>& input);
unsigned count = 1;
for(unsigned idx=0; idx<string.size(); idx++)
{
auto c = string[idx];
if (idx == 0)
{
working_char = c;
continue;
}
if (c == working_char) std::vector<unsigned char> decode(const std::vector<Hit>& input);
{
count++;
}
else
{
insertCharacter(ret, working_char, count);
working_char = c;
count = 1;
}
}
insertCharacter(ret, working_char, count);
return ret;
}
std::string decode(const std::string& string)
{
std::string ret;
if (string.empty())
{
return ret;
}
unsigned count{0};
while(count < string.size())
{
auto c = string[count];
if (c == mDelimiter)
{
count++;
std::string reps;
char working_char{0};
while(count < string.size())
{
auto rep_char = string[count];
count++;
if (StringUtils::IsAlphabetical(rep_char))
{
working_char = rep_char;
break;
}
else
{
reps += rep_char;
}
}
for (unsigned idx=0; idx<std::stoul(reps); idx++)
{
ret += working_char;
}
}
else
{
ret += c;
count++;
}
}
return ret;
}
private: private:
void insertCharacter(std::string& output, char c, unsigned count)
{
if (count >= 3)
{
output += mDelimiter + std::to_string(count) + c;
}
else
{
for (unsigned jdx=0;jdx<count; jdx++)
{
output += c;
}
}
}
char mDelimiter {'@'};
}; };

View file

@ -30,6 +30,10 @@ public:
~ZlibEncoder(); ~ZlibEncoder();
void setWindowSize(unsigned size); void setWindowSize(unsigned size);
void setDeflateCompressionMethod(Deflate::CompressionMethod method)
{
mDeflateCompressionMethod = method;
}
bool encode() override; bool encode() override;
bool decode() override; bool decode() override;

View file

@ -14,218 +14,6 @@ DeflateBlock::DeflateBlock(BitStream* inputStream, BitStream* outputStream)
} }
bool DeflateBlock::readNextCodeLengthSymbol(unsigned char& final_symbol)
{
unsigned working_index{0};
auto count = mCodeLengthMapping[working_index].first;
auto delta = count;
bool found{false};
unsigned char buffer{0};
unsigned char working_bits{0};
unsigned working_symbol{0};
while(!found)
{
auto valid = mInputStream->readNextNBits(delta, buffer);
working_bits = (working_bits << delta) | buffer;
for(const auto& entry : mCodeLengthMapping[working_index].second)
{
if (entry.first == working_bits)
{
found = true;
working_symbol = entry.second;
break;
}
}
if (!found)
{
working_index++;
if (working_index >= mCodeLengthMapping.size())
{
break;
}
auto new_count = mCodeLengthMapping[working_index].first;
delta = new_count - count;
count = new_count;
}
}
if (found)
{
final_symbol = working_symbol;
std::cout << "Found symbol " << working_symbol << " with bits " << ByteUtils::toString(working_bits) << std::endl;
std::cout << "At Byte offset " << mInputStream->getCurrentByteOffset() << " and bit offset " << mInputStream->getCurrentBitOffset() << std::endl;
return true;
}
else
{
std::cout << "SYMBOL NOT FOUND " << " with bits " << ByteUtils::toString(working_bits) << std::endl;
return false;
}
}
void DeflateBlock::setCodeLengthAlphabetLengths(const std::vector<unsigned char>& lengths)
{
mCodeLengthAlphabetLengths = lengths;
}
void DeflateBlock::setCodeLengthLength(unsigned length)
{
mHclen = length;
}
void DeflateBlock::setLiteralsTableLength(unsigned length)
{
mHlit = length;
}
void DeflateBlock::setDistanceTableLength(unsigned length)
{
mHdist = length;
}
void DeflateBlock::setIsFinalBlock(bool isFinal)
{
mInFinalBlock = isFinal;
}
void DeflateBlock::flushToStream()
{
}
void DeflateBlock::readLiteralCodeLengths()
{
std::vector<unsigned> lengths;
unsigned char symbol{0};
while(lengths.size() < mHlit)
{
bool valid = readNextCodeLengthSymbol(symbol);
if (!valid)
{
std::cout << "Hit unknown symbol - bailing out" << std::endl;
break;
}
if (symbol < 16)
{
lengths.push_back(symbol);
}
else if(symbol == 16)
{
unsigned char num_reps{0};
mInputStream->readNextNBits(2, num_reps);
auto last_val = lengths[lengths.size()-1];
std::cout << "Got val 16 doing " << 3 + num_reps << std::endl;
for(unsigned idx=0; idx< 3 + num_reps; idx++)
{
lengths.push_back(last_val);
}
}
else if(symbol == 17)
{
unsigned char num_reps{0};
mInputStream->readNextNBits(3, num_reps);
std::cout << "Got val 17 doing " << 3 + num_reps << std::endl;
for(unsigned idx=0; idx< 3 + num_reps; idx++)
{
lengths.push_back(0);
}
}
else if(symbol == 18)
{
unsigned char num_reps{0};
mInputStream->readNextNBits(7, num_reps);
std::cout << "Got val 18 doing " << 11 + num_reps << std::endl;
for(unsigned idx=0; idx< 11 + num_reps; idx++)
{
lengths.push_back(0);
}
}
}
}
void DeflateBlock::buildCodeLengthMapping()
{
for(unsigned idx=1; idx<8; idx++)
{
std::vector<unsigned> entries;
for(unsigned jdx=0; jdx<mCodeLengthAlphabetLengths.size(); jdx++)
{
if (mCodeLengthAlphabetLengths[jdx] == idx)
{
entries.push_back(jdx);
}
}
if (entries.empty())
{
continue;
}
CodeLengthCountEntry count_entry{idx, {}};
std::sort(entries.begin(), entries.end());
unsigned char offset = 0x01 << idx - 1;
unsigned char count{0};
for (auto entry : entries)
{
count_entry.second.push_back(CodeLengthEntry{offset + count, entry});
count++;
}
mCodeLengthMapping.push_back(count_entry);
}
for (const auto& map_data : mCodeLengthMapping)
{
std::cout << "Map entry " << map_data.first << " has vals: " << std::endl;
for (const auto& entry : map_data.second)
{
std::cout << "Key " << ByteUtils::toString(entry.first) << " val: " << entry.second << std::endl;
}
}
}
void DeflateBlock::readDynamicHuffmanTable()
{
unsigned char h_lit{0};
mInputStream->readNextNBits(5, h_lit);
mHlit = h_lit + 257;
std::cout << "Got HLIT " << mHlit << std::endl;
unsigned char h_dist{0};
mInputStream->readNextNBits(5, h_dist);
mHdist = h_dist + 1;
std::cout << "Got HDIST " << mHdist << std::endl;
unsigned char h_clen{0};
mInputStream->readNextNBits(4, h_clen);
mHclen = h_clen + 4;
std::cout << "Got HCLEN " << mHclen << std::endl;
mCodeLengthAlphabetLengths = std::vector<unsigned char>(19, 0);
unsigned char buffer{0};
for(unsigned idx = 0; idx< mHclen; idx++)
{
mInputStream->readNextNBits(3, buffer);
mCodeLengthAlphabetLengths[CODE_LENGTH_ALPHABET_PERMUTATION[idx]] = buffer;
std::cout << "Got code length for " << CODE_LENGTH_ALPHABET_PERMUTATION[idx] << " of " << static_cast<unsigned>(buffer) << std::endl;
}
buildCodeLengthMapping();
readLiteralCodeLengths();
}
std::string DeflateBlock::getMetaData() const std::string DeflateBlock::getMetaData() const
{ {
std::stringstream sstr; std::stringstream sstr;
@ -237,6 +25,11 @@ std::string DeflateBlock::getMetaData() const
return sstr.str(); return sstr.str();
} }
// Records whether this block is the last one; the stored flag is what isFinalBlock() returns.
void DeflateBlock::setIsFinalBlock(bool isFinal)
{
mInFinalBlock = isFinal;
}
bool DeflateBlock::isFinalBlock() const bool DeflateBlock::isFinalBlock() const
{ {
return mInFinalBlock; return mInFinalBlock;
@ -245,7 +38,9 @@ bool DeflateBlock::isFinalBlock() const
bool DeflateBlock::read() bool DeflateBlock::read()
{ {
auto working_byte = *mInputStream->readNextByte(); auto working_byte = *mInputStream->readNextByte();
std::cout << "Into process data, starts with: "<< ByteUtils::toString(working_byte) << std::endl;
std::cout << mInputStream->logNextNBytes(11);
std::cout << "DeflateBlock::read location " << mInputStream->logLocation();
unsigned char final_block{0}; unsigned char final_block{0};
mInputStream->readNextNBits(1, final_block); mInputStream->readNextNBits(1, final_block);
@ -257,35 +52,64 @@ bool DeflateBlock::read()
if (mCompressionMethod == Deflate::CompressionMethod::NONE) if (mCompressionMethod == Deflate::CompressionMethod::NONE)
{ {
auto byte0 = *mInputStream->readNextByte(); return readUncompressedStream();
auto byte1 = *mInputStream->readNextByte();
mUncompressedBlockLength = (byte0 << 8) | byte1;
std::cout << "Check block 0: " << ByteUtils::toString(byte0) << std::endl;
std::cout << "Check block 1: " << ByteUtils::toString(byte1) << std::endl;
auto byte2 = *mInputStream->readNextByte();
auto byte3 = *mInputStream->readNextByte();
uint16_t len_check = (byte2 << 8) | byte3;
std::cout << "Check block 2: " << ByteUtils::toString(byte2) << std::endl;
std::cout << "Check block 3: " << ByteUtils::toString(byte3) << std::endl;
//if (!(byte0 ==(~byte2) && byte1 ==(~byte3)))
//{
//std::cout << "Uncompressed block length check failed - aborting." << std::endl;
//return false;
//}
//else
//{
for(unsigned idx=0; idx<mUncompressedBlockLength;idx++)
{
mOutputStream->writeByte(*mInputStream->readNextByte());
}
//}
} }
else if(mCompressionMethod == Deflate::CompressionMethod::FIXED_HUFFMAN)
{
return readFixedHuffmanStream();
}
else if(mCompressionMethod == Deflate::CompressionMethod::DYNAMIC_HUFFMAN)
{
return readDynamicHuffmanStream();
}
return false;
}
bool DeflateBlock::readUncompressedStream()
{
auto byte0 = *mInputStream->readNextByte();
auto byte1 = *mInputStream->readNextByte();
mUncompressedBlockLength = (byte0 << 8) | byte1;
std::cout << "Check block 0: " << ByteUtils::toString(byte0) << std::endl;
std::cout << "Check block 1: " << ByteUtils::toString(byte1) << std::endl;
auto byte2 = *mInputStream->readNextByte();
auto byte3 = *mInputStream->readNextByte();
uint16_t len_check = (byte2 << 8) | byte3;
std::cout << "Check block 2: " << ByteUtils::toString(byte2) << std::endl;
std::cout << "Check block 3: " << ByteUtils::toString(byte3) << std::endl;
//if (!(byte0 ==(~byte2) && byte1 ==(~byte3)))
//{
//std::cout << "Uncompressed block length check failed - aborting." << std::endl;
//return false;
//}
//else
//{
for(unsigned idx=0; idx<mUncompressedBlockLength;idx++)
{
mOutputStream->writeByte(*mInputStream->readNextByte());
}
//}
return true; return true;
} }
// Decodes the block body with the fixed Huffman code: creates a fresh HuffmanStream
// over this block's input/output streams, has it build the fixed code mapping,
// and returns whatever HuffmanStream::decode() reports.
bool DeflateBlock::readFixedHuffmanStream()
{
std::cout << "Reading fixed huffman stream" << std::endl;
mHuffmanStream = std::make_unique<HuffmanStream>(mInputStream, mOutputStream);
mHuffmanStream->generateFixedCodeMapping();
return mHuffmanStream->decode();
}
// Decodes the block body with dynamic Huffman codes: creates a fresh HuffmanStream
// without a fixed mapping, so decode() takes its "read the codings table" path.
// Returns whatever HuffmanStream::decode() reports.
bool DeflateBlock::readDynamicHuffmanStream()
{
mHuffmanStream = std::make_unique<HuffmanStream>(mInputStream, mOutputStream);
return mHuffmanStream->decode();
}
void DeflateBlock::write(uint16_t datalength) void DeflateBlock::write(uint16_t datalength)
{ {
mUncompressedBlockLength = datalength; mUncompressedBlockLength = datalength;
@ -296,20 +120,38 @@ void DeflateBlock::write(uint16_t datalength)
if (mCompressionMethod == Deflate::CompressionMethod::NONE) if (mCompressionMethod == Deflate::CompressionMethod::NONE)
{ {
std::cout << "Writing compression block header " << ByteUtils::toString(working_block) << std::endl; writeUncompressedStream(working_block, datalength);
mOutputStream->writeByte(working_block); }
else if (mCompressionMethod == Deflate::CompressionMethod::FIXED_HUFFMAN)
std::cout << "Writing data length " << mUncompressedBlockLength << " " << ByteUtils::toString(mUncompressedBlockLength) << std::endl; {
mOutputStream->writeWord(datalength); mOutputStream->writeNBits(working_block, 3);
while(auto byte = mInputStream->readNextByte())
std::cout << "Writing iverse data length " << ~mUncompressedBlockLength << " " << ByteUtils::toString(~mUncompressedBlockLength) << std::endl;
mOutputStream->writeWord(static_cast<uint16_t>(~mUncompressedBlockLength));
for(unsigned idx=0; idx<mUncompressedBlockLength;idx++)
{ {
auto byte = *mInputStream->readNextByte(); mOutputStream->writeByte(*byte);
//std::cout << "Writing next byte " << static_cast<int>(byte) << std::endl; }
mOutputStream->writeByte(byte);
if (const auto& remaining_bits = mInputStream->getRemainingBits(); remaining_bits.second > 0)
{
mOutputStream->writeNBits(remaining_bits.first, remaining_bits.second);
} }
} }
} }
/**
 * Writes a stored (uncompressed) block: header byte, data length, the one's
 * complement of the length, then the payload bytes copied from the input stream.
 *
 * @param working_byte Block header byte, written as a whole byte.
 * @param datalength   Number of payload bytes announced in the length field.
 */
void DeflateBlock::writeUncompressedStream(unsigned char working_byte, uint16_t datalength)
{
    std::cout << "Writing compression block header " << ByteUtils::toString(working_byte) << std::endl;
    mOutputStream->writeByte(working_byte);

    std::cout << "Writing data length " << mUncompressedBlockLength << " " << ByteUtils::toString(mUncompressedBlockLength) << std::endl;
    mOutputStream->writeWord(datalength);

    // Narrow back to 16 bits before logging: `~` promotes to int, so the raw
    // expression would log a negative number rather than the word that is written.
    const auto inverse_length = static_cast<uint16_t>(~mUncompressedBlockLength);
    std::cout << "Writing iverse data length " << inverse_length << " " << ByteUtils::toString(inverse_length) << std::endl;
    mOutputStream->writeWord(inverse_length);

    for (unsigned idx = 0; idx < mUncompressedBlockLength; idx++)
    {
        // readNextByte() yields an empty result at end of input; dereferencing
        // it unchecked is undefined behaviour, so stop copying instead.
        const auto byte = mInputStream->readNextByte();
        if (!byte)
        {
            std::cout << "Input exhausted after " << idx << " of " << mUncompressedBlockLength << " bytes - stopping block write" << std::endl;
            break;
        }
        mOutputStream->writeByte(*byte);
    }
}

View file

@ -1,9 +1,12 @@
#pragma once #pragma once
#include "DeflateElements.h" #include "DeflateElements.h"
#include "HuffmanStream.h"
#include "BitStream.h" #include "BitStream.h"
#include <memory>
class AbstractChecksumCalculator; class AbstractChecksumCalculator;
class DeflateBlock class DeflateBlock
@ -11,51 +14,33 @@ class DeflateBlock
public: public:
DeflateBlock(BitStream* inputStream, BitStream* outputStream); DeflateBlock(BitStream* inputStream, BitStream* outputStream);
void buildCodeLengthMapping();
std::string getMetaData() const; std::string getMetaData() const;
void flushToStream();
bool isFinalBlock() const; bool isFinalBlock() const;
bool read(); bool read();
void readDynamicHuffmanTable();
void readLiteralCodeLengths();
bool readNextCodeLengthSymbol(unsigned char& buffer);
void setCodeLengthAlphabetLengths(const std::vector<unsigned char>& lengths);
void setCodeLengthLength(unsigned length);
void setLiteralsTableLength(unsigned length);
void setDistanceTableLength(unsigned length);
void setIsFinalBlock(bool isFinal); void setIsFinalBlock(bool isFinal);
void setCompressionMethod(Deflate::CompressionMethod method)
{
mCompressionMethod = method;
}
void write(uint16_t datalength); void write(uint16_t datalength);
private: private:
bool readUncompressedStream();
bool readFixedHuffmanStream();
bool readDynamicHuffmanStream();
void writeUncompressedStream(unsigned char working_byte, uint16_t datalength);
BitStream* mInputStream; BitStream* mInputStream;
BitStream* mOutputStream; BitStream* mOutputStream;
unsigned mHlit{0}; std::unique_ptr<HuffmanStream> mHuffmanStream;
unsigned mHdist{0};
unsigned mHclen{0};
uint16_t mUncompressedBlockLength{0}; uint16_t mUncompressedBlockLength{0};
using CodeLengthEntry = std::pair<unsigned char, unsigned>;
using CodeLengthCountEntry = std::pair<unsigned, std::vector<CodeLengthEntry> >;
std::vector<CodeLengthCountEntry> mCodeLengthMapping;
std::vector<unsigned char> mCodeLengthAlphabetLengths;
static constexpr unsigned CODE_LENGTH_ALPHABET_PERMUTATION[19]{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
bool mInFinalBlock{false}; bool mInFinalBlock{false};
Deflate::CompressionMethod mCompressionMethod{Deflate::CompressionMethod::NONE}; Deflate::CompressionMethod mCompressionMethod{Deflate::CompressionMethod::NONE};
}; };

View file

@ -1,6 +1,7 @@
#include "DeflateEncoder.h" #include "DeflateEncoder.h"
#include "BitStream.h" #include "BitStream.h"
#include "ByteUtils.h"
#include "DeflateBlock.h" #include "DeflateBlock.h"
#include "BufferBitStream.h" #include "BufferBitStream.h"
@ -22,6 +23,7 @@ bool DeflateEncoder::encode()
uint16_t count = 0; uint16_t count = 0;
BufferBitStream stream; BufferBitStream stream;
std::unique_ptr<DeflateBlock> working_block = std::make_unique<DeflateBlock>(&stream, mOutputStream); std::unique_ptr<DeflateBlock> working_block = std::make_unique<DeflateBlock>(&stream, mOutputStream);
working_block->setCompressionMethod(mCompressionMethod);
AbstractChecksumCalculator* checksum_calc; AbstractChecksumCalculator* checksum_calc;
if (mChecksumCalculators.size() > 0) if (mChecksumCalculators.size() > 0)
@ -38,15 +40,22 @@ bool DeflateEncoder::encode()
working_block->write(count); working_block->write(count);
working_block = std::make_unique<DeflateBlock>(&stream, mOutputStream); working_block = std::make_unique<DeflateBlock>(&stream, mOutputStream);
working_block->setCompressionMethod(mCompressionMethod);
stream.reset(); stream.reset();
} }
if (auto byte = mInputStream->readNextByte()) if (auto byte = mInputStream->readNextByte())
{ {
std::cout << "Adding byte " << ByteUtils::toString(*byte) << " to deflate block input" << std::endl;
stream.writeByte(*byte); stream.writeByte(*byte);
} }
else else
{ {
if (const auto& remaining_bits = mInputStream->getRemainingBits(); remaining_bits.second > 0)
{
stream.writeNBits(remaining_bits.first, remaining_bits.second);
}
stream.resetOffsets(); stream.resetOffsets();
working_block->setIsFinalBlock(true); working_block->setIsFinalBlock(true);
@ -56,7 +65,7 @@ bool DeflateEncoder::encode()
} }
count++; count++;
} }
mOutputStream->flushRemainingBits();
mOutputStream->clearChecksumCalculator(); mOutputStream->clearChecksumCalculator();
return true; return true;
} }

View file

@ -0,0 +1,175 @@
#include "HuffmanCodeLengthTable.h"
#include "ByteUtils.h"
#include "RunLengthEncoder.h"
#include <algorithm>
#include <sstream>
#include <iostream>
/**
 * Run-length encodes the input code-length sequence into (symbol, extra bits)
 * entries and tallies how often each of the 19 symbols occurs.
 *
 * Symbols follow the scheme the decoder in this project reads:
 *   - 0..15 : a literal code length (extra value unused, stored as 0)
 *   - 16    : repeat the previous length 3 + extra times (extra is 2 bits, 0..3)
 *   - 17    : 3 + extra zero lengths (extra is 3 bits, 0..7)
 *   - 18    : 11 + extra zero lengths (extra is 7 bits, 0..127)
 *
 * Results are stored in mCompressedLengthSequence and mCompressedLengthCounts.
 */
void HuffmanCodeLengthTable::buildCompressedLengthSequence()
{
    RunLengthEncoder rl_encoder;
    auto rle_encoded = rl_encoder.encode(mInputLengthSequence);

    for (const auto& entry : rle_encoded)
    {
        std::cout << "Got rle " << static_cast<int>(entry.first) << " | " << entry.second << std::endl;
    }

    mCompressedLengthSequence.clear();
    for (const auto& entry : rle_encoded)
    {
        const unsigned length = entry.first;
        const unsigned count = entry.second;

        // Number of plain literal entries still to emit for this run.
        unsigned literals = 0;

        if (count < 3)
        {
            // Too short for any repeat symbol: emit exactly `count` literals
            // (previously three were emitted regardless of the run length).
            literals = count;
        }
        else if (length == 0)
        {
            unsigned remaining = count;

            // Symbol 18 covers at most 11 + 127 = 138 zeros; split longer runs
            // so the 7-bit extra field never underflows or overflows.
            while (remaining >= 11)
            {
                const unsigned chunk = std::min(remaining, 138u);
                mCompressedLengthSequence.push_back({18, chunk - 11});
                remaining -= chunk;
            }
            if (remaining >= 3)
            {
                mCompressedLengthSequence.push_back({17, remaining - 3});
                remaining = 0;
            }
            literals = remaining;
        }
        else
        {
            // Symbol 16 repeats the *previous* length, so the first occurrence
            // has to be written out as a literal.
            mCompressedLengthSequence.push_back({length, 0});

            const auto num_blocks_of_six = (count - 1) / 6;
            for (unsigned idx = 0; idx < num_blocks_of_six; idx++)
            {
                mCompressedLengthSequence.push_back({16, 3});
            }

            const auto remaining_counts = (count - 1) % 6;
            if (remaining_counts >= 3)
            {
                mCompressedLengthSequence.push_back({16, remaining_counts - 3});
            }
            else
            {
                literals = remaining_counts;
            }
        }

        for (unsigned idx = 0; idx < literals; idx++)
        {
            mCompressedLengthSequence.push_back({length, 0});
        }
    }

    // NOTE(review): assumes every literal length is below 19; larger input
    // values would index past the end of the counts vector - confirm callers.
    mCompressedLengthCounts = std::vector<unsigned>(19, 0);
    for (const auto& entry : mCompressedLengthSequence)
    {
        mCompressedLengthCounts[entry.first]++;
    }
}
// Returns the (symbol, extra value) entries last produced by buildCompressedLengthSequence().
const std::vector<HuffmanCodeLengthTable::CompressedSequenceEntry>& HuffmanCodeLengthTable::getCompressedLengthSequence() const
{
return mCompressedLengthSequence;
}
// Returns the per-symbol occurrence counts filled in by buildCompressedLengthSequence().
// NOTE(review): this returns by value (a copy), unlike getCompressedLengthSequence()
// which returns a reference - confirm whether the copy is intended.
const std::vector<unsigned> HuffmanCodeLengthTable::getCompressedLengthCounts() const
{
return mCompressedLengthCounts;
}
void HuffmanCodeLengthTable::buildPrefixCodes()
{
if(mInputLengthSequence.empty())
{
return;
}
unsigned char max_length = *std::max_element(mInputLengthSequence.begin(), mInputLengthSequence.end());
std::vector<unsigned> counts(max_length+1, 0);
for (const auto length : mInputLengthSequence)
{
counts[length]++;
}
counts[0] = 0;
uint32_t code{0};
std::vector<uint32_t> next_code(max_length + 1, 0);
for (unsigned bits = 1; bits <= max_length; bits++)
{
code = (code + counts[bits-1]) << 1;
next_code[bits] = code;
}
for(std::size_t idx=0; idx<mInputLengthSequence.size(); idx++)
{
if (const auto length = mInputLengthSequence[idx]; length != 0)
{
const auto code = next_code[length];
next_code[length]++;
auto prefix_code = PrefixCode(code, length);
mTree.addCodeLengthEntry(length, {PrefixCode(code, length), idx});
mCodes.push_back(prefix_code);
}
}
mTree.sortTable();
//std::cout << dumpPrefixCodes();
}
// Returns the code stored at position `index` in mCodes. No bounds check is performed.
// NOTE(review): buildPrefixCodes() appends to mCodes only for non-zero lengths, so
// `index` equals the symbol value only when every input length is non-zero - confirm callers.
const PrefixCode& HuffmanCodeLengthTable::getCode(std::size_t index) const
{
return mCodes[index];
}
// Returns the tree's own textual dump of the registered codes.
std::string HuffmanCodeLengthTable::dumpPrefixCodes() const
{
return mTree.dump();
}
/**
 * Looks up position `index` in the DEFLATE_PERMUTATION table.
 *
 * @param index Position to look up.
 * @return The table entry, or 0 when `index` is outside the table.
 */
unsigned HuffmanCodeLengthTable::mapToDeflateIndex(unsigned index) const
{
    return (index < DEFLATE_PERMUTATION_SIZE) ? DEFLATE_PERMUTATION[index] : 0;
}
// Forwards to the tree: the number of code lengths it reports.
unsigned HuffmanCodeLengthTable::getNumCodeLengths() const
{
return mTree.getNumCodeLengths();
}
// Forwards to the tree: looks for `code` among the entries at `treeIndex`,
// returning the matching symbol or an empty optional.
std::optional<HuffmanTree::Symbol> HuffmanCodeLengthTable::findMatch(std::size_t treeIndex, uint32_t code) const
{
return mTree.findMatch(treeIndex, code);
}
// Forwards to the tree: the code length associated with tree position `index`.
unsigned HuffmanCodeLengthTable::getCodeLength(std::size_t index) const
{
return mTree.getCodeLength(index);
}
/**
 * Stores the code-length sequence that the prefix codes are later built from.
 *
 * @param sequence      Code lengths, one per entry.
 * @param targetDeflate When true, `sequence` is treated as being in the
 *                      permuted order of DEFLATE_PERMUTATION and is scattered
 *                      into a zero-filled table of DEFLATE_PERMUTATION_SIZE
 *                      entries; entries beyond that size are ignored.
 *                      When false, `sequence` is stored unchanged.
 */
void HuffmanCodeLengthTable::setInputLengthSequence(const std::vector<unsigned char>& sequence, bool targetDeflate)
{
    mTargetDeflate = targetDeflate;
    if (!targetDeflate)
    {
        mInputLengthSequence = sequence;
        return;
    }

    mInputLengthSequence = std::vector<unsigned char>(DEFLATE_PERMUTATION_SIZE, 0);

    // Only the first DEFLATE_PERMUTATION_SIZE entries have a slot. Anything past
    // that would be mapped to slot 0 by mapToDeflateIndex() and silently
    // overwrite a valid length, so stop at the table size.
    const unsigned table_size = DEFLATE_PERMUTATION_SIZE;
    const unsigned num_entries = sequence.size() < table_size ? static_cast<unsigned>(sequence.size()) : table_size;
    for (unsigned idx = 0; idx < num_entries; idx++)
    {
        mInputLengthSequence[mapToDeflateIndex(idx)] = sequence[idx];
    }
}

View file

@ -0,0 +1,50 @@
#pragma once
#include "HuffmanTree.h"
#include <vector>
#include <string>
#include <optional>
/**
 * Holds a sequence of Huffman code lengths and the prefix codes derived from it.
 *
 * Typical use: setInputLengthSequence(), then buildPrefixCodes(); codes can then
 * be fetched with getCode() or matched against read bits with findMatch().
 * buildCompressedLengthSequence() separately produces the run-length compressed
 * form of the length sequence.
 */
class HuffmanCodeLengthTable
{
public:
    /// Builds prefix codes from the stored length sequence (no-op when it is empty).
    void buildPrefixCodes();

    /// Run-length compresses the stored length sequence and counts symbol usage.
    void buildCompressedLengthSequence();

    /// Textual dump of the codes registered in the tree.
    std::string dumpPrefixCodes() const;

    /// Looks for `code` among the tree entries at `treeIndex`.
    std::optional<HuffmanTree::Symbol> findMatch(std::size_t treeIndex, uint32_t code) const;

    /// Read-only access to the underlying tree. Defined inline here because the
    /// accompanying source file provides no definition for this declaration.
    const HuffmanTree& getTree() const
    {
        return mTree;
    }

    /// Code stored at position `index` of the built code list (unchecked).
    const PrefixCode& getCode(std::size_t index) const;

    /// (symbol, extra value) pair of the compressed length sequence.
    using CompressedSequenceEntry = std::pair<unsigned, unsigned>;
    const std::vector<CompressedSequenceEntry>& getCompressedLengthSequence() const;

    /// Occurrence count per symbol of the compressed length sequence.
    const std::vector<unsigned> getCompressedLengthCounts() const;

    unsigned getNumCodeLengths() const;

    unsigned getCodeLength(std::size_t treeIndex) const;

    /// Entry `index` of DEFLATE_PERMUTATION, or 0 when out of range.
    unsigned mapToDeflateIndex(unsigned index) const;

    /// Stores the length sequence; with `targetDeflate` it is un-permuted first.
    void setInputLengthSequence(const std::vector<unsigned char>& sequence, bool targetDeflate = true);

private:
    HuffmanTree mTree;
    bool mTargetDeflate{true};

    std::vector<unsigned char> mInputLengthSequence;
    std::vector<PrefixCode> mCodes;

    std::vector<CompressedSequenceEntry> mCompressedLengthSequence;
    std::vector<unsigned> mCompressedLengthCounts;

    // Order in which code lengths for the 19-symbol code-length alphabet are stored.
    static constexpr unsigned DEFLATE_PERMUTATION_SIZE{19};
    static constexpr unsigned DEFLATE_PERMUTATION[DEFLATE_PERMUTATION_SIZE]{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
};

View file

@ -0,0 +1,67 @@
#pragma once
#include "RawTree.h"
#include "HuffmanCodeLengthTable.h"
#include "HuffmanFixedCodes.h"
#include <vector>
#include <unordered_map>
// Interface for anything that can supply prefix codes to an encoder:
// one code per literal byte value plus a dedicated end-of-stream code.
class PrefixCodeGenerator
{
public:
virtual ~PrefixCodeGenerator() = default;
// Code to emit for the literal byte `value`.
virtual const PrefixCode& getLiteralValue(unsigned char value) const = 0;
// Code to emit once after the last literal to mark the end of the stream.
virtual const PrefixCode& getEndOfStreamValue() const = 0;
};
// PrefixCodeGenerator backed by a HuffmanCodeLengthTable.
// With setUseFixedCode(true) followed by initializeLiteralLengthTable(), the
// literal/length table is filled from HuffmanFixedCodes::getDeflateFixedHuffmanCodes().
class HuffmanEncoder : public PrefixCodeGenerator
{
using DataStream = std::vector<unsigned char>;
using CountPair = std::pair<unsigned char, unsigned>;
public:
void encode(const DataStream& stream);
void encode(const std::unordered_map<unsigned char, unsigned>& counts);
// Selects whether initializeLiteralLengthTable() loads the fixed code lengths.
void setUseFixedCode(bool useFixed)
{
mUseFixedCode = useFixed;
}
// Placeholder: ignores `length` and always returns 0.
uint32_t getLengthValue(unsigned length)
{
return 0;
}
// Code at table position `value`.
// NOTE(review): the table is only populated by initializeLiteralLengthTable()
// in fixed-code mode; calling this earlier appears to index an empty code list - confirm.
const PrefixCode& getLiteralValue(unsigned char value) const override
{
return mLiteralLengthTable.getCode(value);
}
// Code at table position 256, used as the end-of-stream marker.
const PrefixCode& getEndOfStreamValue() const override
{
return mLiteralLengthTable.getCode(256);
}
// Loads the fixed code lengths (unpermuted, targetDeflate=false) and builds the
// prefix codes. Does nothing when fixed-code mode is off.
void initializeLiteralLengthTable()
{
if(mUseFixedCode)
{
mLiteralLengthTable.setInputLengthSequence(HuffmanFixedCodes::getDeflateFixedHuffmanCodes(), false);
mLiteralLengthTable.buildPrefixCodes();
}
}
private:
void dumpTree(const RawTree<CountPair>& tree) const;
void dumpNode(RawNode<CountPair>* node, unsigned depth) const;
bool mUseFixedCode{false};
// NOTE(review): never written or read within this class definition.
bool mTableIsInitialized{false};
HuffmanCodeLengthTable mLiteralLengthTable;
HuffmanCodeLengthTable mDistanceTable;
};

View file

@ -0,0 +1,20 @@
#pragma once
#include <vector>
namespace HuffmanFixedCodes
{
    /**
     * Builds the code-length sequence of the fixed Huffman code, one length per symbol.
     *
     * The sequence is 144 entries of 8, then 112 of 9, then 24 of 7, then 8 of 8
     * (288 entries in total).
     */
    inline std::vector<unsigned char> getDeflateFixedHuffmanCodes()
    {
        // (run length, code length) pairs, in symbol order.
        const std::vector<std::pair<unsigned, unsigned char> > runs {{144, 8}, {112, 9}, {24, 7}, {8, 8}};

        std::vector<unsigned char> lengths;
        for (const auto& run : runs)
        {
            // Fill-insert appends `run.first` copies of the length `run.second`.
            lengths.insert(lengths.end(), run.first, run.second);
        }
        return lengths;
    }
}

View file

@ -0,0 +1,202 @@
#include "HuffmanStream.h"
#include "ByteUtils.h"
#include "HuffmanFixedCodes.h"
#include <iostream>
#include <algorithm>
#include <unordered_map>
#include <sstream>
// Stores the two stream pointers as given.
// NOTE(review): raw pointers are kept, so presumably the caller keeps both
// streams alive for the lifetime of this object - confirm.
HuffmanStream::HuffmanStream(BitStream* inputStream, BitStream* outputStream)
: mInputStream(inputStream),
mOutputStream(outputStream)
{
}
// Switches the stream to fixed-code mode and builds the prefix codes from the
// fixed code-length sequence (passed unpermuted, targetDeflate=false).
void HuffmanStream::generateFixedCodeMapping()
{
mUsingFixedCodes = true;
mCodeLengthTable.setInputLengthSequence(HuffmanFixedCodes::getDeflateFixedHuffmanCodes(), false);
mCodeLengthTable.buildPrefixCodes();
}
/**
 * Reads bits from the input stream until they match a code in the table.
 *
 * Starts with as many bits as the first code length in the table; whenever
 * nothing matches, moves on to the next code length and reads only the
 * additional bits needed.
 *
 * @param final_symbol Receives the decoded symbol on success; untouched otherwise.
 * @return true when a symbol was matched; false when the table is empty, the
 *         input ran out, or no code of any length matched.
 */
bool HuffmanStream::readNextCodeLengthSymbol(unsigned& final_symbol)
{
    if (mCodeLengthTable.getNumCodeLengths() == 0)
    {
        return false;
    }

    unsigned working_index{0};
    auto length = mCodeLengthTable.getCodeLength(working_index);
    auto delta = length;

    bool found{false};
    unsigned char buffer{0};
    uint32_t working_bits{0};
    unsigned working_symbol{0};

    while (!found)
    {
        // A failed read leaves `buffer` holding stale bits; matching on them
        // could yield a bogus symbol, so give up as soon as the input runs dry.
        const bool valid = mInputStream->readNextNBits(delta, buffer);
        if (!valid)
        {
            break;
        }

        // Newly read bits are placed above the bits accumulated so far.
        working_bits = working_bits | (buffer << (length - delta));
        std::cout << "Read " << delta << " bits with length " << length << " and value " << ByteUtils::toString(working_bits) << std::endl;

        if (const auto symbol = mCodeLengthTable.findMatch(working_index, working_bits))
        {
            found = true;
            working_symbol = *symbol;
        }
        else
        {
            working_index++;
            if (working_index >= mCodeLengthTable.getNumCodeLengths())
            {
                break;
            }
            // Only the difference to the next code length still has to be read.
            auto new_length = mCodeLengthTable.getCodeLength(working_index);
            delta = new_length - length;
            length = new_length;
        }
    }

    if (found)
    {
        final_symbol = working_symbol;
        std::cout << "Found symbol " << working_symbol << " with bits " << ByteUtils::toString(working_bits) << std::endl;
        std::cout << "At Byte offset " << mInputStream->getCurrentByteOffset() << " and bit offset " << mInputStream->getCurrentBitOffset() << std::endl;
        return true;
    }
    else
    {
        std::cout << "SYMBOL NOT FOUND " << " with bits " << ByteUtils::toString(working_bits) << std::endl;
        return false;
    }
}
/**
 * Reads run-length compressed code lengths until mNumLiterals have been collected.
 *
 * Symbol meanings:
 *   - 0..15 : a literal code length
 *   - 16    : repeat the previous length 3 + (2 extra bits) times
 *   - 17    : 3 + (3 extra bits) zero lengths
 *   - 18    : 11 + (7 extra bits) zero lengths
 *
 * Stops early when a symbol cannot be decoded, or when symbol 16 appears
 * before any length has been read.
 * NOTE(review): the collected lengths are local and discarded on return -
 * presumably work in progress; confirm.
 */
void HuffmanStream::readLiteralCodeLengths()
{
    std::vector<unsigned> lengths;
    unsigned symbol{0};

    while (lengths.size() < mNumLiterals)
    {
        bool valid = readNextCodeLengthSymbol(symbol);
        if (!valid)
        {
            std::cout << "Hit unknown symbol - bailing out" << std::endl;
            break;
        }

        if (symbol < 16)
        {
            lengths.push_back(symbol);
        }
        else if (symbol == 16)
        {
            unsigned char num_reps{0};
            mInputStream->readNextNBits(2, num_reps);

            // There is nothing to repeat if no length has been read yet;
            // indexing the last element of an empty vector is undefined behaviour.
            if (lengths.empty())
            {
                std::cout << "Got val 16 with no previous length - bailing out" << std::endl;
                break;
            }

            const auto last_val = lengths.back();
            std::cout << "Got val 16 doing " << 3 + num_reps << std::endl;
            for (unsigned idx = 0; idx < 3 + num_reps; idx++)
            {
                lengths.push_back(last_val);
            }
        }
        else if (symbol == 17)
        {
            unsigned char num_reps{0};
            mInputStream->readNextNBits(3, num_reps);

            std::cout << "Got val 17 doing " << 3 + num_reps << std::endl;
            for (unsigned idx = 0; idx < 3 + num_reps; idx++)
            {
                lengths.push_back(0);
            }
        }
        else if (symbol == 18)
        {
            unsigned char num_reps{0};
            mInputStream->readNextNBits(7, num_reps);

            std::cout << "Got val 18 doing " << 11 + num_reps << std::endl;
            for (unsigned idx = 0; idx < 11 + num_reps; idx++)
            {
                lengths.push_back(0);
            }
        }
    }
}
/**
 * Runs the decode pass over the input stream.
 *
 * When fixed codes are not in use, only the coding tables are read; payload
 * decoding for that case is not implemented here, so false is returned.
 *
 * When fixed codes are in use, symbols are consumed until symbol 256 is
 * seen or a symbol cannot be decoded.
 *
 * @return true if symbol 256 was reached, false otherwise.
 */
bool HuffmanStream::decode()
{
    if (!mUsingFixedCodes)
    {
        readCodingsTable();
        return false;
    }

    unsigned symbol{0};
    while (true)
    {
        if (!readNextCodeLengthSymbol(symbol))
        {
            std::cout << "Hit unknown symbol - bailing out" << std::endl;
            return false;
        }
        if (symbol == 256)
        {
            // Report success instead of the previous unconditional false.
            return true;
        }
    }
}
void HuffmanStream::readCodingsTable()
{
unsigned char h_lit{0};
mInputStream->readNextNBits(5, h_lit);
mNumLiterals = h_lit + 257;
std::cout << "Got HLIT " << mNumLiterals << std::endl;
unsigned char h_dist{0};
mInputStream->readNextNBits(5, h_dist);
mNumDistances = h_dist + 1;
std::cout << "Got HDIST " << mNumDistances << std::endl;
unsigned char h_clen{0};
mInputStream->readNextNBits(4, h_clen);
auto num_code_lengths = h_clen + 4;
std::cout << "Got HCLEN " << num_code_lengths << std::endl;
auto sequence = std::vector<unsigned char>(num_code_lengths, 0);
unsigned char buffer{0};
for(unsigned idx = 0; idx< num_code_lengths; idx++)
{
std::cout << "After codings " << mInputStream->logLocation();
mInputStream->readNextNBits(3, buffer);
sequence[idx] = buffer;
}
mCodeLengthTable.setInputLengthSequence(sequence, true);
mCodeLengthTable.buildPrefixCodes();
readLiteralCodeLengths();
}

View file

@ -0,0 +1,38 @@
#pragma once
#include "BitStream.h"
#include "HuffmanCodeLengthTable.h"
#include <vector>
#include <string>
/**
 * Reads Huffman-coded data from an input BitStream.
 *
 * The stream pointers are held as raw pointers.
 * NOTE(review): presumably non-owning - confirm against the constructor.
 */
class HuffmanStream
{
public:
    HuffmanStream(BitStream* inputStream, BitStream* outputStream);

    /// Runs the decode pass; see the definition for the meaning of the result.
    bool decode();

    // NOTE(review): definitions of the next two are not visible from this
    // header; names suggest table set-up helpers - confirm.
    void setCodeLengthAlphabetLengths(const std::vector<unsigned char>& lengths);
    void generateFixedCodeMapping();

private:
    /// Reads the header counts and the code-length table entries.
    void readCodingsTable();

    /// Expands the compressed literal code-length sequence.
    void readLiteralCodeLengths();

    /// Decodes the next symbol into `buffer`; returns false if none matched.
    bool readNextCodeLengthSymbol(unsigned& buffer);

    BitStream* mInputStream;
    BitStream* mOutputStream;

    unsigned mNumLiterals{0}; // HLIT + 257
    unsigned mNumDistances{0}; // HDIST + 1

    bool mUsingFixedCodes{false};
    HuffmanCodeLengthTable mCodeLengthTable;
};

View file

@ -0,0 +1,106 @@
#include "HuffmanTree.h"
#include "ByteUtils.h"
#include <sstream>
#include <algorithm>
#include <iostream>
/**
 * Builds a prefix code from `data`, storing its low `length` bits in
 * bit-reversed order.
 *
 * The reversal is done at full 32-bit width. The byte-wide
 * ByteUtils::mirror helper would silently drop every bit above bit 7,
 * which corrupts codes longer than 8 bits.
 *
 * @param data   Code value; only the low `length` bits are used.
 * @param length Number of significant bits (values above 32 are treated
 *               as 32 for the reversal).
 */
PrefixCode::PrefixCode(uint32_t data, unsigned length)
    : mLength(length)
{
    const unsigned num_bits = length < 32 ? length : 32;
    uint32_t mirrored{0};
    for (unsigned idx = 0; idx < num_bits; idx++)
    {
        if ((data >> (num_bits - 1 - idx)) & 0x1u)
        {
            mirrored |= (uint32_t{1} << idx);
        }
    }
    mData = mirrored;
}
/// True when both the bit length and the stored (mirrored) bits equal the
/// given length and code.
bool PrefixCode::matches(unsigned length, uint32_t code) const
{
    if (mLength != length)
    {
        return false;
    }
    return mData == code;
}
/**
 * Renders the code as a string of '0'/'1' characters.
 *
 * Codes longer than 8 bits are delegated to the multi-byte form of
 * ByteUtils::toString regardless of `bitsAsRightToLeft`.
 *
 * For shorter codes the stored bits are rendered either as stored
 * (`bitsAsRightToLeft` true) or mirrored within mLength bits (false).
 * In both cases the significant characters are the LAST mLength characters
 * of the 8-character byte rendering, so the tail is returned. Taking the
 * head, as the mirrored branch used to do, returns the zero padding.
 */
std::string PrefixCode::toString(bool bitsAsRightToLeft) const
{
    if (mLength > 8)
    {
        return ByteUtils::toString(mData, mLength);
    }

    const uint32_t bits = bitsAsRightToLeft ? mData : ByteUtils::mirror(mData, mLength);
    return ByteUtils::toString(bits).substr(8 - mLength, mLength);
}
void HuffmanTree::addCodeLengthEntry(unsigned length, const CodeSymbolPair& data)
{
bool found{false};
for (auto& entry : mTable)
{
if (entry.first == length)
{
entry.second.push_back(data);
found = true;
break;
}
}
if (!found)
{
mTable.push_back({length, {data}});
}
}
void HuffmanTree::sortTable()
{
std::sort(mTable.begin(), mTable.end(), [](CodeLengthData a, CodeLengthData b){return a.first < b.first;});
}
/**
 * Looks for `code` among the entries of the bucket at `treeIndex`.
 *
 * @param treeIndex Index of the code-length bucket to search.
 * @param code      Candidate bits, compared against each stored code using
 *                  the bucket's code length.
 * @return The matching symbol, or std::nullopt when nothing matches or
 *         `treeIndex` is outside the table (including an empty table).
 */
std::optional<HuffmanTree::Symbol> HuffmanTree::findMatch(std::size_t treeIndex, uint32_t code) const
{
    if (treeIndex >= mTable.size())
    {
        return std::nullopt;
    }

    const auto& length_data = mTable[treeIndex];
    for (const auto& entry : length_data.second)
    {
        if (entry.first.matches(length_data.first, code))
        {
            return entry.second;
        }
    }
    return std::nullopt;
}
/// Number of code-length buckets currently held in the table.
std::size_t HuffmanTree::getNumCodeLengths() const
{
    const std::size_t bucket_count = mTable.size();
    return bucket_count;
}
/// Code length of the bucket at `idx`; the index is not range-checked.
unsigned HuffmanTree::getCodeLength(std::size_t idx) const
{
    const auto& bucket = mTable[idx];
    return bucket.first;
}
/// Produces a human-readable listing of every bucket: one heading line per
/// code length followed by one line per (code, symbol) entry.
std::string HuffmanTree::dump(bool bitsAsRightToLeft) const
{
    std::stringstream report;
    for (const auto& [code_length, entries] : mTable)
    {
        report << "Prefix table for Code Length " << code_length << " has vals: \n";
        for (const auto& [prefix, symbol] : entries)
        {
            report << "Code " << prefix.toString(bitsAsRightToLeft) << " Symbol: " << symbol << '\n';
        }
    }
    return report.str();
}

View file

@ -0,0 +1,52 @@
#pragma once
#include <vector>
#include <string>
#include <optional>
/**
 * A single prefix code: a bit pattern plus the number of significant bits.
 * The constructor stores the bit-reversed form of the supplied pattern.
 */
class PrefixCode
{
public:
    PrefixCode(uint32_t data, unsigned length);

    /// Stored (bit-reversed) pattern.
    uint32_t getData() const { return mData; }

    /// Number of significant bits in the pattern.
    unsigned getLength() const { return mLength; }

    /// True when `length` and `code` both equal the stored values.
    bool matches(unsigned length, uint32_t code) const;

    /// Text rendering of the pattern as '0'/'1' characters.
    std::string toString(bool bitsAsRightToLeft = true) const;

private:
    unsigned mLength{0};
    uint32_t mData{0};
};
/**
 * Table of prefix codes grouped into buckets by code length. Each bucket
 * pairs a code length with the (code, symbol) entries of that length.
 */
class HuffmanTree
{
public:
    using Symbol = unsigned;
    using CodeLength = unsigned;
    using CodeSymbolPair = std::pair<PrefixCode, Symbol>;
    using CodeLengthData = std::pair<CodeLength, std::vector<CodeSymbolPair> >;

    /// Adds an entry to the bucket for `length`, creating it if needed.
    void addCodeLengthEntry(unsigned length, const CodeSymbolPair& data);

    /// Orders buckets by ascending code length.
    void sortTable();

    /// Searches the bucket at `treeIndex` for an entry matching `code`.
    std::optional<HuffmanTree::Symbol> findMatch(std::size_t treeIndex, uint32_t code) const;

    /// Number of buckets in the table.
    std::size_t getNumCodeLengths() const;

    /// Code length of the bucket at `idx`.
    unsigned getCodeLength(std::size_t idx) const;

    /// Human-readable listing of the whole table.
    std::string dump(bool bitsAsRightToLeft = true) const;

private:
    std::vector<CodeLengthData> mTable;
};

View file

@ -26,7 +26,20 @@ unsigned char ByteUtils::getByteN(uint32_t input, unsigned n)
return (input << 8*n) >> 24; return (input << 8*n) >> 24;
} }
unsigned char ByteUtils::getLowerNBits(unsigned char input, unsigned num) unsigned char ByteUtils::mirror(unsigned char byte, unsigned length)
{
unsigned char ret{0};
for(unsigned idx=0; idx<length; idx++)
{
if (getBitN(byte, length - 1 - idx))
{
ret |= (0x01 << idx);
}
}
return ret;
}
unsigned char ByteUtils::getLowerNBits(uint32_t input, unsigned num)
{ {
switch (num) switch (num)
{ {
@ -81,7 +94,7 @@ unsigned char ByteUtils::getMBitsAtN(unsigned char input, unsigned m, unsigned n
} }
} }
unsigned char ByteUtils::getBitN(unsigned char input, unsigned n) bool ByteUtils::getBitN(uint32_t input, unsigned n)
{ {
return input & (1 << n); return input & (1 << n);
} }
@ -105,12 +118,34 @@ unsigned char ByteUtils::getFromString(const std::string& string)
return ret; return ret;
} }
std::string ByteUtils::toString(unsigned char c) std::string ByteUtils::toString(uint32_t input, unsigned length)
{ {
std::string ret; std::string ret;
for(unsigned idx=0; idx<8; idx++) std::string working;
for(unsigned idx=0; idx<length; idx++)
{ {
ret += getBitN(c, 7 - idx) ? '1' : '0'; if (idx > 0 && idx % 8 == 0)
{
if (ret.empty())
{
ret = working;
}
else
{
ret = working + '-' + ret;
}
working = "";
}
working += getBitN(input, 7 - idx) ? '1' : '0';
}
if (length <= 8)
{
ret = working;
}
else if(!working.empty())
{
ret = working + '-' + ret;
} }
return ret; return ret;
} }

View file

@ -21,17 +21,19 @@ public:
static unsigned char getHigherNBits(unsigned char input, unsigned num); static unsigned char getHigherNBits(unsigned char input, unsigned num);
static unsigned char getLowerNBits(unsigned char input, unsigned num); static unsigned char getLowerNBits(uint32_t input, unsigned num);
static unsigned char getTwoBitsAtN(unsigned char input, unsigned n); static unsigned char getTwoBitsAtN(unsigned char input, unsigned n);
static unsigned char getMBitsAtN(unsigned char input, unsigned m, unsigned n); static unsigned char getMBitsAtN(unsigned char input, unsigned m, unsigned n);
static unsigned char getBitN(unsigned char input, unsigned n); static bool getBitN(uint32_t input, unsigned n);
static unsigned char getFromString(const std::string& string); static unsigned char getFromString(const std::string& string);
static std::string toString(unsigned char c); static std::string toString(uint32_t input, unsigned length = 8);
static unsigned char mirror(unsigned char byte, unsigned length=0);
static void ReverseBuffer(char* buffer, char* reverse, unsigned size, unsigned targetSize); static void ReverseBuffer(char* buffer, char* reverse, unsigned size, unsigned targetSize);

View file

@ -24,6 +24,7 @@ void BitStream::write(uint32_t data)
unsigned num_bytes = sizeof(uint32_t); unsigned num_bytes = sizeof(uint32_t);
for(unsigned idx=0; idx<num_bytes;idx++) for(unsigned idx=0; idx<num_bytes;idx++)
{ {
std::cout << "Writing byte " << idx << " for multibyte" << std::endl;
writeByte(ByteUtils::getByteN(data, idx)); writeByte(ByteUtils::getByteN(data, idx));
} }
} }
@ -46,18 +47,70 @@ unsigned BitStream::getCurrentBitOffset() const
return mBitOffset; return mBitOffset;
} }
/// Formats the current byte offset, bit offset and working byte on one
/// newline-terminated line for debugging.
std::string BitStream::logLocation()
{
    std::stringstream location;
    location << "Byte offset " << mByteOffset
             << " | Bit offset " << mBitOffset
             << " | Working byte " << ByteUtils::toString(getCurrentByte())
             << '\n';
    return location.str();
}
std::string BitStream::logNextNBytes(unsigned n) const std::string BitStream::logNextNBytes(unsigned n) const
{ {
std::stringstream sstr; std::stringstream sstr;
unsigned count{0}; unsigned count{0};
for(auto byte : peekNextNBytes(n)) for(auto byte : peekNextNBytes(n))
{ {
sstr << count << " | " << ByteUtils::toString(byte) + '\n'; sstr << mByteOffset + count << " | " << ByteUtils::toString(byte) + '\n';
count++; count++;
} }
return sstr.str(); return sstr.str();
} }
/**
 * Appends the low `length` bits of `data` to the stream, least significant
 * bit first, packing them into the working byte from mBitOffset upwards.
 *
 * Every time the working byte fills up it is emitted with
 * writeByte(byte, false) and the working byte / bit offset are reset.
 * Leftover bits stay in the working byte with mBitOffset recording how
 * many of them are valid.
 *
 * The bits are sliced straight from the 32-bit value. The byte-wide
 * ByteUtils helpers would drop everything above bit 7, which breaks any
 * write that reaches past the low byte of `data` (e.g. a 9-bit code).
 * Bits of `data` above `length` are masked off so they cannot leak into
 * later writes.
 *
 * @param data   Value whose low bits are written.
 * @param length Number of bits to write; bits requested beyond the 32
 *               available are written as zeros.
 */
void BitStream::writeNBits(uint32_t data, unsigned length)
{
    unsigned remaining = length;
    while (remaining > 0)
    {
        const unsigned space = 8 - mBitOffset;
        const unsigned take = remaining < space ? remaining : space;
        const uint32_t mask = (uint32_t{1} << take) - 1u; // take is at most 8

        mCurrentByte |= static_cast<unsigned char>((data & mask) << mBitOffset);
        data >>= take;
        remaining -= take;
        mBitOffset += take;

        if (mBitOffset == 8)
        {
            writeByte(mCurrentByte, false);
            mCurrentByte = 0;
            mBitOffset = 0;
        }
    }
}
bool BitStream::readNextNBits(unsigned n, unsigned char& buffer) bool BitStream::readNextNBits(unsigned n, unsigned char& buffer)
{ {
if (mByteOffset < 0) if (mByteOffset < 0)

View file

@ -21,13 +21,17 @@ public:
std::string logNextNBytes(unsigned n) const; std::string logNextNBytes(unsigned n) const;
std::string logLocation();
virtual std::vector<unsigned char> peekNextNBytes(unsigned n) const = 0; virtual std::vector<unsigned char> peekNextNBytes(unsigned n) const = 0;
virtual bool readNextNBits(unsigned n, unsigned char& buffer); virtual bool readNextNBits(unsigned n, unsigned char& buffer);
virtual std::optional<unsigned char> readNextByte() = 0; virtual std::optional<unsigned char> readNextByte() = 0;
virtual void writeByte(unsigned char data) = 0; virtual void writeNBits(uint32_t data, unsigned length);
virtual void writeByte(unsigned char data, bool checkOverflow = true) = 0;
void write(uint32_t data); void write(uint32_t data);
@ -37,6 +41,11 @@ public:
void resetOffsets() void resetOffsets()
{ {
mEndByteOffset = mByteOffset;
mEndBitOffset = mBitOffset;
mEndByte = mCurrentByte;
mCurrentByte = 0;
mByteOffset = -1; mByteOffset = -1;
mBitOffset = 0; mBitOffset = 0;
} }
@ -47,6 +56,20 @@ public:
mCurrentByte = 0; mCurrentByte = 0;
} }
/// Emits the partially filled working byte, if any, and returns the stream
/// to a byte-aligned state. The working byte is cleared as well as the bit
/// offset, so that later bit writes do not OR new bits into stale data.
void flushRemainingBits()
{
    if (mBitOffset > 0)
    {
        writeByte(mCurrentByte, false);
        mCurrentByte = 0;
        mBitOffset = 0;
    }
}
/// Returns the saved end byte together with the saved end bit offset.
std::pair<unsigned char, unsigned> getRemainingBits() const
{
    return std::pair<unsigned char, unsigned>(mEndByte, mEndBitOffset);
}
void setChecksumCalculator(AbstractChecksumCalculator* calc) void setChecksumCalculator(AbstractChecksumCalculator* calc)
{ {
mChecksumCalculator = calc; mChecksumCalculator = calc;
@ -60,8 +83,10 @@ public:
protected: protected:
int mByteOffset{-1}; int mByteOffset{-1};
unsigned mBitOffset{0}; unsigned mBitOffset{0};
unsigned char mCurrentByte{0}; unsigned char mCurrentByte{0};
int mEndByteOffset{-1};
unsigned mEndBitOffset{0};
unsigned char mEndByte{0};
AbstractChecksumCalculator* mChecksumCalculator{nullptr}; AbstractChecksumCalculator* mChecksumCalculator{nullptr};
}; };

View file

@ -1,5 +1,7 @@
#include "BufferBitStream.h" #include "BufferBitStream.h"
#include "ByteUtils.h"
#include <iostream> #include <iostream>
bool BufferBitStream::isFinished() const bool BufferBitStream::isFinished() const
@ -50,13 +52,27 @@ void BufferBitStream::setBuffer(const std::vector<unsigned char>& data)
mBuffer = data; mBuffer = data;
} }
void BufferBitStream::writeByte(unsigned char data) void BufferBitStream::writeByte(unsigned char data, bool checkOverflow)
{ {
unsigned char out_byte{0};
if (checkOverflow && mBitOffset > 0)
{
out_byte = ByteUtils::getLowerNBits(mCurrentByte, mBitOffset);
out_byte |= data << mBitOffset;
mCurrentByte = ByteUtils::getHigherNBits(data, mBitOffset);
}
else
{
out_byte = data;
}
if (mChecksumCalculator) if (mChecksumCalculator)
{ {
mChecksumCalculator->addValue(data); mChecksumCalculator->addValue(out_byte);
} }
mBuffer.push_back(data); std::cout << "Writing byte " << ByteUtils::toString(out_byte) << " had bitoffset of " << mBitOffset << std::endl;
mBuffer.push_back(out_byte);
} }

View file

@ -15,7 +15,7 @@ public:
void setBuffer(const std::vector<unsigned char>& data); void setBuffer(const std::vector<unsigned char>& data);
void writeByte(unsigned char data) override; void writeByte(unsigned char data, bool checkOverflow = true) override;
void writeBytes(const std::vector<unsigned char> data) override void writeBytes(const std::vector<unsigned char> data) override
{ {

View file

@ -29,7 +29,7 @@ std::optional<unsigned char> InputBitStream::readNextByte()
} }
} }
void InputBitStream::writeByte(unsigned char data) void InputBitStream::writeByte(unsigned char data, bool checkOverflow )
{ {
} }

View file

@ -14,7 +14,7 @@ class InputBitStream : public BitStream
std::optional<unsigned char> readNextByte() override; std::optional<unsigned char> readNextByte() override;
void writeByte(unsigned char data) override; void writeByte(unsigned char data, bool checkOverflow = true) override;
void writeBytes(const std::vector<unsigned char> data) override void writeBytes(const std::vector<unsigned char> data) override
{ {

View file

@ -22,7 +22,7 @@ std::optional<unsigned char> OutputBitStream::readNextByte()
return std::nullopt; return std::nullopt;
} }
void OutputBitStream::writeByte(unsigned char data) void OutputBitStream::writeByte(unsigned char data, bool checkOverflow )
{ {
(*mStream) << data; (*mStream) << data;
} }

View file

@ -15,7 +15,7 @@ public:
std::optional<unsigned char> readNextByte() override; std::optional<unsigned char> readNextByte() override;
void writeByte(unsigned char data) override; void writeByte(unsigned char data, bool checkOverflow = true) override;
void writeBytes(const std::vector<unsigned char> data) override; void writeBytes(const std::vector<unsigned char> data) override;

View file

@ -29,7 +29,7 @@ std::optional<unsigned char> ImageBitStream::readNextByte()
return val; return val;
} }
void ImageBitStream::writeByte(unsigned char data) void ImageBitStream::writeByte(unsigned char data, bool checkOverflow )
{ {
mByteOffset++; mByteOffset++;

View file

@ -15,7 +15,7 @@ public:
std::optional<unsigned char> readNextByte() override; std::optional<unsigned char> readNextByte() override;
void writeByte(unsigned char data) override; void writeByte(unsigned char data, bool checkOverflow = true) override;
void writeBytes(const std::vector<unsigned char> data) override void writeBytes(const std::vector<unsigned char> data) override
{ {

View file

@ -30,6 +30,11 @@ std::unique_ptr<PngWriter> PngWriter::Create()
return std::make_unique<PngWriter>(); return std::make_unique<PngWriter>();
} }
/// Stores the deflate compression method for later use by this writer.
void PngWriter::setCompressionMethod(Deflate::CompressionMethod method)
{
    this->mCompressionMethod = method;
}
void PngWriter::setPath(const Path& path) void PngWriter::setPath(const Path& path)
{ {
mPath = path; mPath = path;
@ -91,6 +96,7 @@ void PngWriter::writeHeader()
void PngWriter::writeEndChunk() void PngWriter::writeEndChunk()
{ {
std::cout << "Start writing end chunk" << std::endl;
unsigned length{0}; unsigned length{0};
mOutStream->write(length); mOutStream->write(length);
@ -145,6 +151,7 @@ void PngWriter::writeDataChunks(const BufferBitStream& buffer)
auto crc = crc_check.getChecksum(); auto crc = crc_check.getChecksum();
std::cout << "Writing idat crc" << crc << std::endl; std::cout << "Writing idat crc" << crc << std::endl;
mOutStream->write(crc); mOutStream->write(crc);
std::cout << "Finished Writing idat crc" << crc << std::endl;
} }
} }
@ -192,12 +199,17 @@ void PngWriter::write(const std::unique_ptr<Image<unsigned char> >& image)
lz77_out_stream = std::make_unique<BufferBitStream>(); lz77_out_stream = std::make_unique<BufferBitStream>();
Lz77Encoder lz77_encoder(filter_out_stream.get(), lz77_out_stream.get()); Lz77Encoder lz77_encoder(filter_out_stream.get(), lz77_out_stream.get());
if (mCompressionMethod == Deflate::CompressionMethod::DYNAMIC_HUFFMAN)
{
// Set up custom encoder;
}
lz77_encoder.encode(); lz77_encoder.encode();
lz77_out_stream->resetOffsets(); lz77_out_stream->resetOffsets();
} }
BufferBitStream zlib_out_stream; BufferBitStream zlib_out_stream;
ZlibEncoder zlib_encoder(lz77_out_stream.get(), &zlib_out_stream); ZlibEncoder zlib_encoder(lz77_out_stream.get(), &zlib_out_stream);
zlib_encoder.setDeflateCompressionMethod(mCompressionMethod);
zlib_encoder.encode(); zlib_encoder.encode();
zlib_out_stream.resetOffsets(); zlib_out_stream.resetOffsets();

View file

@ -22,6 +22,8 @@ public:
static std::unique_ptr<PngWriter> Create(); static std::unique_ptr<PngWriter> Create();
void setCompressionMethod(Deflate::CompressionMethod method);
void setPath(const Path& path); void setPath(const Path& path);
void setPngInfo(const PngInfo& info); void setPngInfo(const PngInfo& info);

View file

@ -18,6 +18,7 @@ list(APPEND TestFiles
compiler/TestLexer.cpp compiler/TestLexer.cpp
compiler/TestTemplatingEngine.cpp compiler/TestTemplatingEngine.cpp
compression/TestStreamCompressor.cpp compression/TestStreamCompressor.cpp
compression/TestHuffmanStream.cpp
database/TestDatabase.cpp database/TestDatabase.cpp
fonts/TestFontReader.cpp fonts/TestFontReader.cpp
graphics/TestRasterizer.cpp graphics/TestRasterizer.cpp

View file

@ -0,0 +1,60 @@
#include <iostream>
#include "HuffmanStream.h"
/// Feeds a code-length sequence made of four runs (144 x 8, 112 x 9,
/// 24 x 7, 8 x 8) into a HuffmanCodeLengthTable, builds its compressed
/// form and prints the resulting sequence and per-slot counts.
void testHuffmanCodeLengthTable()
{
    // Each pair is (run length, code length).
    const std::vector<std::pair<unsigned, unsigned char> > runs {{144, 8}, {112, 9}, {24, 7}, {8 ,8}};

    std::vector<unsigned char> code_length_sequence;
    for (const auto& run : runs)
    {
        code_length_sequence.insert(code_length_sequence.end(), run.first, run.second);
    }

    HuffmanCodeLengthTable table;
    table.setInputLengthSequence(code_length_sequence, false);
    table.buildCompressedLengthSequence();

    auto compressed_sequence = table.getCompressedLengthSequence();
    for (auto item : compressed_sequence)
    {
        std::cout << "Count " << item.first << " extra bits " << item.second << std::endl;
    }

    auto compressed_lengths = table.getCompressedLengthCounts();
    unsigned slot{0};
    while (slot < compressed_lengths.size())
    {
        std::cout << "Slot " << slot << " length " << compressed_lengths[slot] << std::endl;
        slot++;
    }
}
// Test driver: runs the code-length table test and prints a separator.
// The commented-out lines are HuffmanStream experiments that are currently
// disabled.
int main()
{
testHuffmanCodeLengthTable();
//HuffmanStream stream(nullptr, nullptr);
//stream.setCodeLengthAlphabetLengths({3, 3, 3, 3, 3, 2, 4, 4});
//stream.setCodeLengthAlphabetLengths({2, 2, 3, 3, 3, 3});
//stream.buildCodeLengthMapping();
std::cout << "*******" << std::endl;
//stream.setCodeLengthAlphabetLengths({4, 0, 6, 7, 3, 2, 4, 2, 7, 4, 6, 3, 0, 6});
//stream.buildCodeLengthMapping();
//const auto mapping = stream.getCodeLengthMapping();
//stream.generateFixedCodeMapping();
return 0;
}

View file

@ -3,9 +3,13 @@
#include <iostream> #include <iostream>
int main() void testReading()
{ {
std::vector<std::string> bytes{"11100101", "00110101", "00010001"}; std::vector<std::string> bytes{
"11101101",
"01011101",
"00001001",
"01111111"};
BufferBitStream stream; BufferBitStream stream;
for(const auto& byte : bytes) for(const auto& byte : bytes)
@ -14,17 +18,67 @@ int main()
} }
unsigned char buffer{0} ; unsigned char buffer{0} ;
auto valid = stream.readNextNBits(3, buffer); auto valid = stream.readNextNBits(1, buffer);
std::cout << "Slice0 is " << ByteUtils::toString(buffer) << std::endl; std::cout << "Slice0 is " << ByteUtils::toString(buffer) << std::endl;
valid = stream.readNextNBits(3, buffer); valid = stream.readNextNBits(2, buffer);
std::cout << "Slice1 is " << ByteUtils::toString(buffer) << std::endl; std::cout << "Slice1 is " << ByteUtils::toString(buffer) << std::endl;
valid = stream.readNextNBits(5, buffer); valid = stream.readNextNBits(5, buffer);
std::cout << "Slice2 is " << ByteUtils::toString(buffer) << std::endl; std::cout << "Slice2 is " << ByteUtils::toString(buffer) << std::endl;
valid = stream.readNextNBits(7, buffer); valid = stream.readNextNBits(5, buffer);
std::cout << "Slice3 is " << ByteUtils::toString(buffer) << std::endl; std::cout << "Slice3 is " << ByteUtils::toString(buffer) << std::endl;
valid = stream.readNextNBits(4, buffer);
std::cout << "Slice3 is " << ByteUtils::toString(buffer) << " and int " << static_cast<int>(buffer) << std::endl;
valid = stream.readNextNBits(3, buffer);
std::cout << "Slice3 is " << ByteUtils::toString(buffer) << std::endl;
}
/// Interleaves whole-byte writes with 3-, 7- and 1-bit writes, flushes the
/// stream, then reads back five bytes and prints their bit patterns.
void testWriting()
{
    BufferBitStream stream;

    stream.writeByte(ByteUtils::getFromString("01100000"));
    stream.writeNBits(ByteUtils::getFromString("00000111"), 3);

    stream.writeByte(ByteUtils::getFromString("11110000"));
    stream.writeNBits(ByteUtils::getFromString("01001101"), 7);

    stream.writeByte(ByteUtils::getFromString("11110000"));
    stream.writeNBits(ByteUtils::getFromString("00000001"), 1);

    stream.flushRemainingBits();
    stream.resetOffsets();

    // Read everything back before printing anything.
    std::vector<std::string> rendered;
    for (unsigned idx = 0; idx < 5; idx++)
    {
        rendered.push_back(ByteUtils::toString(*stream.readNextByte()));
    }

    std::cout << "Got bytes 0 " << rendered[0] << std::endl;
    std::cout << "Got bytes 1 " << rendered[1] << std::endl;
    std::cout << "Got bytes 2 " << rendered[2] << std::endl;
    std::cout << "Got bytes 3 " << rendered[3] << std::endl;
    std::cout << "Got bytes 4 " << rendered[4] << std::endl;
}
int main()
{
//testReading()
testWriting();
return 0; return 0;
} }

View file

@ -24,5 +24,10 @@ int main()
std::cout << "Byte2 is " << ByteUtils::toString(byte2) << std::endl; std::cout << "Byte2 is " << ByteUtils::toString(byte2) << std::endl;
std::cout << "Byte3 is " << ByteUtils::toString(byte3) << std::endl; std::cout << "Byte3 is " << ByteUtils::toString(byte3) << std::endl;
std::cout << "Mirroring" << std::endl;
auto out = ByteUtils::mirror(byte);
std::cout << "Mirror is " << ByteUtils::toString(out) << std::endl;
return 0; return 0;
} }

View file

@ -5,25 +5,43 @@
#include "Image.h" #include "Image.h"
#include <iostream> #include <iostream>
int main() void testThirdParty()
{ {
//const auto path = "/home/jmsgrogan/Downloads/test.png"; //const auto path = "/home/jmsgrogan/Downloads/test.png";
//const auto path = "/home/jmsgrogan/Downloads/index.png"; const auto path = "/home/jmsgrogan/Downloads/index.png";
const auto path = "/home/jmsgrogan/code/MediaTool-build/bin/test.png"; //const auto path = "/home/jmsgrogan/code/MediaTool-build/bin/test.png";
File file(path); //File file(path);
std::cout << file.dumpBinary(); //std::cout << file.dumpBinary();
PngReader reader; PngReader reader;
reader.setPath(path); reader.setPath(path);
auto image = reader.read(); auto image = reader.read();
for(unsigned idx=0; idx<image->getWidth()*image->getBytesPerRow(); idx++) //for(unsigned idx=0; idx<image->getWidth()*image->getBytesPerRow(); idx++)
{ //{
std::cout << "Image val: " << idx << " | " << static_cast<int>(image->getDataRef()[idx]) << std::endl; // std::cout << "Image val: " << idx << " | " << static_cast<int>(image->getDataRef()[idx]) << std::endl;
} //}
}
/// Reads the PNG written by the fixed-code writer test from its build
/// output location.
void testFixedCode()
{
    PngReader reader;
    reader.setPath("/home/jmsgrogan/code/MediaTool-build/bin/test_fixed.png");

    auto image = reader.read();
}
int main()
{
testThirdParty();
//testFixedCode();
return 0; return 0;
} }

View file

@ -3,11 +3,12 @@
#include "File.h" #include "File.h"
#include "BitStream.h" #include "BitStream.h"
#include "ByteUtils.h"
#include "ImagePrimitives.h" #include "ImagePrimitives.h"
#include <iostream> #include <iostream>
int main() void testCompressedPng()
{ {
unsigned width = 20; unsigned width = 20;
unsigned height = 20; unsigned height = 20;
@ -26,11 +27,11 @@ int main()
image->setData(data); image->setData(data);
PngWriter writer; PngWriter writer;
writer.setPath("test.png"); writer.setPath("test_compressed.png");
writer.write(image); writer.write(image);
return 0; return;
File test_file("test.png"); File test_file("test_compressed.png");
test_file.SetAccessMode(File::AccessMode::Read); test_file.SetAccessMode(File::AccessMode::Read);
test_file.Open(true); test_file.Open(true);
@ -39,6 +40,41 @@ int main()
std::cout << static_cast<unsigned>(*byte) << std::endl; std::cout << static_cast<unsigned>(*byte) << std::endl;
} }
test_file.Close(); test_file.Close();
}
/// Writes a 10x10 single-channel 8-bit image filled with the value 10 to
/// "test_fixed.png" using the FIXED_HUFFMAN method, then dumps the file.
void testFixedPng()
{
    unsigned width{10};
    unsigned height{10};
    unsigned numChannels{1};

    auto image = Image<unsigned char>::Create(width, height);
    image->setNumChannels(numChannels);
    image->setBitDepth(8);

    // Every pixel carries the same value.
    std::vector<unsigned char> data(width*height, 10);
    image->setData(data);

    PngWriter writer;
    writer.setPath("test_fixed.png");
    writer.setCompressionMethod(Deflate::CompressionMethod::FIXED_HUFFMAN);
    writer.write(image);

    File test_file("test_fixed.png");
    std::cout << test_file.dumpBinary();
}
int main()
{
//testCompressedPng();
testFixedPng();
return 0; return 0;
} }