Convert lz77 to use fixed buffer sizes.

This commit is contained in:
James Grogan 2022-11-29 12:05:08 +00:00
parent a6e31c8d39
commit af6fad72eb
9 changed files with 362 additions and 110 deletions

View file

@ -8,55 +8,102 @@
#include <iostream> #include <iostream>
Lz77Encoder::Lz77Encoder(BitStream* inputStream, BitStream* outputStream) Lz77Encoder::Lz77Encoder(BitStream* inputStream, BitStream* outputStream)
: AbstractEncoder(inputStream, outputStream) : AbstractEncoder(inputStream, outputStream),
mSearchBuffer(mSearchBufferSize),
mLookaheadBuffer(mLookAheadBufferSize)
{ {
} }
void Lz77Encoder::setPrefixCodeGenerator(std::unique_ptr<PrefixCodeGenerator> generator)
unsigned Lz77Encoder::lookAheadForMatchingChars(std::vector<char>& matchBuffer, unsigned searchIndex, unsigned hitOffset, const std::string& stream, unsigned streamLoc)
{ {
auto remaining_size = stream.size() - streamLoc; mCodeGenerator = std::move(generator);
}
unsigned num_hits{1}; bool Lz77Encoder::hitBufferFull() const
for (unsigned jdx=1; jdx< remaining_size; jdx++) {
return mHitBuffer.size() == mMaxHitBufferSize;
}
void Lz77Encoder::populateSearchBuffer(const Hit& hit)
{
const auto& [length, distance, next_char] = hit;
if (length == 0)
{ {
char buffer_char{0}; mSearchBuffer.addItem(next_char);
if (searchIndex + jdx < mSearchBuffer.size())
{
buffer_char = mSearchBuffer[searchIndex + jdx];
} }
else else
{ {
buffer_char = stream[jdx - hitOffset]; std::vector<unsigned char> new_items(distance, 0);
for(unsigned idx=0 ;idx<distance; idx++)
{
new_items[idx] = getSearchBufferItem(idx);
}
for(auto item : new_items)
{
mSearchBuffer.addItem(item);
} }
auto lookahead_char = stream[streamLoc + jdx]; int difference = int(length) - distance;
if (lookahead_char == buffer_char) if (difference > 0)
{ {
matchBuffer.push_back(buffer_char); for(unsigned idx=0; idx<difference; idx++)
num_hits++; {
mSearchBuffer.addItem(mLookaheadBuffer.getItem(idx));
}
}
}
}
// Reads the search buffer relative to its most recently added entry.
//
// index: how far back to look; 0 selects the newest item, 1 the one added
//        before it, and so on.
// Returns the byte stored at that position.
// NOTE(review): there is no bounds check here; the subtraction relies on the
// caller keeping `index` below the current item count.
unsigned char Lz77Encoder::getSearchBufferItem(unsigned index) const
{
    const auto newest_position = mSearchBuffer.getNumItems() - 1;
    return mSearchBuffer.getItem(newest_position - index);
}
unsigned Lz77Encoder::lookAheadForMatchingChars(unsigned distance)
{
unsigned length{0};
std::cout << "In hit check at distance " << distance << " max buffer index is: " << mMaxLookAheadBufferIndex << std::endl;
for(unsigned idx=0; idx<mMaxLookAheadBufferIndex + 1; idx++)
{
int search_offset = int(distance-1) - idx;
std::cout << "Have search offet " << search_offset << std::endl;
unsigned char search_char{0};
if (search_offset < 0)
{
search_char = mLookaheadBuffer.getItem(-search_offset - 1);
} }
else else
{ {
search_char = getSearchBufferItem(static_cast<unsigned>(search_offset));
}
unsigned char lookahead_char = mLookaheadBuffer.getItem(idx);
std::cout << "Checking search char " << static_cast<int>(search_char) << " and lookup char " << static_cast<int>(lookahead_char) << std::endl;
if ((lookahead_char != search_char) || (idx == mMaxLookAheadBufferIndex))
{
if (idx + 1>= mMinLengthMatchSize)
{
length = idx + 1;
}
break; break;
} }
} }
return num_hits; return length;
} }
void Lz77Encoder::lookThroughSearchBuffer(char searchChar, unsigned& hitLength, unsigned& hitOffset, const std::string& stream, unsigned streamLoc) void Lz77Encoder::lookForMatches(unsigned char searchChar, unsigned& hitLength, unsigned& hitOffset)
{ {
for(unsigned idx=0; idx<mSearchBuffer.size(); idx++) for (unsigned idx = 0; idx< mSearchBuffer.getNumItems(); idx++)
{ {
auto search_index = mSearchBuffer.size() - idx - 1; if (mSearchBuffer.getItem(mSearchBuffer.getNumItems() - 1 - idx) == searchChar)
if (auto buffer_char = mSearchBuffer[search_index]; buffer_char == searchChar)
{ {
std::vector<char> match_buffer{buffer_char}; std::cout << "Looking for hits " << std::endl;
auto num_hits = lookAheadForMatchingChars(match_buffer, search_index, idx, stream, streamLoc); auto num_hits = lookAheadForMatchingChars(idx + 1);
if (num_hits >= hitLength) if (num_hits > 0 && num_hits >= hitLength)
{ {
hitLength = num_hits; hitLength = num_hits;
hitOffset = idx + 1; hitOffset = idx + 1;
@ -65,75 +112,163 @@ void Lz77Encoder::lookThroughSearchBuffer(char searchChar, unsigned& hitLength,
} }
} }
void Lz77Encoder::setPrefixCodeGenerator(std::unique_ptr<PrefixCodeGenerator> generator) bool Lz77Encoder::lookAheadSourceEmpty() const
{ {
mCodeGenerator = std::move(generator); if (mLookaheadBuffer.getNumItems() < mLookAheadBufferSize)
{
return true;
}
if (mMaxLookAheadBufferIndex < mLookAheadBufferSize - 1)
{
return true;
}
return false;
}
void Lz77Encoder::populateLookaheadBuffer(unsigned size, bool firstPass)
{
if (!firstPass && lookAheadSourceEmpty())
{
for(unsigned idx=0; idx<size; idx++)
{
mLookaheadBuffer.addItem(0);
mMaxLookAheadBufferIndex--;
}
return;
}
bool stream_finished{false};
unsigned stream_end_id{0};
for(unsigned idx=0; idx<size; idx++)
{
if (!stream_finished)
{
auto byte = mInputStream->readNextByte();
if (!byte)
{
stream_finished = true;
stream_end_id = idx -1;
mLookaheadBuffer.addItem(0);
mMaxLookAheadBufferIndex--;
continue;
}
else
{
mLookaheadBuffer.addItem(*byte);
}
}
else
{
mLookaheadBuffer.addItem(0);
mMaxLookAheadBufferIndex--;
}
}
if (stream_finished && firstPass)
{
mMaxLookAheadBufferIndex = stream_end_id;
}
} }
bool Lz77Encoder::encode() bool Lz77Encoder::encode()
{ {
if (!mCodeGenerator) if (!mCodeGenerator)
{ {
auto code_generator = std::make_unique<HuffmanEncoder>(); mCodeGenerator = std::make_unique<HuffmanEncoder>();
auto huffman_encoder = code_generator.get();
mCodeGenerator = std::move(code_generator);
huffman_encoder->setUseFixedCode(true);
huffman_encoder->initializeLiteralLengthTable();
} }
while(auto byte = mInputStream->readNextByte()) // Fill the lookahead buffer
mMaxLookAheadBufferIndex = mLookAheadBufferSize - 1;
populateLookaheadBuffer(mLookAheadBufferSize, true);
if(mMaxLookAheadBufferIndex < 0)
{ {
const auto code = mCodeGenerator->getLiteralValue(*byte); return true;
std::cout << "Writing value " << static_cast<int>(*byte) << " with code " << ByteUtils::toString(code.getData(), code.getLength()) << "\n";
mOutputStream->writeNBits(code.getData(), code.getLength());
} }
auto eos_code = mCodeGenerator->getEndOfStreamValue(); bool input_stream_ended{false};
std::cout << "Writing EOS value with code " << ByteUtils::toString(eos_code.getData(), eos_code.getLength()) << "\n"; while(!hitBufferFull())
mOutputStream->writeNBits(eos_code.getData(), eos_code.getLength());
/*
unsigned loc{0};
std::string ret;
while(!mInputStream->isFinished())
{ {
auto search_char = stream[loc]; if (mMaxLookAheadBufferIndex < 0)
{
input_stream_ended = true;
break;
}
const auto working_byte = mLookaheadBuffer.getItem(0);
std::cout << "Working byte is " << static_cast<int>(working_byte) << std::endl;
unsigned hit_length{0}; unsigned hit_length{0};
unsigned hit_offset{0}; unsigned hit_distance{0};
lookThroughSearchBuffer(search_char, hit_length, hit_offset, stream, loc); lookForMatches(working_byte, hit_length, hit_distance);
if (hit_length > 0) std::cout << "Got hit length " << hit_length << " and distance " << hit_distance << std::endl;
{
ret += "@" + std::to_string(hit_offset) + "L" + std::to_string(hit_length);
loc+=hit_length;
auto hit_loc = mSearchBuffer.size() - hit_offset; const Hit hit{hit_length, hit_distance, working_byte};
for(unsigned idx=hit_loc; idx<hit_loc + hit_length; idx++) mHitBuffer.push_back(hit);
populateSearchBuffer(hit);
if (hit_length == 0)
{ {
mSearchBuffer.push_back(mSearchBuffer[idx]); populateLookaheadBuffer(1);
}
} }
else else
{ {
ret += search_char; populateLookaheadBuffer(hit_length);
mSearchBuffer.push_back(search_char);
loc++;
} }
} }
return ret; return input_stream_ended;
*/
return false;
} }
// Read-only access to the hits recorded so far. The returned reference refers
// to the encoder's own member, so it is only usable while the encoder exists.
const std::vector<Lz77Encoder::Hit>& Lz77Encoder::getHitBuffer() const
{
return mHitBuffer;
}
/*
void Lz77Encoder::flushHitBuffer()
{
// If dynamic huffman build trees
if (!mCodeGenerator)
{
mCodeGenerator = std::make_unique<HuffmanEncoder>();
}
// Convert hit buffer to prefix codes and write to output stream
for (const auto& hit : mHitBuffer)
{
const auto& [length, distance, next_char] = hit;
PrefixCode code;
if (length == 0)
{
code = *mCodeGenerator->getLiteralValue(next_char);
std::cout << "Writing symbol " << static_cast<int>(next_char) << " with code " << ByteUtils::toString(code.getData(), code.getLength()) << "\n";
mOutputStream->writeNBits(code.getData(), code.getLength());
}
else
{
code = *mCodeGenerator->getLengthValue(length);
const auto distance_code = mCodeGenerator->getDistanceValue(distance);
std::cout << "Writing length " << length << " with code " << ByteUtils::toString(code.getData(), code.getLength()) << "\n";
mOutputStream->writeNBits(code.getData(), code.getLength());
std::cout << "Writing distance " << distance << " with code " << ByteUtils::toString(distance_code.getData(), distance_code.getLength()) << "\n";
mOutputStream->writeNBits(distance_code.getData(), distance_code.getLength());
}
}
auto eos_code = mCodeGenerator->getEndOfStreamValue();
std::cout << "Writing EOS value with code " << ByteUtils::toString(eos_code->getData(), eos_code->getLength()) << "\n";
mOutputStream->writeNBits(eos_code->getData(), eos_code->getLength());
}
*/
bool Lz77Encoder::decode() bool Lz77Encoder::decode()
{ {
/* /*
@ -203,3 +338,4 @@ bool Lz77Encoder::decode()
*/ */
return false; return false;
} }

View file

@ -2,40 +2,59 @@
#include "AbstractEncoder.h" #include "AbstractEncoder.h"
#include "HuffmanEncoder.h" #include "HuffmanEncoder.h"
#include "CircleBuffer.h"
#include <string> #include <string>
#include <vector> #include <vector>
#include <memory> #include <memory>
#include <tuple>
class PrefixCodeGenerator; class PrefixCodeGenerator;
class Lz77Encoder : public AbstractEncoder class Lz77Encoder : public AbstractEncoder
{ {
public: public:
using Buffer = std::vector<unsigned char>; using Hit = std::tuple<unsigned, unsigned, unsigned char>;
Lz77Encoder(BitStream* inputStream, BitStream* outputStream); Lz77Encoder(BitStream* inputStream, BitStream* outputStream);
unsigned lookAheadForMatchingChars(std::vector<char>& matchBuffer, unsigned searchIndex, unsigned hitOffset, const std::string& stream, unsigned streamLoc);
void lookThroughSearchBuffer(char searchChar, unsigned& hitLength, unsigned& hitOffset, const std::string& stream, unsigned streamLoc);
bool encode() override; bool encode() override;
bool decode() override; bool decode() override;
const std::vector<Hit>& getHitBuffer() const;
void setSearchBufferSize(unsigned size); void setSearchBufferSize(unsigned size);
void setLookAheadBufferSize(unsigned size); void setLookAheadBufferSize(unsigned size);
void setPrefixCodeGenerator(std::unique_ptr<PrefixCodeGenerator> generator); void setPrefixCodeGenerator(std::unique_ptr<PrefixCodeGenerator> generator);
bool hitBufferFull() const;
private: private:
bool lookAheadSourceEmpty() const;
unsigned char getSearchBufferItem(unsigned index) const;
unsigned lookAheadForMatchingChars(unsigned searchIndex);
void lookForMatches(unsigned char searchChar, unsigned& hitLength, unsigned& hitOffset);
void populateLookaheadBuffer(unsigned size, bool firstPass = false);
void populateSearchBuffer(const Hit& hit);
unsigned mMaxHitBufferSize{32000};
std::vector<Hit> mHitBuffer;
unsigned mSearchBufferSize{32000}; unsigned mSearchBufferSize{32000};
Buffer mSearchBuffer; CircleBuffer<unsigned char> mSearchBuffer;
unsigned mLookAheadBufferSize{256}; unsigned mLookAheadBufferSize{256};
Buffer mLookaheadBuffer; int mMaxLookAheadBufferIndex{0};
unsigned mMinLengthMatchSize{1};
CircleBuffer<unsigned char> mLookaheadBuffer;
std::unique_ptr<PrefixCodeGenerator> mCodeGenerator; std::unique_ptr<PrefixCodeGenerator> mCodeGenerator;
}; };

View file

@ -98,3 +98,47 @@ void HuffmanEncoder::encode(const std::unordered_map<unsigned char, unsigned>& c
std::cout << "********" << std::endl; std::cout << "********" << std::endl;
} }
// Stores the fixed-code flag. initializeLiteralLengthTable() reads this flag,
// so it has to be set before the tables are initialized to have any effect.
void HuffmanEncoder::setUseFixedCode(bool useFixed)
{
mUseFixedCode = useFixed;
}
// Placeholder: ignores `length` and always returns 0.
// NOTE(review): this non-const overload shares its name and parameter list
// with the const getLengthValue() that returns std::optional<PrefixCode>;
// a call made on a non-const HuffmanEncoder object would resolve to this
// stub. Confirm whether it is still needed.
uint32_t HuffmanEncoder::getLengthValue(unsigned length)
{
return 0;
}
// Looks up the prefix code for a literal byte in the literal/length table.
// Returns whatever the table lookup yields for that symbol.
std::optional<PrefixCode> HuffmanEncoder::getLiteralValue(unsigned char value) const
{
return mLiteralLengthTable.getCodeForSymbol(value);
}
// Looks up a prefix code for a match length in the literal/length table.
// NOTE(review): `length` is passed through unchanged as the symbol, into the
// same table getLiteralValue() uses, so a length and a literal with the same
// numeric value map to the same entry. DEFLATE encodes lengths through a
// separate symbol range plus extra bits; confirm whether a mapping step is
// still to be added here.
std::optional<PrefixCode> HuffmanEncoder::getLengthValue(unsigned length) const
{
return mLiteralLengthTable.getCodeForSymbol(length);
}
// Looks up a prefix code for a match distance in the distance table.
// NOTE(review): `distance` is used directly as the symbol, and nothing visible
// in initializeTrees() populates the distance table; verify it is set up
// elsewhere before relying on the result.
std::optional<PrefixCode> HuffmanEncoder::getDistanceValue(unsigned distance) const
{
return mDistanceTable.getCodeForSymbol(distance);
}
// Returns the prefix code used to mark the end of the stream, taken from the
// literal/length table.
std::optional<PrefixCode> HuffmanEncoder::getEndOfStreamValue() const
{
    // Symbol number reserved for the end-of-stream marker in the
    // literal/length alphabet.
    constexpr int end_of_stream_symbol{256};
    return mLiteralLengthTable.getCodeForSymbol(end_of_stream_symbol);
}
// Public entry point for table setup. Currently this only initializes the
// literal/length table; no other table is touched here.
void HuffmanEncoder::initializeTrees()
{
initializeLiteralLengthTable();
}
// Fills the literal/length table from the fixed DEFLATE code lengths and
// builds its prefix codes. Does nothing unless the fixed-code flag is set.
void HuffmanEncoder::initializeLiteralLengthTable()
{
    // Only the fixed-code path is handled; otherwise the table is left as is.
    if (!mUseFixedCode)
    {
        return;
    }

    mLiteralLengthTable.setInputLengthSequence(HuffmanFixedCodes::getDeflateFixedHuffmanCodes(), false);
    mLiteralLengthTable.buildPrefixCodes();
}

View file

@ -12,10 +12,11 @@ class PrefixCodeGenerator
{ {
public: public:
virtual ~PrefixCodeGenerator() = default; virtual ~PrefixCodeGenerator() = default;
virtual const PrefixCode& getLiteralValue(unsigned char value) const = 0; virtual std::optional<PrefixCode> getLiteralValue(unsigned char symbol) const = 0;
virtual std::optional<PrefixCode> getLengthValue(unsigned length) const = 0;
virtual const PrefixCode& getEndOfStreamValue() const = 0; virtual std::optional<PrefixCode> getDistanceValue(unsigned distance) const = 0;
virtual std::optional<PrefixCode> getEndOfStreamValue() const = 0;
}; };
class HuffmanEncoder : public PrefixCodeGenerator class HuffmanEncoder : public PrefixCodeGenerator
@ -27,36 +28,23 @@ public:
void encode(const DataStream& stream); void encode(const DataStream& stream);
void encode(const std::unordered_map<unsigned char, unsigned>& counts); void encode(const std::unordered_map<unsigned char, unsigned>& counts);
void setUseFixedCode(bool useFixed) uint32_t getLengthValue(unsigned length);
{
mUseFixedCode = useFixed;
}
uint32_t getLengthValue(unsigned length) std::optional<PrefixCode> getLiteralValue(unsigned char symbol) const override;
{
return 0;
}
const PrefixCode& getLiteralValue(unsigned char value) const override std::optional<PrefixCode> getLengthValue(unsigned length) const override;
{
return mLiteralLengthTable.getCode(value);
}
const PrefixCode& getEndOfStreamValue() const override std::optional<PrefixCode> getDistanceValue(unsigned distance) const override;
{
return mLiteralLengthTable.getCode(256);
}
void initializeLiteralLengthTable() std::optional<PrefixCode> getEndOfStreamValue() const override;
{
if(mUseFixedCode) void initializeTrees();
{
mLiteralLengthTable.setInputLengthSequence(HuffmanFixedCodes::getDeflateFixedHuffmanCodes(), false); void setUseFixedCode(bool useFixed);
mLiteralLengthTable.buildPrefixCodes();
}
}
private: private:
void initializeLiteralLengthTable();
void dumpTree(const RawTree<CountPair>& tree) const; void dumpTree(const RawTree<CountPair>& tree) const;
void dumpNode(RawNode<CountPair>* node, unsigned depth) const; void dumpNode(RawNode<CountPair>* node, unsigned depth) const;

View file

@ -9,6 +9,7 @@
#include "PngFilter.h" #include "PngFilter.h"
#include "Lz77Encoder.h" #include "Lz77Encoder.h"
#include "ZlibEncoder.h" #include "ZlibEncoder.h"
#include "HuffmanEncoder.h"
#include "CyclicRedundancyChecker.h" #include "CyclicRedundancyChecker.h"
#include "ByteUtils.h" #include "ByteUtils.h"
@ -181,10 +182,6 @@ void PngWriter::write(const std::unique_ptr<Image<unsigned char> >& image)
auto filter_out_stream = std::make_unique<BufferBitStream>(); auto filter_out_stream = std::make_unique<BufferBitStream>();
PngFilter filter(raw_image_stream, filter_out_stream.get()); PngFilter filter(raw_image_stream, filter_out_stream.get());
filter.encode(); filter.encode();
//while(!filter_out_stream->isFinished())
//{
//std::cout << "Got pix " << static_cast<int>(*filter_out_stream->readNextByte()) << std::endl;
//}
filter_out_stream->resetOffsets(); filter_out_stream->resetOffsets();
@ -199,9 +196,12 @@ void PngWriter::write(const std::unique_ptr<Image<unsigned char> >& image)
lz77_out_stream = std::make_unique<BufferBitStream>(); lz77_out_stream = std::make_unique<BufferBitStream>();
Lz77Encoder lz77_encoder(filter_out_stream.get(), lz77_out_stream.get()); Lz77Encoder lz77_encoder(filter_out_stream.get(), lz77_out_stream.get());
if (mCompressionMethod == Deflate::CompressionMethod::DYNAMIC_HUFFMAN) if (mCompressionMethod == Deflate::CompressionMethod::FIXED_HUFFMAN)
{ {
// Set up custom encoder; auto huffman_encoder = std::make_unique<HuffmanEncoder>();
huffman_encoder->setUseFixedCode(true);
huffman_encoder->initializeTrees();
lz77_encoder.setPrefixCodeGenerator(std::move(huffman_encoder));
} }
lz77_encoder.encode(); lz77_encoder.encode();
lz77_out_stream->resetOffsets(); lz77_out_stream->resetOffsets();

View file

@ -50,7 +50,7 @@ private:
PngInfo mPngInfo; PngInfo mPngInfo;
PngHeader mPngHeader; PngHeader mPngHeader;
Deflate::CompressionMethod mCompressionMethod{Deflate::CompressionMethod::NONE}; Deflate::CompressionMethod mCompressionMethod{Deflate::CompressionMethod::DYNAMIC_HUFFMAN};
}; };
using PngWriterPtr = std::unique_ptr<PngWriter>; using PngWriterPtr = std::unique_ptr<PngWriter>;

View file

@ -19,6 +19,7 @@ list(APPEND TestFiles
compiler/TestTemplatingEngine.cpp compiler/TestTemplatingEngine.cpp
compression/TestStreamCompressor.cpp compression/TestStreamCompressor.cpp
compression/TestHuffmanStream.cpp compression/TestHuffmanStream.cpp
compression/TestLz77Encoder.cpp
database/TestDatabase.cpp database/TestDatabase.cpp
fonts/TestFontReader.cpp fonts/TestFontReader.cpp
graphics/TestRasterizer.cpp graphics/TestRasterizer.cpp

View file

@ -0,0 +1,32 @@
#include <iostream>
#include "Lz77Encoder.h"
#include "BufferBitStream.h"
// Manual smoke test for Lz77Encoder: feeds a short byte sequence containing
// repeated runs through the encoder and prints every recorded hit as
// "length | distance | next_char". Nothing is asserted; the output is meant
// to be inspected by eye.
int main()
{
    const std::vector<unsigned> values {0, 10, 11, 12, 10, 11, 12, 0, 13, 14, 15, 10, 11, 12};
    //std::vector<unsigned> values {0, 1, 2, 3, 0, 1, 2, 3, 0,1};

    BufferBitStream input_stream;
    for (auto value : values)
    {
        input_stream.writeByte(value);
    }

    BufferBitStream output_stream;
    Lz77Encoder encoder(&input_stream, &output_stream);
    encoder.encode();

    // getHitBuffer() returns a const reference to the encoder's buffer; bind
    // to it directly instead of copying the whole vector. The encoder outlives
    // this reference, so the binding stays valid for the loop below.
    const auto& hit_buffer = encoder.getHitBuffer();
    for (const auto& hit : hit_buffer)
    {
        const auto& [length, distance, next_char] = hit;
        std::cout << "Got hit " << length << " | " << distance << " | " << static_cast<int>(next_char) << std::endl;
    }
    return 0;
}

View file

@ -28,6 +28,7 @@ void testCompressedPng()
PngWriter writer; PngWriter writer;
writer.setPath("test_compressed.png"); writer.setPath("test_compressed.png");
writer.setCompressionMethod(Deflate::CompressionMethod::NONE);
writer.write(image); writer.write(image);
return; return;
@ -72,9 +73,40 @@ void testFixedPng()
} }
// Writes a 10x10, single-channel, 8-bit image whose samples are all 10 to
// "test_dynamic.png", then prints a binary dump of the resulting file.
// NOTE(review): no compression method is set on the writer here, so its
// default applies; presumably that is the dynamic-Huffman path this test is
// named after. Confirm against PngWriter.
void testDynamicCompressedPng()
{
    unsigned image_width{10};
    unsigned image_height{10};
    unsigned channel_count{1};

    auto image = Image<unsigned char>::Create(image_width, image_height);
    image->setNumChannels(channel_count);
    image->setBitDepth(8);

    // Uniform fill. For a gradient instead, compute each sample as
    // 100 * idx / (width * height).
    const unsigned char fill_value{10};
    std::vector<unsigned char> data(image_width * image_height, fill_value);
    image->setData(data);

    PngWriter writer;
    writer.setPath("test_dynamic.png");
    writer.write(image);

    File test_file("test_dynamic.png");
    std::cout << test_file.dumpBinary();
}
int main() int main()
{ {
//testCompressedPng(); //testCompressedPng();
testFixedPng(); //testFixedPng();
testDynamicCompressedPng();
return 0; return 0;
} }