From af6fad72ebfb98e71574bbe8a4b57920902159ca Mon Sep 17 00:00:00 2001 From: James Grogan Date: Tue, 29 Nov 2022 12:05:08 +0000 Subject: [PATCH] Convert lz77 to use fixed buffer sizes. --- src/compression/Lz77Encoder.cpp | 272 +++++++++++++++------ src/compression/Lz77Encoder.h | 33 ++- src/compression/huffman/HuffmanEncoder.cpp | 44 ++++ src/compression/huffman/HuffmanEncoder.h | 42 ++-- src/image/png/PngWriter.cpp | 12 +- src/image/png/PngWriter.h | 2 +- test/CMakeLists.txt | 1 + test/compression/TestLz77Encoder.cpp | 32 +++ test/image/TestPngWriter.cpp | 34 ++- 9 files changed, 362 insertions(+), 110 deletions(-) create mode 100644 test/compression/TestLz77Encoder.cpp diff --git a/src/compression/Lz77Encoder.cpp b/src/compression/Lz77Encoder.cpp index cecf977..9818a64 100644 --- a/src/compression/Lz77Encoder.cpp +++ b/src/compression/Lz77Encoder.cpp @@ -8,55 +8,102 @@ #include Lz77Encoder::Lz77Encoder(BitStream* inputStream, BitStream* outputStream) - : AbstractEncoder(inputStream, outputStream) + : AbstractEncoder(inputStream, outputStream), + mSearchBuffer(mSearchBufferSize), + mLookaheadBuffer(mLookAheadBufferSize) { } - -unsigned Lz77Encoder::lookAheadForMatchingChars(std::vector& matchBuffer, unsigned searchIndex, unsigned hitOffset, const std::string& stream, unsigned streamLoc) +void Lz77Encoder::setPrefixCodeGenerator(std::unique_ptr generator) { - auto remaining_size = stream.size() - streamLoc; + mCodeGenerator = std::move(generator); +} - unsigned num_hits{1}; - for (unsigned jdx=1; jdx< remaining_size; jdx++) +bool Lz77Encoder::hitBufferFull() const +{ + return mHitBuffer.size() == mMaxHitBufferSize; +} + +void Lz77Encoder::populateSearchBuffer(const Hit& hit) +{ + const auto& [length, distance, next_char] = hit; + + if (length == 0) { - char buffer_char{0}; - if (searchIndex + jdx < mSearchBuffer.size()) + mSearchBuffer.addItem(next_char); + } + else + { + std::vector new_items(distance, 0); + for(unsigned idx=0 ;idx 0) { - matchBuffer.push_back(buffer_char); - num_hits++; + for(unsigned idx=0; idx(search_offset)); + } + + unsigned char lookahead_char = mLookaheadBuffer.getItem(idx); + + std::cout << "Checking search char " << static_cast(search_char) << " and lookup char " << static_cast(lookahead_char) << std::endl; + if ((lookahead_char != search_char) || (idx == mMaxLookAheadBufferIndex)) + { + if (idx + 1>= mMinLengthMatchSize) + { + length = idx + 1; + } break; } } - return num_hits; + return length; } -void Lz77Encoder::lookThroughSearchBuffer(char searchChar, unsigned& hitLength, unsigned& hitOffset, const std::string& stream, unsigned streamLoc) +void Lz77Encoder::lookForMatches(unsigned char searchChar, unsigned& hitLength, unsigned& hitOffset) { - for(unsigned idx=0; idx match_buffer{buffer_char}; - auto num_hits = lookAheadForMatchingChars(match_buffer, search_index, idx, stream, streamLoc); + std::cout << "Looking for hits " << std::endl; + auto num_hits = lookAheadForMatchingChars(idx + 1); - if (num_hits >= hitLength) + if (num_hits > 0 && num_hits >= hitLength) { hitLength = num_hits; hitOffset = idx + 1; @@ -65,75 +112,163 @@ void Lz77Encoder::lookThroughSearchBuffer(char searchChar, unsigned& hitLength, } } -void Lz77Encoder::setPrefixCodeGenerator(std::unique_ptr generator) +bool Lz77Encoder::lookAheadSourceEmpty() const { - mCodeGenerator = std::move(generator); + if (mLookaheadBuffer.getNumItems() < mLookAheadBufferSize) + { + return true; + } + + if (mMaxLookAheadBufferIndex < mLookAheadBufferSize - 1) + { + return true; + } + + return false; +} + +void Lz77Encoder::populateLookaheadBuffer(unsigned size, bool firstPass) +{ + if (!firstPass && lookAheadSourceEmpty()) + { + for(unsigned idx=0; idxreadNextByte(); + if (!byte) + { + stream_finished = true; + stream_end_id = idx -1; + mLookaheadBuffer.addItem(0); + mMaxLookAheadBufferIndex--; + continue; + } + else + { + mLookaheadBuffer.addItem(*byte); + } + } + else + { + mLookaheadBuffer.addItem(0); + mMaxLookAheadBufferIndex--; + } + } + + if (stream_finished && firstPass) + { + mMaxLookAheadBufferIndex = stream_end_id; + } } bool Lz77Encoder::encode() { if (!mCodeGenerator) { - auto code_generator = std::make_unique(); - auto huffman_encoder = code_generator.get(); - - mCodeGenerator = std::move(code_generator); - huffman_encoder->setUseFixedCode(true); - huffman_encoder->initializeLiteralLengthTable(); + mCodeGenerator = std::make_unique(); } - while(auto byte = mInputStream->readNextByte()) + // Fill the lookahead buffer + mMaxLookAheadBufferIndex = mLookAheadBufferSize - 1; + populateLookaheadBuffer(mLookAheadBufferSize, true); + if(mMaxLookAheadBufferIndex < 0) { - const auto code = mCodeGenerator->getLiteralValue(*byte); - - std::cout << "Writing value " << static_cast(*byte) << " with code " << ByteUtils::toString(code.getData(), code.getLength()) << "\n"; - - mOutputStream->writeNBits(code.getData(), code.getLength()); + return true; } - auto eos_code = mCodeGenerator->getEndOfStreamValue(); - std::cout << "Writing EOS value with code " << ByteUtils::toString(eos_code.getData(), eos_code.getLength()) << "\n"; - - mOutputStream->writeNBits(eos_code.getData(), eos_code.getLength()); - - - - /* - unsigned loc{0}; - std::string ret; - - while(!mInputStream->isFinished()) + bool input_stream_ended{false}; + while(!hitBufferFull()) { - auto search_char = stream[loc]; + if (mMaxLookAheadBufferIndex < 0) + { + input_stream_ended = true; + break; + } + + const auto working_byte = mLookaheadBuffer.getItem(0); + std::cout << "Working byte is " << static_cast(working_byte) << std::endl; unsigned hit_length{0}; - unsigned hit_offset{0}; - lookThroughSearchBuffer(search_char, hit_length, hit_offset, stream, loc); + unsigned hit_distance{0}; + lookForMatches(working_byte, hit_length, hit_distance); - if (hit_length > 0) + std::cout << "Got hit length " << hit_length << " and distance " << hit_distance << std::endl; + + const Hit hit{hit_length, hit_distance, working_byte}; + mHitBuffer.push_back(hit); + + populateSearchBuffer(hit); + if (hit_length == 0) { - ret += "@" + std::to_string(hit_offset) + "L" + std::to_string(hit_length); - loc+=hit_length; - - auto hit_loc = mSearchBuffer.size() - hit_offset; - for(unsigned idx=hit_loc; idx& Lz77Encoder::getHitBuffer() const +{ + return mHitBuffer; +} + +/* +void Lz77Encoder::flushHitBuffer() +{ + // If dynamic huffman build trees + if (!mCodeGenerator) + { + mCodeGenerator = std::make_unique(); + } + + // Convert hit buffer to prefix codes and write to output stream + for (const auto& hit : mHitBuffer) + { + const auto& [length, distance, next_char] = hit; + + PrefixCode code; + if (length == 0) + { + code = *mCodeGenerator->getLiteralValue(next_char); + std::cout << "Writing symbol " << static_cast(next_char) << " with code " << ByteUtils::toString(code.getData(), code.getLength()) << "\n"; + + mOutputStream->writeNBits(code.getData(), code.getLength()); + } + else + { + code = *mCodeGenerator->getLengthValue(length); + const auto distance_code = mCodeGenerator->getDistanceValue(distance); + + std::cout << "Writing length " << length << " with code " << ByteUtils::toString(code.getData(), code.getLength()) << "\n"; + mOutputStream->writeNBits(code.getData(), code.getLength()); + + std::cout << "Writing distance " << distance << " with code " << ByteUtils::toString(distance_code.getData(), distance_code.getLength()) << "\n"; + mOutputStream->writeNBits(distance_code.getData(), distance_code.getLength()); + } + } + + auto eos_code = mCodeGenerator->getEndOfStreamValue(); + std::cout << "Writing EOS value with code " << ByteUtils::toString(eos_code->getData(), eos_code->getLength()) << "\n"; + + mOutputStream->writeNBits(eos_code->getData(), eos_code->getLength()); +} +*/ + bool Lz77Encoder::decode() { /* @@ -203,3 +338,4 @@ bool Lz77Encoder::decode() */ return false; } + diff --git a/src/compression/Lz77Encoder.h b/src/compression/Lz77Encoder.h index ae5a80f..0d3ec85 100644 --- a/src/compression/Lz77Encoder.h +++ b/src/compression/Lz77Encoder.h @@ -2,40 +2,59 @@ #include "AbstractEncoder.h" #include "HuffmanEncoder.h" +#include "CircleBuffer.h" #include #include #include +#include class PrefixCodeGenerator; class Lz77Encoder : public AbstractEncoder { public: - using Buffer = std::vector; + using Hit = std::tuple; Lz77Encoder(BitStream* inputStream, BitStream* outputStream); - unsigned lookAheadForMatchingChars(std::vector& matchBuffer, unsigned searchIndex, unsigned hitOffset, const std::string& stream, unsigned streamLoc); - - void lookThroughSearchBuffer(char searchChar, unsigned& hitLength, unsigned& hitOffset, const std::string& stream, unsigned streamLoc); - bool encode() override; bool decode() override; + const std::vector& getHitBuffer() const; + void setSearchBufferSize(unsigned size); void setLookAheadBufferSize(unsigned size); void setPrefixCodeGenerator(std::unique_ptr generator); + bool hitBufferFull() const; + private: + + bool lookAheadSourceEmpty() const; + unsigned char getSearchBufferItem(unsigned index) const; + + unsigned lookAheadForMatchingChars(unsigned searchIndex); + + void lookForMatches(unsigned char searchChar, unsigned& hitLength, unsigned& hitOffset); + + void populateLookaheadBuffer(unsigned size, bool firstPass = false); + + void populateSearchBuffer(const Hit& hit); + + unsigned mMaxHitBufferSize{32000}; + std::vector mHitBuffer; + unsigned mSearchBufferSize{32000}; - Buffer mSearchBuffer; + CircleBuffer mSearchBuffer; unsigned mLookAheadBufferSize{256}; - Buffer mLookaheadBuffer; + int mMaxLookAheadBufferIndex{0}; + unsigned mMinLengthMatchSize{1}; + CircleBuffer mLookaheadBuffer; std::unique_ptr mCodeGenerator; }; diff --git a/src/compression/huffman/HuffmanEncoder.cpp b/src/compression/huffman/HuffmanEncoder.cpp index 852f8d7..35d56f5 100644 --- a/src/compression/huffman/HuffmanEncoder.cpp +++ b/src/compression/huffman/HuffmanEncoder.cpp @@ -98,3 +98,47 @@ void HuffmanEncoder::encode(const std::unordered_map& c std::cout << "********" << std::endl; } + +void HuffmanEncoder::setUseFixedCode(bool useFixed) +{ + mUseFixedCode = useFixed; +} + +uint32_t HuffmanEncoder::getLengthValue(unsigned length) +{ + return 0; +} + +std::optional HuffmanEncoder::getLiteralValue(unsigned char value) const +{ + return mLiteralLengthTable.getCodeForSymbol(value); +} + +std::optional HuffmanEncoder::getLengthValue(unsigned length) const +{ + return mLiteralLengthTable.getCodeForSymbol(length); +} + +std::optional HuffmanEncoder::getDistanceValue(unsigned distance) const +{ + return mDistanceTable.getCodeForSymbol(distance); +} + +std::optional HuffmanEncoder::getEndOfStreamValue() const +{ + return mLiteralLengthTable.getCodeForSymbol(256); +} + +void HuffmanEncoder::initializeTrees() +{ + initializeLiteralLengthTable(); +} + +void HuffmanEncoder::initializeLiteralLengthTable() +{ + if(mUseFixedCode) + { + mLiteralLengthTable.setInputLengthSequence(HuffmanFixedCodes::getDeflateFixedHuffmanCodes(), false); + mLiteralLengthTable.buildPrefixCodes(); + } +} diff --git a/src/compression/huffman/HuffmanEncoder.h b/src/compression/huffman/HuffmanEncoder.h index c092ac9..87285d4 100644 --- a/src/compression/huffman/HuffmanEncoder.h +++ b/src/compression/huffman/HuffmanEncoder.h @@ -12,10 +12,11 @@ class PrefixCodeGenerator { public: virtual ~PrefixCodeGenerator() = default; - virtual const PrefixCode& getLiteralValue(unsigned char value) const = 0; - - virtual const PrefixCode& getEndOfStreamValue() const = 0; + virtual std::optional getLiteralValue(unsigned char symbol) const = 0; + virtual std::optional getLengthValue(unsigned length) const = 0; + virtual std::optional getDistanceValue(unsigned distance) const = 0; + virtual std::optional getEndOfStreamValue() const = 0; }; class HuffmanEncoder : public PrefixCodeGenerator @@ -27,36 +28,23 @@ public: void encode(const DataStream& stream); void encode(const std::unordered_map& counts); - void setUseFixedCode(bool useFixed) - { - mUseFixedCode = useFixed; - } + uint32_t getLengthValue(unsigned length); - uint32_t getLengthValue(unsigned length) - { - return 0; - } + std::optional getLiteralValue(unsigned char symbol) const override; - const PrefixCode& getLiteralValue(unsigned char value) const override - { - return mLiteralLengthTable.getCode(value); - } + std::optional getLengthValue(unsigned length) const override; - const PrefixCode& getEndOfStreamValue() const override - { - return mLiteralLengthTable.getCode(256); - } + std::optional getDistanceValue(unsigned distance) const override; - void initializeLiteralLengthTable() - { - if(mUseFixedCode) - { - mLiteralLengthTable.setInputLengthSequence(HuffmanFixedCodes::getDeflateFixedHuffmanCodes(), false); - mLiteralLengthTable.buildPrefixCodes(); - } - } + std::optional getEndOfStreamValue() const override; + + void initializeTrees(); + + void setUseFixedCode(bool useFixed); private: + void initializeLiteralLengthTable(); + void dumpTree(const RawTree& tree) const; void dumpNode(RawNode* node, unsigned depth) const; diff --git a/src/image/png/PngWriter.cpp b/src/image/png/PngWriter.cpp index ac7ff95..168eb85 100644 --- a/src/image/png/PngWriter.cpp +++ b/src/image/png/PngWriter.cpp @@ -9,6 +9,7 @@ #include "PngFilter.h" #include "Lz77Encoder.h" #include "ZlibEncoder.h" +#include "HuffmanEncoder.h" #include "CyclicRedundancyChecker.h" #include "ByteUtils.h" @@ -181,10 +182,6 @@ void PngWriter::write(const std::unique_ptr >& image) auto filter_out_stream = std::make_unique(); PngFilter filter(raw_image_stream, filter_out_stream.get()); filter.encode(); - //while(!filter_out_stream->isFinished()) - //{ - //std::cout << "Got pix " << static_cast(*filter_out_stream->readNextByte()) << std::endl; - //} filter_out_stream->resetOffsets(); @@ -199,9 +196,12 @@ void PngWriter::write(const std::unique_ptr >& image) lz77_out_stream = std::make_unique(); Lz77Encoder lz77_encoder(filter_out_stream.get(), lz77_out_stream.get()); - if (mCompressionMethod == Deflate::CompressionMethod::DYNAMIC_HUFFMAN) + if (mCompressionMethod == Deflate::CompressionMethod::FIXED_HUFFMAN) { - // Set up custom encoder; + auto huffman_encoder = std::make_unique(); + huffman_encoder->setUseFixedCode(true); + huffman_encoder->initializeTrees(); + lz77_encoder.setPrefixCodeGenerator(std::move(huffman_encoder)); } lz77_encoder.encode(); lz77_out_stream->resetOffsets(); diff --git a/src/image/png/PngWriter.h b/src/image/png/PngWriter.h index 7e8ae6a..c153b24 100644 --- a/src/image/png/PngWriter.h +++ b/src/image/png/PngWriter.h @@ -50,7 +50,7 @@ private: PngInfo mPngInfo; PngHeader mPngHeader; - Deflate::CompressionMethod mCompressionMethod{Deflate::CompressionMethod::NONE}; + Deflate::CompressionMethod mCompressionMethod{Deflate::CompressionMethod::DYNAMIC_HUFFMAN}; }; using PngWriterPtr = std::unique_ptr; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index eca43e6..3fc9442 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -19,6 +19,7 @@ list(APPEND TestFiles compiler/TestTemplatingEngine.cpp compression/TestStreamCompressor.cpp compression/TestHuffmanStream.cpp + compression/TestLz77Encoder.cpp database/TestDatabase.cpp fonts/TestFontReader.cpp graphics/TestRasterizer.cpp diff --git a/test/compression/TestLz77Encoder.cpp b/test/compression/TestLz77Encoder.cpp new file mode 100644 index 0000000..87b87f8 --- /dev/null +++ b/test/compression/TestLz77Encoder.cpp @@ -0,0 +1,32 @@ +#include + +#include "Lz77Encoder.h" +#include "BufferBitStream.h" + +int main() +{ + std::vector values {0, 10, 11, 12, 10, 11, 12, 0, 13, 14, 15, 10, 11, 12}; + + //std::vector values {0, 1, 2, 3, 0, 1, 2, 3, 0,1}; + + BufferBitStream input_stream; + for (auto value : values) + { + input_stream.writeByte(value); + } + + BufferBitStream output_stream; + + Lz77Encoder encoder(&input_stream, &output_stream); + + encoder.encode(); + + auto hit_buffer = encoder.getHitBuffer(); + for(const auto& hit : hit_buffer) + { + const auto& [length, distance, next_char] = hit; + std::cout << "Got hit " << length << " | " << distance << " | " << static_cast(next_char) << std::endl; + } + + return 0; +} diff --git a/test/image/TestPngWriter.cpp b/test/image/TestPngWriter.cpp index 297c9cf..04e04ab 100644 --- a/test/image/TestPngWriter.cpp +++ b/test/image/TestPngWriter.cpp @@ -28,6 +28,7 @@ void testCompressedPng() PngWriter writer; writer.setPath("test_compressed.png"); + writer.setCompressionMethod(Deflate::CompressionMethod::NONE); writer.write(image); return; @@ -72,9 +73,40 @@ void testFixedPng() } +void testDynamicCompressedPng() +{ + unsigned width = 10; + unsigned height = 10; + unsigned numChannels = 1; + auto image = Image::Create(width, height); + image->setNumChannels(numChannels); + image->setBitDepth(8); + + std::vector data(width*height, 0); + for (unsigned idx=0; idxsetData(data); + + PngWriter writer; + writer.setPath("test_dynamic.png"); + writer.write(image); + + //return; + File test_file("test_dynamic.png"); + std::cout << test_file.dumpBinary(); + +} + int main() { //testCompressedPng(); - testFixedPng(); + //testFixedPng(); + testDynamicCompressedPng(); + return 0; }