diff --git a/src/compression/CMakeLists.txt b/src/compression/CMakeLists.txt index aa60c3e..e4f8954 100644 --- a/src/compression/CMakeLists.txt +++ b/src/compression/CMakeLists.txt @@ -1,7 +1,10 @@ list(APPEND compression_LIB_INCLUDES StreamCompressor.cpp - HuffmanEncoder.cpp + huffman/HuffmanEncoder.cpp + huffman/HuffmanStream.cpp + huffman/HuffmanCodeLengthTable.cpp + huffman/HuffmanTree.cpp RunLengthEncoder.cpp ZlibEncoder.cpp deflate/DeflateEncoder.cpp @@ -15,6 +18,7 @@ add_library(compression SHARED ${compression_LIB_INCLUDES}) target_include_directories(compression PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/deflate + ${CMAKE_CURRENT_SOURCE_DIR}/huffman ) target_link_libraries(compression PUBLIC core) diff --git a/src/compression/HuffmanEncoder.h b/src/compression/HuffmanEncoder.h deleted file mode 100644 index 39c9c7f..0000000 --- a/src/compression/HuffmanEncoder.h +++ /dev/null @@ -1,21 +0,0 @@ -#pragma once - -#include "RawTree.h" - -#include -#include - -class HuffmanEncoder -{ -using DataStream = std::vector; -using CountPair = std::pair; - -public: - void encode(const DataStream& stream); - - void encode(const std::unordered_map& counts); - -private: - void dumpTree(const RawTree& tree) const; - void dumpNode(RawNode* node, unsigned depth) const; -}; diff --git a/src/compression/Lz77Encoder.cpp b/src/compression/Lz77Encoder.cpp index 5fcfe2c..cecf977 100644 --- a/src/compression/Lz77Encoder.cpp +++ b/src/compression/Lz77Encoder.cpp @@ -2,6 +2,10 @@ #include "StringUtils.h" #include "BitStream.h" +#include "ByteUtils.h" +#include "HuffmanEncoder.h" + +#include Lz77Encoder::Lz77Encoder(BitStream* inputStream, BitStream* outputStream) : AbstractEncoder(inputStream, outputStream) @@ -61,8 +65,39 @@ void Lz77Encoder::lookThroughSearchBuffer(char searchChar, unsigned& hitLength, } } +void Lz77Encoder::setPrefixCodeGenerator(std::unique_ptr generator) +{ + mCodeGenerator = std::move(generator); +} + bool Lz77Encoder::encode() { + if (!mCodeGenerator) + { + auto code_generator = std::make_unique(); + auto huffman_encoder = code_generator.get(); + + mCodeGenerator = std::move(code_generator); + huffman_encoder->setUseFixedCode(true); + huffman_encoder->initializeLiteralLengthTable(); + } + + while(auto byte = mInputStream->readNextByte()) + { + const auto code = mCodeGenerator->getLiteralValue(*byte); + + std::cout << "Writing value " << static_cast(*byte) << " with code " << ByteUtils::toString(code.getData(), code.getLength()) << "\n"; + + mOutputStream->writeNBits(code.getData(), code.getLength()); + } + + auto eos_code = mCodeGenerator->getEndOfStreamValue(); + std::cout << "Writing EOS value with code " << ByteUtils::toString(eos_code.getData(), eos_code.getLength()) << "\n"; + + mOutputStream->writeNBits(eos_code.getData(), eos_code.getLength()); + + + /* unsigned loc{0}; std::string ret; diff --git a/src/compression/Lz77Encoder.h b/src/compression/Lz77Encoder.h index 08bf8e7..ae5a80f 100644 --- a/src/compression/Lz77Encoder.h +++ b/src/compression/Lz77Encoder.h @@ -1,9 +1,13 @@ #pragma once #include "AbstractEncoder.h" +#include "HuffmanEncoder.h" #include #include +#include + +class PrefixCodeGenerator; class Lz77Encoder : public AbstractEncoder { @@ -24,10 +28,14 @@ public: void setLookAheadBufferSize(unsigned size); + void setPrefixCodeGenerator(std::unique_ptr generator); + private: unsigned mSearchBufferSize{32000}; Buffer mSearchBuffer; unsigned mLookAheadBufferSize{256}; Buffer mLookaheadBuffer; + + std::unique_ptr mCodeGenerator; }; diff --git a/src/compression/RunLengthEncoder.cpp b/src/compression/RunLengthEncoder.cpp index e69de29..88444d1 100644 --- a/src/compression/RunLengthEncoder.cpp +++ b/src/compression/RunLengthEncoder.cpp @@ -0,0 +1,54 @@ +#include "RunLengthEncoder.h" + +std::vector RunLengthEncoder::encode(const std::vector& input) +{ + std::vector ret; + if (input.empty()) + { + return ret; + } + + char working_char{0}; + unsigned count = 1; + for(unsigned idx=0; idx RunLengthEncoder::decode(const std::vector& input) +{ + std::vector ret; + if (input.empty()) + { + return ret; + } + + for (const auto& hit : input) + { + for(unsigned idx=0; idx< hit.second; idx++) + { + ret.push_back(hit.first); + } + } + return ret; +} + diff --git a/src/compression/RunLengthEncoder.h b/src/compression/RunLengthEncoder.h index c9fb898..d8a7a95 100644 --- a/src/compression/RunLengthEncoder.h +++ b/src/compression/RunLengthEncoder.h @@ -1,110 +1,15 @@ #pragma once -#include "StringUtils.h" - #include -#include class RunLengthEncoder { public: - std::string encode(const std::string& string) - { - std::string ret; - if (string.empty()) - { - return ret; - } + using Hit = std::pair; - char working_char{0}; - unsigned count = 1; - for(unsigned idx=0; idx encode(const std::vector& input); - if (c == working_char) - { - count++; - } - else - { - insertCharacter(ret, working_char, count); - working_char = c; - count = 1; - } - } - insertCharacter(ret, working_char, count); - - return ret; - } - - std::string decode(const std::string& string) - { - std::string ret; - if (string.empty()) - { - return ret; - } - - unsigned count{0}; - while(count < string.size()) - { - auto c = string[count]; - if (c == mDelimiter) - { - count++; - std::string reps; - char working_char{0}; - while(count < string.size()) - { - auto rep_char = string[count]; - count++; - if (StringUtils::IsAlphabetical(rep_char)) - { - working_char = rep_char; - break; - } - else - { - reps += rep_char; - } - } - for (unsigned idx=0; idx decode(const std::vector& input); private: - - void insertCharacter(std::string& output, char c, unsigned count) - { - if (count >= 3) - { - output += mDelimiter + std::to_string(count) + c; - } - else - { - for (unsigned jdx=0;jdxreadNextNBits(delta, buffer); - working_bits = (working_bits << delta) | buffer; - - for(const auto& entry : mCodeLengthMapping[working_index].second) - { - if (entry.first == working_bits) - { - found = true; - working_symbol = entry.second; - break; - } - } - - if (!found) - { - working_index++; - if (working_index >= mCodeLengthMapping.size()) - { - break; - } - - auto new_count = mCodeLengthMapping[working_index].first; - delta = new_count - count; - count = new_count; - } - } - - if (found) - { - final_symbol = working_symbol; - std::cout << "Found symbol " << working_symbol << " with bits " << ByteUtils::toString(working_bits) << std::endl; - std::cout << "At Byte offset " << mInputStream->getCurrentByteOffset() << " and bit offset " << mInputStream->getCurrentBitOffset() << std::endl; - return true; - } - else - { - std::cout << "SYMBOL NOT FOUND " << " with bits " << ByteUtils::toString(working_bits) << std::endl; - return false; - } -} - -void DeflateBlock::setCodeLengthAlphabetLengths(const std::vector& lengths) -{ - mCodeLengthAlphabetLengths = lengths; -} - -void DeflateBlock::setCodeLengthLength(unsigned length) -{ - mHclen = length; -} - -void DeflateBlock::setLiteralsTableLength(unsigned length) -{ - mHlit = length; -} - -void DeflateBlock::setDistanceTableLength(unsigned length) -{ - mHdist = length; -} - -void DeflateBlock::setIsFinalBlock(bool isFinal) -{ - mInFinalBlock = isFinal; -} - -void DeflateBlock::flushToStream() -{ - -} - -void DeflateBlock::readLiteralCodeLengths() -{ - std::vector lengths; - unsigned char symbol{0}; - - while(lengths.size() < mHlit) - { - bool valid = readNextCodeLengthSymbol(symbol); - - if (!valid) - { - std::cout << "Hit unknown symbol - bailing out" << std::endl; - break; - } - - if (symbol < 16) - { - lengths.push_back(symbol); - } - else if(symbol == 16) - { - unsigned char num_reps{0}; - mInputStream->readNextNBits(2, num_reps); - - auto last_val = lengths[lengths.size()-1]; - std::cout << "Got val 16 doing " << 3 + num_reps << std::endl; - for(unsigned idx=0; idx< 3 + num_reps; idx++) - { - lengths.push_back(last_val); - } - } - else if(symbol == 17) - { - unsigned char num_reps{0}; - mInputStream->readNextNBits(3, num_reps); - - std::cout << "Got val 17 doing " << 3 + num_reps << std::endl; - for(unsigned idx=0; idx< 3 + num_reps; idx++) - { - lengths.push_back(0); - } - } - else if(symbol == 18) - { - unsigned char num_reps{0}; - mInputStream->readNextNBits(7, num_reps); - - std::cout << "Got val 18 doing " << 11 + num_reps << std::endl; - for(unsigned idx=0; idx< 11 + num_reps; idx++) - { - lengths.push_back(0); - } - } - } -} - -void DeflateBlock::buildCodeLengthMapping() -{ - for(unsigned idx=1; idx<8; idx++) - { - std::vector entries; - for(unsigned jdx=0; jdxreadNextNBits(5, h_lit); - mHlit = h_lit + 257; - std::cout << "Got HLIT " << mHlit << std::endl; - - unsigned char h_dist{0}; - mInputStream->readNextNBits(5, h_dist); - mHdist = h_dist + 1; - std::cout << "Got HDIST " << mHdist << std::endl; - - unsigned char h_clen{0}; - mInputStream->readNextNBits(4, h_clen); - mHclen = h_clen + 4; - std::cout << "Got HCLEN " << mHclen << std::endl; - - mCodeLengthAlphabetLengths = std::vector(19, 0); - unsigned char buffer{0}; - for(unsigned idx = 0; idx< mHclen; idx++) - { - mInputStream->readNextNBits(3, buffer); - mCodeLengthAlphabetLengths[CODE_LENGTH_ALPHABET_PERMUTATION[idx]] = buffer; - std::cout << "Got code length for " << CODE_LENGTH_ALPHABET_PERMUTATION[idx] << " of " << static_cast(buffer) << std::endl; - } - - buildCodeLengthMapping(); - - readLiteralCodeLengths(); -} - std::string DeflateBlock::getMetaData() const { std::stringstream sstr; @@ -237,6 +25,11 @@ std::string DeflateBlock::getMetaData() const return sstr.str(); } +void DeflateBlock::setIsFinalBlock(bool isFinal) +{ + mInFinalBlock = isFinal; +} + bool DeflateBlock::isFinalBlock() const { return mInFinalBlock; @@ -245,7 +38,9 @@ bool DeflateBlock::isFinalBlock() const bool DeflateBlock::read() { auto working_byte = *mInputStream->readNextByte(); - std::cout << "Into process data, starts with: "<< ByteUtils::toString(working_byte) << std::endl; + + std::cout << mInputStream->logNextNBytes(11); + std::cout << "DeflateBlock::read location " << mInputStream->logLocation(); unsigned char final_block{0}; mInputStream->readNextNBits(1, final_block); @@ -257,35 +52,64 @@ bool DeflateBlock::read() if (mCompressionMethod == Deflate::CompressionMethod::NONE) { - auto byte0 = *mInputStream->readNextByte(); - auto byte1 = *mInputStream->readNextByte(); - mUncompressedBlockLength = (byte0 << 8) | byte1; - - std::cout << "Check block 0: " << ByteUtils::toString(byte0) << std::endl; - std::cout << "Check block 1: " << ByteUtils::toString(byte1) << std::endl; - - auto byte2 = *mInputStream->readNextByte(); - auto byte3 = *mInputStream->readNextByte(); - uint16_t len_check = (byte2 << 8) | byte3; - - std::cout << "Check block 2: " << ByteUtils::toString(byte2) << std::endl; - std::cout << "Check block 3: " << ByteUtils::toString(byte3) << std::endl; - //if (!(byte0 ==(~byte2) && byte1 ==(~byte3))) - //{ - //std::cout << "Uncompressed block length check failed - aborting." << std::endl; - //return false; - //} - //else - //{ - for(unsigned idx=0; idxwriteByte(*mInputStream->readNextByte()); - } - //} + return readUncompressedStream(); } + else if(mCompressionMethod == Deflate::CompressionMethod::FIXED_HUFFMAN) + { + return readFixedHuffmanStream(); + } + else if(mCompressionMethod == Deflate::CompressionMethod::DYNAMIC_HUFFMAN) + { + return readDynamicHuffmanStream(); + } + return false; +} + +bool DeflateBlock::readUncompressedStream() +{ + auto byte0 = *mInputStream->readNextByte(); + auto byte1 = *mInputStream->readNextByte(); + mUncompressedBlockLength = (byte0 << 8) | byte1; + + std::cout << "Check block 0: " << ByteUtils::toString(byte0) << std::endl; + std::cout << "Check block 1: " << ByteUtils::toString(byte1) << std::endl; + + auto byte2 = *mInputStream->readNextByte(); + auto byte3 = *mInputStream->readNextByte(); + uint16_t len_check = (byte2 << 8) | byte3; + + std::cout << "Check block 2: " << ByteUtils::toString(byte2) << std::endl; + std::cout << "Check block 3: " << ByteUtils::toString(byte3) << std::endl; + //if (!(byte0 ==(~byte2) && byte1 ==(~byte3))) + //{ + //std::cout << "Uncompressed block length check failed - aborting." << std::endl; + //return false; + //} + //else + //{ + for(unsigned idx=0; idxwriteByte(*mInputStream->readNextByte()); + } + //} return true; } +bool DeflateBlock::readFixedHuffmanStream() +{ + std::cout << "Reading fixed huffman stream" << std::endl; + mHuffmanStream = std::make_unique(mInputStream, mOutputStream); + + mHuffmanStream->generateFixedCodeMapping(); + return mHuffmanStream->decode(); +} + +bool DeflateBlock::readDynamicHuffmanStream() +{ + mHuffmanStream = std::make_unique(mInputStream, mOutputStream); + return mHuffmanStream->decode(); +} + void DeflateBlock::write(uint16_t datalength) { mUncompressedBlockLength = datalength; @@ -296,20 +120,38 @@ void DeflateBlock::write(uint16_t datalength) if (mCompressionMethod == Deflate::CompressionMethod::NONE) { - std::cout << "Writing compression block header " << ByteUtils::toString(working_block) << std::endl; - mOutputStream->writeByte(working_block); - - std::cout << "Writing data length " << mUncompressedBlockLength << " " << ByteUtils::toString(mUncompressedBlockLength) << std::endl; - mOutputStream->writeWord(datalength); - - std::cout << "Writing iverse data length " << ~mUncompressedBlockLength << " " << ByteUtils::toString(~mUncompressedBlockLength) << std::endl; - mOutputStream->writeWord(static_cast(~mUncompressedBlockLength)); - - for(unsigned idx=0; idxwriteNBits(working_block, 3); + while(auto byte = mInputStream->readNextByte()) { - auto byte = *mInputStream->readNextByte(); - //std::cout << "Writing next byte " << static_cast(byte) << std::endl; - mOutputStream->writeByte(byte); + mOutputStream->writeByte(*byte); + } + + if (const auto& remaining_bits = mInputStream->getRemainingBits(); remaining_bits.second > 0) + { + mOutputStream->writeNBits(remaining_bits.first, remaining_bits.second); } } } + +void DeflateBlock::writeUncompressedStream(unsigned char working_byte, uint16_t datalength) +{ + std::cout << "Writing compression block header " << ByteUtils::toString(working_byte) << std::endl; + mOutputStream->writeByte(working_byte); + + std::cout << "Writing data length " << mUncompressedBlockLength << " " << ByteUtils::toString(mUncompressedBlockLength) << std::endl; + mOutputStream->writeWord(datalength); + + std::cout << "Writing iverse data length " << ~mUncompressedBlockLength << " " << ByteUtils::toString(~mUncompressedBlockLength) << std::endl; + mOutputStream->writeWord(static_cast(~mUncompressedBlockLength)); + + for(unsigned idx=0; idxreadNextByte(); + //std::cout << "Writing next byte " << static_cast(byte) << std::endl; + mOutputStream->writeByte(byte); + } +} diff --git a/src/compression/deflate/DeflateBlock.h b/src/compression/deflate/DeflateBlock.h index b5bf074..5658464 100644 --- a/src/compression/deflate/DeflateBlock.h +++ b/src/compression/deflate/DeflateBlock.h @@ -1,9 +1,12 @@ #pragma once #include "DeflateElements.h" +#include "HuffmanStream.h" #include "BitStream.h" +#include + class AbstractChecksumCalculator; class DeflateBlock @@ -11,51 +14,33 @@ class DeflateBlock public: DeflateBlock(BitStream* inputStream, BitStream* outputStream); - void buildCodeLengthMapping(); - std::string getMetaData() const; - void flushToStream(); - bool isFinalBlock() const; bool read(); - void readDynamicHuffmanTable(); - - void readLiteralCodeLengths(); - - bool readNextCodeLengthSymbol(unsigned char& buffer); - - void setCodeLengthAlphabetLengths(const std::vector& lengths); - - void setCodeLengthLength(unsigned length); - - void setLiteralsTableLength(unsigned length); - - void setDistanceTableLength(unsigned length); - void setIsFinalBlock(bool isFinal); + void setCompressionMethod(Deflate::CompressionMethod method) + { + mCompressionMethod = method; + } void write(uint16_t datalength); private: + bool readUncompressedStream(); + bool readFixedHuffmanStream(); + bool readDynamicHuffmanStream(); + + void writeUncompressedStream(unsigned char working_byte, uint16_t datalength); + BitStream* mInputStream; BitStream* mOutputStream; - unsigned mHlit{0}; - unsigned mHdist{0}; - unsigned mHclen{0}; - + std::unique_ptr mHuffmanStream; uint16_t mUncompressedBlockLength{0}; - using CodeLengthEntry = std::pair; - using CodeLengthCountEntry = std::pair >; - std::vector mCodeLengthMapping; - - std::vector mCodeLengthAlphabetLengths; - static constexpr unsigned CODE_LENGTH_ALPHABET_PERMUTATION[19]{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; - bool mInFinalBlock{false}; Deflate::CompressionMethod mCompressionMethod{Deflate::CompressionMethod::NONE}; }; diff --git a/src/compression/deflate/DeflateEncoder.cpp b/src/compression/deflate/DeflateEncoder.cpp index 18f9031..b797a38 100644 --- a/src/compression/deflate/DeflateEncoder.cpp +++ b/src/compression/deflate/DeflateEncoder.cpp @@ -1,6 +1,7 @@ #include "DeflateEncoder.h" #include "BitStream.h" +#include "ByteUtils.h" #include "DeflateBlock.h" #include "BufferBitStream.h" @@ -22,6 +23,7 @@ bool DeflateEncoder::encode() uint16_t count = 0; BufferBitStream stream; std::unique_ptr working_block = std::make_unique(&stream, mOutputStream); + working_block->setCompressionMethod(mCompressionMethod); AbstractChecksumCalculator* checksum_calc; if (mChecksumCalculators.size() > 0) @@ -38,15 +40,22 @@ bool DeflateEncoder::encode() working_block->write(count); working_block = std::make_unique(&stream, mOutputStream); + working_block->setCompressionMethod(mCompressionMethod); stream.reset(); } if (auto byte = mInputStream->readNextByte()) { + std::cout << "Adding byte " << ByteUtils::toString(*byte) << " to deflate block input" << std::endl; stream.writeByte(*byte); } else { + if (const auto& remaining_bits = mInputStream->getRemainingBits(); remaining_bits.second > 0) + { + stream.writeNBits(remaining_bits.first, remaining_bits.second); + } + stream.resetOffsets(); working_block->setIsFinalBlock(true); @@ -56,7 +65,7 @@ bool DeflateEncoder::encode() } count++; } - + mOutputStream->flushRemainingBits(); mOutputStream->clearChecksumCalculator(); return true; } diff --git a/src/compression/huffman/HuffmanCodeLengthTable.cpp b/src/compression/huffman/HuffmanCodeLengthTable.cpp new file mode 100644 index 0000000..625a7d0 --- /dev/null +++ b/src/compression/huffman/HuffmanCodeLengthTable.cpp @@ -0,0 +1,175 @@ +#include "HuffmanCodeLengthTable.h" + +#include "ByteUtils.h" +#include "RunLengthEncoder.h" + +#include +#include +#include + +void HuffmanCodeLengthTable::buildCompressedLengthSequence() +{ + RunLengthEncoder rl_encoder; + auto rle_encoded = rl_encoder.encode(mInputLengthSequence); + + for (const auto& entry : rle_encoded) + { + std::cout << "Got rle " << static_cast(entry.first) << " | " << entry.second << std::endl; + } + mCompressedLengthSequence.clear(); + + for (const auto& entry : rle_encoded) + { + const auto length = entry.first; + const auto count = entry.second; + if (count < 3) + { + for(unsigned idx=0; idx<3; idx++) + { + mCompressedLengthSequence.push_back({length, 0}); + } + } + else if (length == 0) + { + if(count >=3 && count <=10) + { + mCompressedLengthSequence.push_back({17, count-3}); + } + else + { + mCompressedLengthSequence.push_back({18, count-11}); + } + } + else + { + mCompressedLengthSequence.push_back({length, 0}); + auto num_blocks_of_six = (count-1)/6; + for(unsigned idx=0; idx= 3) + { + mCompressedLengthSequence.push_back({16, remaining_counts - 3}); + } + else + { + for(unsigned idx=0; idx(19, 0); + for (const auto& entry : mCompressedLengthSequence) + { + mCompressedLengthCounts[entry.first]++; + } +} + +const std::vector& HuffmanCodeLengthTable::getCompressedLengthSequence() const +{ + return mCompressedLengthSequence; +} + +const std::vector HuffmanCodeLengthTable::getCompressedLengthCounts() const +{ + return mCompressedLengthCounts; +} + +void HuffmanCodeLengthTable::buildPrefixCodes() +{ + if(mInputLengthSequence.empty()) + { + return; + } + + unsigned char max_length = *std::max_element(mInputLengthSequence.begin(), mInputLengthSequence.end()); + std::vector counts(max_length+1, 0); + for (const auto length : mInputLengthSequence) + { + counts[length]++; + } + counts[0] = 0; + + uint32_t code{0}; + std::vector next_code(max_length + 1, 0); + for (unsigned bits = 1; bits <= max_length; bits++) + { + code = (code + counts[bits-1]) << 1; + next_code[bits] = code; + } + + for(std::size_t idx=0; idx= DEFLATE_PERMUTATION_SIZE) + { + return 0; + } + else + { + return DEFLATE_PERMUTATION[index]; + } +} + +unsigned HuffmanCodeLengthTable::getNumCodeLengths() const +{ + return mTree.getNumCodeLengths(); +} + +std::optional HuffmanCodeLengthTable::findMatch(std::size_t treeIndex, uint32_t code) const +{ + return mTree.findMatch(treeIndex, code); +} + +unsigned HuffmanCodeLengthTable::getCodeLength(std::size_t index) const +{ + return mTree.getCodeLength(index); +} + +void HuffmanCodeLengthTable::setInputLengthSequence(const std::vector& sequence, bool targetDeflate) +{ + mTargetDeflate = targetDeflate; + + if (targetDeflate) + { + mInputLengthSequence = std::vector(DEFLATE_PERMUTATION_SIZE, 0); + for(unsigned idx=0; idx(sequence[idx]) << std::endl; + } + } + else + { + mInputLengthSequence = sequence; + } +} diff --git a/src/compression/huffman/HuffmanCodeLengthTable.h b/src/compression/huffman/HuffmanCodeLengthTable.h new file mode 100644 index 0000000..18624b8 --- /dev/null +++ b/src/compression/huffman/HuffmanCodeLengthTable.h @@ -0,0 +1,50 @@ +#pragma once + +#include "HuffmanTree.h" + +#include +#include +#include + +class HuffmanCodeLengthTable +{ +public: + void buildPrefixCodes(); + + void buildCompressedLengthSequence(); + + std::string dumpPrefixCodes() const; + + std::optional findMatch(std::size_t treeIndex, uint32_t code) const; + + const HuffmanTree& getTree() const; + + const PrefixCode& getCode(std::size_t index) const; + + using CompressedSequenceEntry = std::pair; + const std::vector& getCompressedLengthSequence() const; + + const std::vector getCompressedLengthCounts() const; + + unsigned getNumCodeLengths() const; + + unsigned getCodeLength(std::size_t treeIndex) const; + + unsigned mapToDeflateIndex(unsigned index) const; + + void setInputLengthSequence(const std::vector& sequence, bool targetDeflate = true); + +private: + + HuffmanTree mTree; + bool mTargetDeflate{true}; + + std::vector mInputLengthSequence; + std::vector mCodes; + + std::vector mCompressedLengthSequence; + std::vector mCompressedLengthCounts; + + static constexpr unsigned DEFLATE_PERMUTATION_SIZE{19}; + static constexpr unsigned DEFLATE_PERMUTATION[DEFLATE_PERMUTATION_SIZE]{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; +}; diff --git a/src/compression/HuffmanEncoder.cpp b/src/compression/huffman/HuffmanEncoder.cpp similarity index 100% rename from src/compression/HuffmanEncoder.cpp rename to src/compression/huffman/HuffmanEncoder.cpp diff --git a/src/compression/huffman/HuffmanEncoder.h b/src/compression/huffman/HuffmanEncoder.h new file mode 100644 index 0000000..c092ac9 --- /dev/null +++ b/src/compression/huffman/HuffmanEncoder.h @@ -0,0 +1,67 @@ +#pragma once + +#include "RawTree.h" + +#include "HuffmanCodeLengthTable.h" +#include "HuffmanFixedCodes.h" + +#include +#include + +class PrefixCodeGenerator +{ +public: + virtual ~PrefixCodeGenerator() = default; + virtual const PrefixCode& getLiteralValue(unsigned char value) const = 0; + + virtual const PrefixCode& getEndOfStreamValue() const = 0; + +}; + +class HuffmanEncoder : public PrefixCodeGenerator +{ +using DataStream = std::vector; +using CountPair = std::pair; + +public: + void encode(const DataStream& stream); + void encode(const std::unordered_map& counts); + + void setUseFixedCode(bool useFixed) + { + mUseFixedCode = useFixed; + } + + uint32_t getLengthValue(unsigned length) + { + return 0; + } + + const PrefixCode& getLiteralValue(unsigned char value) const override + { + return mLiteralLengthTable.getCode(value); + } + + const PrefixCode& getEndOfStreamValue() const override + { + return mLiteralLengthTable.getCode(256); + } + + void initializeLiteralLengthTable() + { + if(mUseFixedCode) + { + mLiteralLengthTable.setInputLengthSequence(HuffmanFixedCodes::getDeflateFixedHuffmanCodes(), false); + mLiteralLengthTable.buildPrefixCodes(); + } + } + +private: + void dumpTree(const RawTree& tree) const; + void dumpNode(RawNode* node, unsigned depth) const; + + bool mUseFixedCode{false}; + bool mTableIsInitialized{false}; + HuffmanCodeLengthTable mLiteralLengthTable; + HuffmanCodeLengthTable mDistanceTable; +}; diff --git a/src/compression/huffman/HuffmanFixedCodes.h b/src/compression/huffman/HuffmanFixedCodes.h new file mode 100644 index 0000000..82ef9db --- /dev/null +++ b/src/compression/huffman/HuffmanFixedCodes.h @@ -0,0 +1,20 @@ +#pragma once + +#include + +namespace HuffmanFixedCodes +{ + inline std::vector getDeflateFixedHuffmanCodes() + { + std::vector > mappings {{144, 8}, {112, 9}, {24, 7}, {8 ,8}}; + std::vector sequence; + for(const auto& entry : mappings) + { + for(unsigned idx=0;idx +#include +#include +#include + +HuffmanStream::HuffmanStream(BitStream* inputStream, BitStream* outputStream) + : mInputStream(inputStream), + mOutputStream(outputStream) +{ + +} + +void HuffmanStream::generateFixedCodeMapping() +{ + mUsingFixedCodes = true; + mCodeLengthTable.setInputLengthSequence(HuffmanFixedCodes::getDeflateFixedHuffmanCodes(), false); + mCodeLengthTable.buildPrefixCodes(); +} + +bool HuffmanStream::readNextCodeLengthSymbol(unsigned& final_symbol) +{ + if (mCodeLengthTable.getNumCodeLengths() == 0) + { + return false; + } + + unsigned working_index{0}; + auto length = mCodeLengthTable.getCodeLength(working_index); + auto delta = length; + + bool found{false}; + unsigned char buffer{0}; + uint32_t working_bits{0}; + unsigned working_symbol{0}; + + while(!found) + { + auto valid = mInputStream->readNextNBits(delta, buffer); + //std::cout << "Got buffer " << ByteUtils::toString(buffer) << std::endl;; + + working_bits = working_bits | (buffer << (length - delta)); + std::cout << "Read " << delta << " bits with length " << length << " and value " << ByteUtils::toString(working_bits) << std::endl; + + if (const auto symbol = mCodeLengthTable.findMatch(working_index, working_bits)) + { + found = true; + working_symbol = *symbol; + } + else + { + working_index++; + if (working_index >= mCodeLengthTable.getNumCodeLengths()) + { + break; + } + + auto new_length = mCodeLengthTable.getCodeLength(working_index); + delta = new_length - length; + length = new_length; + } + } + + if (found) + { + final_symbol = working_symbol; + std::cout << "Found symbol " << working_symbol << " with bits " << ByteUtils::toString(working_bits) << std::endl; + std::cout << "At Byte offset " << mInputStream->getCurrentByteOffset() << " and bit offset " << mInputStream->getCurrentBitOffset() << std::endl; + return true; + } + else + { + std::cout << "SYMBOL NOT FOUND " << " with bits " << ByteUtils::toString(working_bits) << std::endl; + return false; + } +} + +void HuffmanStream::readLiteralCodeLengths() +{ + std::vector lengths; + unsigned symbol{0}; + + while(lengths.size() < mNumLiterals) + { + bool valid = readNextCodeLengthSymbol(symbol); + + if (!valid) + { + std::cout << "Hit unknown symbol - bailing out" << std::endl; + break; + } + + if (symbol < 16) + { + lengths.push_back(symbol); + } + else if(symbol == 16) + { + unsigned char num_reps{0}; + mInputStream->readNextNBits(2, num_reps); + + auto last_val = lengths[lengths.size()-1]; + std::cout << "Got val 16 doing " << 3 + num_reps << std::endl; + for(unsigned idx=0; idx< 3 + num_reps; idx++) + { + lengths.push_back(last_val); + } + } + else if(symbol == 17) + { + unsigned char num_reps{0}; + mInputStream->readNextNBits(3, num_reps); + + std::cout << "Got val 17 doing " << 3 + num_reps << std::endl; + for(unsigned idx=0; idx< 3 + num_reps; idx++) + { + lengths.push_back(0); + } + } + else if(symbol == 18) + { + unsigned char num_reps{0}; + mInputStream->readNextNBits(7, num_reps); + + std::cout << "Got val 18 doing " << 11 + num_reps << std::endl; + for(unsigned idx=0; idx< 11 + num_reps; idx++) + { + lengths.push_back(0); + } + } + } +} + +bool HuffmanStream::decode() +{ + if (!mUsingFixedCodes) + { + readCodingsTable(); + } + else + { + bool found_end_seq{false}; + unsigned symbol{0}; + while(!found_end_seq) + { + bool valid = readNextCodeLengthSymbol(symbol); + + if (!valid) + { + std::cout << "Hit unknown symbol - bailing out" << std::endl; + break; + } + + if (symbol == 256) + { + found_end_seq = true; + break; + } + } + } + + + + return false; +} + +void HuffmanStream::readCodingsTable() +{ + unsigned char h_lit{0}; + mInputStream->readNextNBits(5, h_lit); + mNumLiterals = h_lit + 257; + std::cout << "Got HLIT " << mNumLiterals << std::endl; + + unsigned char h_dist{0}; + mInputStream->readNextNBits(5, h_dist); + mNumDistances = h_dist + 1; + std::cout << "Got HDIST " << mNumDistances << std::endl; + + unsigned char h_clen{0}; + mInputStream->readNextNBits(4, h_clen); + + auto num_code_lengths = h_clen + 4; + std::cout << "Got HCLEN " << num_code_lengths << std::endl; + + auto sequence = std::vector(num_code_lengths, 0); + unsigned char buffer{0}; + for(unsigned idx = 0; idx< num_code_lengths; idx++) + { + std::cout << "After codings " << mInputStream->logLocation(); + mInputStream->readNextNBits(3, buffer); + sequence[idx] = buffer; + } + + mCodeLengthTable.setInputLengthSequence(sequence, true); + mCodeLengthTable.buildPrefixCodes(); + + readLiteralCodeLengths(); +} diff --git a/src/compression/huffman/HuffmanStream.h b/src/compression/huffman/HuffmanStream.h new file mode 100644 index 0000000..37dd6b3 --- /dev/null +++ b/src/compression/huffman/HuffmanStream.h @@ -0,0 +1,38 @@ +#pragma once + +#include "BitStream.h" +#include "HuffmanCodeLengthTable.h" + +#include +#include + + +class HuffmanStream +{ + +public: + HuffmanStream(BitStream* inputStream, BitStream* outputStream); + + bool decode(); + + void generateFixedCodeMapping(); + + void setCodeLengthAlphabetLengths(const std::vector& lengths); + +private: + void readCodingsTable(); + + void readLiteralCodeLengths(); + + bool readNextCodeLengthSymbol(unsigned& buffer); + + BitStream* mInputStream; + BitStream* mOutputStream; + + unsigned mNumLiterals{0}; // HLIT + 257 + unsigned mNumDistances{0}; // HDIST + 1 + + bool mUsingFixedCodes{false}; + HuffmanCodeLengthTable mCodeLengthTable; + +}; diff --git a/src/compression/huffman/HuffmanTree.cpp b/src/compression/huffman/HuffmanTree.cpp new file mode 100644 index 0000000..60976c3 --- /dev/null +++ b/src/compression/huffman/HuffmanTree.cpp @@ -0,0 +1,106 @@ +#include "HuffmanTree.h" + +#include "ByteUtils.h" + +#include +#include +#include + +PrefixCode::PrefixCode(uint32_t data, unsigned length) + : mLength(length) +{ + mData = ByteUtils::mirror(data, length); +} + +bool PrefixCode::matches(unsigned length, uint32_t code) const +{ + return (mLength == length) && (mData == code); +} + +std::string PrefixCode::toString(bool bitsAsRightToLeft) const +{ + if (bitsAsRightToLeft) + { + if (mLength <=8 ) + { + return ByteUtils::toString(mData).substr(8 - mLength, mLength); + } + else + { + return ByteUtils::toString(mData, mLength); + } + } + else + { + if (mLength <=8 ) + { + return ByteUtils::toString(ByteUtils::mirror(mData, mLength)).substr(0, mLength); + } + else + { + return ByteUtils::toString(mData, mLength); + } + } +} + +void HuffmanTree::addCodeLengthEntry(unsigned length, const CodeSymbolPair& data) +{ + bool found{false}; + for (auto& entry : mTable) + { + if (entry.first == length) + { + entry.second.push_back(data); + found = true; + break; + } + } + + if (!found) + { + mTable.push_back({length, {data}}); + } +} + +void HuffmanTree::sortTable() +{ + std::sort(mTable.begin(), mTable.end(), [](CodeLengthData a, CodeLengthData b){return a.first < b.first;}); +} + +std::optional HuffmanTree::findMatch(std::size_t treeIndex, uint32_t code) const +{ + const auto& legth_data = mTable[treeIndex]; + for(const auto& entry : legth_data.second) + { + //std::cout << "Checking if " << entry.second << " matches code " << ByteUtils::toString(code) << std::endl;; + if (entry.first.matches(legth_data.first, code)) + { + return entry.second; + } + } + return std::nullopt; +} + +std::size_t HuffmanTree::getNumCodeLengths() const +{ + return mTable.size(); +} + +unsigned HuffmanTree::getCodeLength(std::size_t idx) const +{ + return mTable[idx].first; +} + +std::string HuffmanTree::dump(bool bitsAsRightToLeft) const +{ + std::stringstream sstr; + for (const auto& code_length_data : mTable) + { + sstr << "Prefix table for Code Length " << code_length_data.first << " has vals: \n"; + for (const auto& entry : code_length_data.second) + { + sstr << "Code " << entry.first.toString(bitsAsRightToLeft) << " Symbol: " << entry.second << '\n'; + } + } + return sstr.str(); +} diff --git a/src/compression/huffman/HuffmanTree.h b/src/compression/huffman/HuffmanTree.h new file mode 100644 index 0000000..56e7f1c --- /dev/null +++ b/src/compression/huffman/HuffmanTree.h @@ -0,0 +1,52 @@ +#pragma once + +#include +#include +#include + +class PrefixCode +{ +public: + PrefixCode(uint32_t data, unsigned length); + + std::string toString(bool bitsAsRightToLeft = true) const; + + bool matches(unsigned length, uint32_t code) const; + + uint32_t getData() const + { + return mData; + } + + unsigned getLength() const + { + return mLength; + } + +private: + unsigned mLength{0}; + uint32_t mData{0}; +}; + +class HuffmanTree +{ +public: + using Symbol = unsigned; + using CodeLength = unsigned; + using CodeSymbolPair = std::pair; + using CodeLengthData = std::pair >; + + void addCodeLengthEntry(unsigned length, const CodeSymbolPair& data); + + std::string dump(bool bitsAsRightToLeft = true) const; + + std::optional findMatch(std::size_t treeIndex, uint32_t code) const; + + std::size_t getNumCodeLengths() const; + + unsigned getCodeLength(std::size_t idx) const; + + void sortTable(); +private: + std::vector mTable; +}; diff --git a/src/core/ByteUtils.cpp b/src/core/ByteUtils.cpp index 7ad14bb..68b3096 100644 --- a/src/core/ByteUtils.cpp +++ b/src/core/ByteUtils.cpp @@ -26,7 +26,20 @@ unsigned char ByteUtils::getByteN(uint32_t input, unsigned n) return (input << 8*n) >> 24; } -unsigned char ByteUtils::getLowerNBits(unsigned char input, unsigned num) +unsigned char ByteUtils::mirror(unsigned char byte, unsigned length) +{ + unsigned char ret{0}; + for(unsigned idx=0; idx 0 && idx % 8 == 0) + { + if (ret.empty()) + { + ret = working; + } + else + { + ret = working + '-' + ret; + } + working = ""; + } + working += getBitN(input, 7 - idx) ? '1' : '0'; + } + + if (length <= 8) + { + ret = working; + } + else if(!working.empty()) + { + ret = working + '-' + ret; } return ret; } diff --git a/src/core/ByteUtils.h b/src/core/ByteUtils.h index a34c2ea..bd07125 100644 --- a/src/core/ByteUtils.h +++ b/src/core/ByteUtils.h @@ -21,17 +21,19 @@ public: static unsigned char getHigherNBits(unsigned char input, unsigned num); - static unsigned char getLowerNBits(unsigned char input, unsigned num); + static unsigned char getLowerNBits(uint32_t input, unsigned num); static unsigned char getTwoBitsAtN(unsigned char input, unsigned n); static unsigned char getMBitsAtN(unsigned char input, unsigned m, unsigned n); - static unsigned char getBitN(unsigned char input, unsigned n); + static bool getBitN(uint32_t input, unsigned n); static unsigned char getFromString(const std::string& string); - static std::string toString(unsigned char c); + static std::string toString(uint32_t input, unsigned length = 8); + + static unsigned char mirror(unsigned char byte, unsigned length=0); static void ReverseBuffer(char* buffer, char* reverse, unsigned size, unsigned targetSize); diff --git a/src/core/streams/BitStream.cpp b/src/core/streams/BitStream.cpp index 2505199..b6323f5 100644 --- a/src/core/streams/BitStream.cpp +++ b/src/core/streams/BitStream.cpp @@ -24,6 +24,7 @@ void BitStream::write(uint32_t data) unsigned num_bytes = sizeof(uint32_t); for(unsigned idx=0; idx 0) + { + unsigned char lower_bits = ByteUtils::getLowerNBits(data, num_left); + mCurrentByte |= lower_bits << mBitOffset; + + writeByte(mCurrentByte, false); + + auto num_bytes = overshoot / 8; + for (unsigned idx=0; idx< num_bytes; idx++) + { + mCurrentByte = ByteUtils::getMBitsAtN(data, overshoot, idx*8 + num_left); + writeByte(mCurrentByte, false); + } + + if (const auto remainder = overshoot % 8; remainder > 0) + { + mCurrentByte = ByteUtils::getMBitsAtN(data, remainder, num_bytes*8 + num_left); + mBitOffset = remainder; + } + else + { + mCurrentByte = 0; + mBitOffset = 0; + } + } + else + { + mCurrentByte |= (static_cast(data) << mBitOffset); + mBitOffset += length; + if (mBitOffset == 8) + { + writeByte(mCurrentByte, false); + mCurrentByte = 0; + mBitOffset = 0; + } + } + +} + bool BitStream::readNextNBits(unsigned n, unsigned char& buffer) { if (mByteOffset < 0) diff --git a/src/core/streams/BitStream.h b/src/core/streams/BitStream.h index fc971e6..9cef91a 100644 --- a/src/core/streams/BitStream.h +++ b/src/core/streams/BitStream.h @@ -21,13 +21,17 @@ public: std::string logNextNBytes(unsigned n) const; + std::string logLocation(); + virtual std::vector peekNextNBytes(unsigned n) const = 0; virtual bool readNextNBits(unsigned n, unsigned char& buffer); virtual std::optional readNextByte() = 0; - virtual void writeByte(unsigned char data) = 0; + virtual void writeNBits(uint32_t data, unsigned length); + + virtual void writeByte(unsigned char data, bool checkOverflow = true) = 0; void write(uint32_t data); @@ -37,6 +41,11 @@ public: void resetOffsets() { + mEndByteOffset = mByteOffset; + mEndBitOffset = mBitOffset; + mEndByte = mCurrentByte; + + mCurrentByte = 0; mByteOffset = -1; mBitOffset = 0; } @@ -47,6 +56,20 @@ public: mCurrentByte = 0; } + void flushRemainingBits() + { + if (mBitOffset > 0) + { + writeByte(mCurrentByte, false); + mBitOffset = 0; + } + } + + std::pair getRemainingBits() const + { + return {mEndByte, mEndBitOffset}; + } + void setChecksumCalculator(AbstractChecksumCalculator* calc) { mChecksumCalculator = calc; @@ -60,8 +83,10 @@ public: protected: int mByteOffset{-1}; unsigned mBitOffset{0}; - unsigned char mCurrentByte{0}; + int mEndByteOffset{-1}; + unsigned mEndBitOffset{0}; + unsigned char mEndByte{0}; AbstractChecksumCalculator* mChecksumCalculator{nullptr}; }; diff --git a/src/core/streams/BufferBitStream.cpp b/src/core/streams/BufferBitStream.cpp index 83225ec..3667244 100644 --- a/src/core/streams/BufferBitStream.cpp +++ b/src/core/streams/BufferBitStream.cpp @@ -1,5 +1,7 @@ #include "BufferBitStream.h" +#include "ByteUtils.h" + #include bool BufferBitStream::isFinished() const @@ -50,13 +52,27 @@ void BufferBitStream::setBuffer(const std::vector& data) mBuffer = data; } -void BufferBitStream::writeByte(unsigned char data) +void BufferBitStream::writeByte(unsigned char data, bool checkOverflow) { + unsigned char out_byte{0}; + if (checkOverflow && mBitOffset > 0) + { + out_byte = ByteUtils::getLowerNBits(mCurrentByte, mBitOffset); + out_byte |= data << mBitOffset; + + mCurrentByte = ByteUtils::getHigherNBits(data, mBitOffset); + } + else + { + out_byte = data; + } + if (mChecksumCalculator) { - mChecksumCalculator->addValue(data); + mChecksumCalculator->addValue(out_byte); } - mBuffer.push_back(data); + std::cout << "Writing byte " << ByteUtils::toString(out_byte) << " had bitoffset of " << mBitOffset << std::endl; + mBuffer.push_back(out_byte); } diff --git a/src/core/streams/BufferBitStream.h b/src/core/streams/BufferBitStream.h index 2ace2da..a5a0520 100644 --- a/src/core/streams/BufferBitStream.h +++ b/src/core/streams/BufferBitStream.h @@ -15,7 +15,7 @@ public: void setBuffer(const std::vector& data); - void writeByte(unsigned char data) override; + void writeByte(unsigned char data, bool checkOverflow = true) override; void writeBytes(const std::vector data) override { diff --git a/src/core/streams/InputBitStream.cpp b/src/core/streams/InputBitStream.cpp index 955311a..c32ab53 100644 --- a/src/core/streams/InputBitStream.cpp +++ b/src/core/streams/InputBitStream.cpp @@ -29,7 +29,7 @@ std::optional InputBitStream::readNextByte() } } -void InputBitStream::writeByte(unsigned char data) +void InputBitStream::writeByte(unsigned char data, bool checkOverflow ) { } diff --git a/src/core/streams/InputBitStream.h b/src/core/streams/InputBitStream.h index 6b762d7..302c047 100644 --- a/src/core/streams/InputBitStream.h +++ b/src/core/streams/InputBitStream.h @@ -14,7 +14,7 @@ class InputBitStream : public BitStream std::optional readNextByte() override; - void writeByte(unsigned char data) override; + void writeByte(unsigned char data, bool checkOverflow = true) override; void writeBytes(const std::vector data) override { diff --git a/src/core/streams/OutputBitStream.cpp b/src/core/streams/OutputBitStream.cpp index 5adf599..9418ede 100644 --- a/src/core/streams/OutputBitStream.cpp +++ b/src/core/streams/OutputBitStream.cpp @@ -22,7 +22,7 @@ std::optional OutputBitStream::readNextByte() return std::nullopt; } -void OutputBitStream::writeByte(unsigned char data) +void OutputBitStream::writeByte(unsigned char data, bool checkOverflow ) { (*mStream) << data; } diff --git a/src/core/streams/OutputBitStream.h b/src/core/streams/OutputBitStream.h index 1a119d5..75215d3 100644 --- a/src/core/streams/OutputBitStream.h +++ b/src/core/streams/OutputBitStream.h @@ -15,7 +15,7 @@ public: std::optional readNextByte() override; - void writeByte(unsigned char data) override; + void writeByte(unsigned char data, bool checkOverflow = true) override; void writeBytes(const std::vector data) override; diff --git a/src/image/ImageBitStream.cpp b/src/image/ImageBitStream.cpp index 2e782ff..4f7db7e 100644 --- a/src/image/ImageBitStream.cpp +++ b/src/image/ImageBitStream.cpp @@ -29,7 +29,7 @@ std::optional ImageBitStream::readNextByte() return val; } -void ImageBitStream::writeByte(unsigned char data) +void ImageBitStream::writeByte(unsigned char data, bool checkOverflow ) { mByteOffset++; diff --git a/src/image/ImageBitStream.h b/src/image/ImageBitStream.h index 86af9ee..f7bd6d2 100644 --- a/src/image/ImageBitStream.h +++ b/src/image/ImageBitStream.h @@ -15,7 +15,7 @@ public: std::optional readNextByte() override; - void writeByte(unsigned char data) override; + void writeByte(unsigned char data, bool checkOverflow = true) override; void writeBytes(const std::vector data) override { diff --git a/src/image/png/PngWriter.cpp b/src/image/png/PngWriter.cpp index 8811edd..ac7ff95 100644 --- a/src/image/png/PngWriter.cpp +++ b/src/image/png/PngWriter.cpp @@ -30,6 +30,11 @@ std::unique_ptr PngWriter::Create() return std::make_unique(); } +void PngWriter::setCompressionMethod(Deflate::CompressionMethod method) +{ + mCompressionMethod = method; +} + void PngWriter::setPath(const Path& path) { mPath = path; @@ -91,6 +96,7 @@ void PngWriter::writeHeader() void PngWriter::writeEndChunk() { + std::cout << "Start writing end chunk" << std::endl; unsigned length{0}; mOutStream->write(length); @@ -145,6 +151,7 @@ void PngWriter::writeDataChunks(const BufferBitStream& buffer) auto crc = crc_check.getChecksum(); std::cout << "Writing idat crc" << crc << std::endl; mOutStream->write(crc); + std::cout << "Finished Writing idat crc" << crc << std::endl; } } @@ -192,12 +199,17 @@ void PngWriter::write(const std::unique_ptr >& image) lz77_out_stream = std::make_unique(); Lz77Encoder lz77_encoder(filter_out_stream.get(), lz77_out_stream.get()); + if (mCompressionMethod == Deflate::CompressionMethod::DYNAMIC_HUFFMAN) + { + // Set up custom encoder; + } lz77_encoder.encode(); lz77_out_stream->resetOffsets(); } BufferBitStream zlib_out_stream; ZlibEncoder zlib_encoder(lz77_out_stream.get(), &zlib_out_stream); + zlib_encoder.setDeflateCompressionMethod(mCompressionMethod); zlib_encoder.encode(); zlib_out_stream.resetOffsets(); diff --git a/src/image/png/PngWriter.h b/src/image/png/PngWriter.h index f0bb851..7e8ae6a 100644 --- a/src/image/png/PngWriter.h +++ b/src/image/png/PngWriter.h @@ -22,6 +22,8 @@ public: static std::unique_ptr Create(); + void setCompressionMethod(Deflate::CompressionMethod method); + void setPath(const Path& path); void setPngInfo(const PngInfo& info); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f0843d7..eca43e6 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -18,6 +18,7 @@ list(APPEND TestFiles compiler/TestLexer.cpp compiler/TestTemplatingEngine.cpp compression/TestStreamCompressor.cpp + compression/TestHuffmanStream.cpp database/TestDatabase.cpp fonts/TestFontReader.cpp graphics/TestRasterizer.cpp diff --git a/test/compression/TestHuffmanStream.cpp b/test/compression/TestHuffmanStream.cpp new file mode 100644 index 0000000..465d2c1 --- /dev/null +++ b/test/compression/TestHuffmanStream.cpp @@ -0,0 +1,60 @@ +#include + +#include "HuffmanStream.h" + + +void testHuffmanCodeLengthTable() +{ + HuffmanCodeLengthTable table; + + std::vector > mappings {{144, 8}, {112, 9}, {24, 7}, {8 ,8}}; + std::vector code_length_sequence; + for(const auto& entry : mappings) + { + for(unsigned idx=0;idx -int main() +void testReading() { - std::vector bytes{"11100101", "00110101", "00010001"}; + std::vector bytes{ + "11101101", + "01011101", + "00001001", + "01111111"}; BufferBitStream stream; for(const auto& byte : bytes) @@ -14,17 +18,67 @@ int main() } unsigned char buffer{0} ; - auto valid = stream.readNextNBits(3, buffer); + auto valid = stream.readNextNBits(1, buffer); std::cout << "Slice0 is " << ByteUtils::toString(buffer) << std::endl; - valid = stream.readNextNBits(3, buffer); + valid = stream.readNextNBits(2, buffer); std::cout << "Slice1 is " << ByteUtils::toString(buffer) << std::endl; valid = stream.readNextNBits(5, buffer); std::cout << "Slice2 is " << ByteUtils::toString(buffer) << std::endl; - valid = stream.readNextNBits(7, buffer); + valid = stream.readNextNBits(5, buffer); std::cout << "Slice3 is " << ByteUtils::toString(buffer) << std::endl; + valid = stream.readNextNBits(4, buffer); + std::cout << "Slice3 is " << ByteUtils::toString(buffer) << " and int " << static_cast(buffer) << std::endl; + + valid = stream.readNextNBits(3, buffer); + std::cout << "Slice3 is " << ByteUtils::toString(buffer) << std::endl; +} + +void testWriting() +{ + BufferBitStream stream; + + stream.writeByte(ByteUtils::getFromString("01100000")); + + auto bits0 = ByteUtils::getFromString("00000111"); + stream.writeNBits(bits0, 3); + + stream.writeByte(ByteUtils::getFromString("11110000")); + + auto bits1 = ByteUtils::getFromString("01001101"); + stream.writeNBits(bits1, 7); + + stream.writeByte(ByteUtils::getFromString("11110000")); + + auto bits2 = ByteUtils::getFromString("00000001"); + stream.writeNBits(bits2, 1); + + stream.flushRemainingBits(); + + stream.resetOffsets(); + + auto byte0 = ByteUtils::toString(*stream.readNextByte()); + auto byte1 = ByteUtils::toString(*stream.readNextByte()); + auto byte2 = ByteUtils::toString(*stream.readNextByte()); + auto byte3 = ByteUtils::toString(*stream.readNextByte()); + auto byte4 = ByteUtils::toString(*stream.readNextByte()); + + std::cout << "Got bytes 0 " << byte0 << std::endl; + std::cout << "Got bytes 1 " << byte1 << std::endl; + std::cout << "Got bytes 2 " << byte2 << std::endl; + std::cout << "Got bytes 3 " << byte3 << std::endl; + std::cout << "Got bytes 4 " << byte4 << std::endl; +} + + +int main() +{ + //testReading() + + testWriting(); + return 0; } diff --git a/test/core/TestByteUtils.cpp b/test/core/TestByteUtils.cpp index 6b545a7..69d7129 100644 --- a/test/core/TestByteUtils.cpp +++ b/test/core/TestByteUtils.cpp @@ -24,5 +24,10 @@ int main() std::cout << "Byte2 is " << ByteUtils::toString(byte2) << std::endl; std::cout << "Byte3 is " << ByteUtils::toString(byte3) << std::endl; + std::cout << "Mirroring" << std::endl; + + auto out = ByteUtils::mirror(byte); + std::cout << "Mirror is " << ByteUtils::toString(out) << std::endl; + return 0; } diff --git a/test/image/TestPngReader.cpp b/test/image/TestPngReader.cpp index e046476..a0bcc5d 100644 --- a/test/image/TestPngReader.cpp +++ b/test/image/TestPngReader.cpp @@ -5,25 +5,43 @@ #include "Image.h" #include -int main() +void testThirdParty() { //const auto path = "/home/jmsgrogan/Downloads/test.png"; - //const auto path = "/home/jmsgrogan/Downloads/index.png"; + const auto path = "/home/jmsgrogan/Downloads/index.png"; - const auto path = "/home/jmsgrogan/code/MediaTool-build/bin/test.png"; + //const auto path = "/home/jmsgrogan/code/MediaTool-build/bin/test.png"; - File file(path); - std::cout << file.dumpBinary(); + //File file(path); + //std::cout << file.dumpBinary(); PngReader reader; reader.setPath(path); auto image = reader.read(); - for(unsigned idx=0; idxgetWidth()*image->getBytesPerRow(); idx++) - { - std::cout << "Image val: " << idx << " | " << static_cast(image->getDataRef()[idx]) << std::endl; - } + //for(unsigned idx=0; idxgetWidth()*image->getBytesPerRow(); idx++) + //{ + // std::cout << "Image val: " << idx << " | " << static_cast(image->getDataRef()[idx]) << std::endl; + //} +} + +void testFixedCode() +{ + const auto path = "/home/jmsgrogan/code/MediaTool-build/bin/test_fixed.png"; + + //File file(path); + //std::cout << file.dumpBinary(); + + PngReader reader; + reader.setPath(path); + auto image = reader.read(); +} + +int main() +{ + testThirdParty(); + //testFixedCode(); return 0; } diff --git a/test/image/TestPngWriter.cpp b/test/image/TestPngWriter.cpp index 6ac64a0..297c9cf 100644 --- a/test/image/TestPngWriter.cpp +++ b/test/image/TestPngWriter.cpp @@ -3,11 +3,12 @@ #include "File.h" #include "BitStream.h" +#include "ByteUtils.h" #include "ImagePrimitives.h" #include -int main() +void testCompressedPng() { unsigned width = 20; unsigned height = 20; @@ -26,11 +27,11 @@ int main() image->setData(data); PngWriter writer; - writer.setPath("test.png"); + writer.setPath("test_compressed.png"); writer.write(image); - return 0; - File test_file("test.png"); + return; + File test_file("test_compressed.png"); test_file.SetAccessMode(File::AccessMode::Read); test_file.Open(true); @@ -39,6 +40,41 @@ int main() std::cout << static_cast(*byte) << std::endl; } test_file.Close(); +} +void testFixedPng() +{ + unsigned width = 10; + unsigned height = 10; + unsigned numChannels = 1; + auto image = Image::Create(width, height); + image->setNumChannels(numChannels); + image->setBitDepth(8); + + std::vector data(width*height, 0); + for (unsigned idx=0; idxsetData(data); + + PngWriter writer; + writer.setPath("test_fixed.png"); + writer.setCompressionMethod(Deflate::CompressionMethod::FIXED_HUFFMAN); + writer.write(image); + + //return; + File test_file("test_fixed.png"); + std::cout << test_file.dumpBinary(); + +} + +int main() +{ + //testCompressedPng(); + testFixedPng(); return 0; }