diff --git a/src/compression/deflate/DeflateBlock.cpp b/src/compression/deflate/DeflateBlock.cpp index cef4d36..588b26f 100644 --- a/src/compression/deflate/DeflateBlock.cpp +++ b/src/compression/deflate/DeflateBlock.cpp @@ -39,7 +39,7 @@ bool DeflateBlock::read() { auto working_byte = *mInputStream->readNextByte(); - std::cout << mInputStream->logNextNBytes(11); + std::cout << mInputStream->logNextNBytes(60); std::cout << "DeflateBlock::read location " << mInputStream->logLocation(); unsigned char final_block{0}; diff --git a/src/compression/deflate/DeflateEncoder.cpp b/src/compression/deflate/DeflateEncoder.cpp index b797a38..7b4c585 100644 --- a/src/compression/deflate/DeflateEncoder.cpp +++ b/src/compression/deflate/DeflateEncoder.cpp @@ -84,6 +84,6 @@ bool DeflateEncoder::decode() break; } - return false; + return true; } diff --git a/src/compression/huffman/HuffmanCodeLengthTable.cpp b/src/compression/huffman/HuffmanCodeLengthTable.cpp index 625a7d0..bfcc30b 100644 --- a/src/compression/huffman/HuffmanCodeLengthTable.cpp +++ b/src/compression/huffman/HuffmanCodeLengthTable.cpp @@ -2,6 +2,7 @@ #include "ByteUtils.h" #include "RunLengthEncoder.h" +#include "BitStream.h" #include #include @@ -24,20 +25,33 @@ void HuffmanCodeLengthTable::buildCompressedLengthSequence() const auto count = entry.second; if (count < 3) { - for(unsigned idx=0; idx<3; idx++) + for(unsigned idx=0; idx=3 && count <=10) + unsigned num_big = count / 138; + for(unsigned idx=0; idx 10) + { + mCompressedLengthSequence.push_back({18, remainder_big-11}); + } + else if(remainder_big > 2) + { + mCompressedLengthSequence.push_back({17, remainder_big-3}); } else { - mCompressedLengthSequence.push_back({18, count-11}); + for(unsigned idx=0; idx HuffmanCodeLengthTable::getCompressedLengthCounts() return mCompressedLengthCounts; } +std::optional HuffmanCodeLengthTable::getCodeForSymbol(unsigned symbol) const +{ + return mTree.getCode(symbol); +} + +bool HuffmanCodeLengthTable::readNextSymbol(unsigned& result, BitStream* stream) +{ + if (getNumCodeLengths() == 0) + { + return false; + } + + unsigned working_index{0}; + auto length = getCodeLength(working_index); + auto delta = length; + + bool found{false}; + unsigned char buffer{0}; + uint32_t working_bits{0}; + unsigned working_symbol{0}; + + while(!found) + { + auto valid = stream->readNextNBits(delta, buffer); + //std::cout << "Got buffer " << ByteUtils::toString(buffer) << std::endl;; + + unsigned hold = buffer; + working_bits = working_bits | (hold << (length - delta)); + //std::cout << "Read " << delta << " bits with length " << length << " and value " << ByteUtils::toString(working_bits) << std::endl; + + if (const auto symbol = findMatch(working_index, working_bits)) + { + found = true; + working_symbol = *symbol; + } + else + { + working_index++; + if (working_index >= getNumCodeLengths()) + { + break; + } + + auto new_length = getCodeLength(working_index); + delta = new_length - length; + length = new_length; + } + } + + if (found) + { + result = working_symbol; + // std::cout << "Found symbol " << working_symbol << " with bits " << ByteUtils::toString(working_bits) << std::endl; + // std::cout << "At Byte offset " << stream->getCurrentByteOffset() << " and bit offset " << stream->getCurrentBitOffset() << std::endl; + return true; + } + else + { + std::cout << "SYMBOL NOT FOUND " << " with bits " << ByteUtils::toString(working_bits) << " and index " << working_index << std::endl; + return false; + } +} + void HuffmanCodeLengthTable::buildPrefixCodes() { if(mInputLengthSequence.empty()) @@ -100,6 +177,7 @@ void HuffmanCodeLengthTable::buildPrefixCodes() for (unsigned bits = 1; bits <= max_length; bits++) { code = (code + counts[bits-1]) << 1; + //std::cout << "Start code for bit " << bits << " is " << ByteUtils::toString(code) << " | dec " << code << " count " << counts[bits-1] << std::endl; next_code[bits] = code; } @@ -115,7 +193,7 @@ void HuffmanCodeLengthTable::buildPrefixCodes() } } mTree.sortTable(); - //std::cout << dumpPrefixCodes(); + std::cout << dumpPrefixCodes(); } const PrefixCode& HuffmanCodeLengthTable::getCode(std::size_t index) const diff --git a/src/compression/huffman/HuffmanCodeLengthTable.h b/src/compression/huffman/HuffmanCodeLengthTable.h index 18624b8..f8fff09 100644 --- a/src/compression/huffman/HuffmanCodeLengthTable.h +++ b/src/compression/huffman/HuffmanCodeLengthTable.h @@ -6,6 +6,8 @@ #include #include +class BitStream; + class HuffmanCodeLengthTable { public: @@ -21,6 +23,8 @@ public: const PrefixCode& getCode(std::size_t index) const; + std::optional getCodeForSymbol(unsigned symbol) const; + using CompressedSequenceEntry = std::pair; const std::vector& getCompressedLengthSequence() const; @@ -34,6 +38,8 @@ public: void setInputLengthSequence(const std::vector& sequence, bool targetDeflate = true); + bool readNextSymbol(unsigned& buffer, BitStream* stream); + private: HuffmanTree mTree; diff --git a/src/compression/huffman/HuffmanStream.cpp b/src/compression/huffman/HuffmanStream.cpp index d01e104..0c6e566 100644 --- a/src/compression/huffman/HuffmanStream.cpp +++ b/src/compression/huffman/HuffmanStream.cpp @@ -8,6 +8,13 @@ #include #include +std::vector DISTANCE_OFFSETS +{ + 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, + 258, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, + 6145, 8193, 12289, 16385, 24577 +}; + HuffmanStream::HuffmanStream(BitStream* inputStream, BitStream* outputStream) : mInputStream(inputStream), mOutputStream(outputStream) @@ -22,69 +29,78 @@ void HuffmanStream::generateFixedCodeMapping() mCodeLengthTable.buildPrefixCodes(); } -bool HuffmanStream::readNextCodeLengthSymbol(unsigned& final_symbol) +bool HuffmanStream::readNextCodeLengthSymbol(unsigned& buffer) { - if (mCodeLengthTable.getNumCodeLengths() == 0) + return mCodeLengthTable.readNextSymbol(buffer, mInputStream); +} + +bool HuffmanStream::readNextLiteralSymbol(unsigned& buffer) +{ + return mLiteralTable.readNextSymbol(buffer, mInputStream); +} + +bool HuffmanStream::readNextDistanceSymbol(unsigned& buffer) +{ + unsigned base_symbol{0}; + unsigned char extra_bits{0}; + const auto valid = mDistanceTable.readNextSymbol(base_symbol, mInputStream); + if (!valid) { return false; } - unsigned working_index{0}; - auto length = mCodeLengthTable.getCodeLength(working_index); - auto delta = length; + //std::cout << "Got distance base symbol " << base_symbol << std::endl; - bool found{false}; - unsigned char buffer{0}; - uint32_t working_bits{0}; - unsigned working_symbol{0}; - - while(!found) + if (base_symbol <= 3) { - auto valid = mInputStream->readNextNBits(delta, buffer); - //std::cout << "Got buffer " << ByteUtils::toString(buffer) << std::endl;; - - working_bits = working_bits | (buffer << (length - delta)); - std::cout << "Read " << delta << " bits with length " << length << " and value " << ByteUtils::toString(working_bits) << std::endl; - - if (const auto symbol = mCodeLengthTable.findMatch(working_index, working_bits)) - { - found = true; - working_symbol = *symbol; - } - else - { - working_index++; - if (working_index >= mCodeLengthTable.getNumCodeLengths()) - { - break; - } - - auto new_length = mCodeLengthTable.getCodeLength(working_index); - delta = new_length - length; - length = new_length; - } - } - - if (found) - { - final_symbol = working_symbol; - std::cout << "Found symbol " << working_symbol << " with bits " << ByteUtils::toString(working_bits) << std::endl; - std::cout << "At Byte offset " << mInputStream->getCurrentByteOffset() << " and bit offset " << mInputStream->getCurrentBitOffset() << std::endl; - return true; + buffer = 1 + base_symbol; } else { - std::cout << "SYMBOL NOT FOUND " << " with bits " << ByteUtils::toString(working_bits) << std::endl; - return false; + const auto num_extra_bits = (base_symbol - 3 - 1)/2 + 1; + unsigned extra_sum{0}; + if (num_extra_bits > 8) + { + auto byte_val = *mInputStream->readNextByte(); + mInputStream->readNextNBits(num_extra_bits-8, extra_bits); + extra_sum = extra_bits; + extra_sum = extra_sum << (num_extra_bits - 8); + extra_sum |= byte_val; + } + else + { + mInputStream->readNextNBits(num_extra_bits, extra_bits); + extra_sum = extra_bits; + } + buffer = DISTANCE_OFFSETS[base_symbol - 4] + extra_sum; } + return true; } -void HuffmanStream::readLiteralCodeLengths() +void HuffmanStream::addValue(unsigned value, unsigned& count, unsigned& lastValue, std::vector& literals, unsigned numLiterals, std::vector& distances) { - std::vector lengths; - unsigned symbol{0}; + if (count < mNumLiterals) + { + literals[count] = value; + } + else + { + distances[count - mNumLiterals] = value; + } + lastValue = value; + count++; +} - while(lengths.size() < mNumLiterals) +void HuffmanStream::readCodeLengths() +{ + std::vector literal_lengths(288, 0); + std::vector distance_lengths(32, 0); + unsigned symbol{0}; + unsigned count{0}; + + unsigned last_value{0}; + + while(count < mNumLiterals + mNumDistances) { bool valid = readNextCodeLengthSymbol(symbol); @@ -96,18 +112,17 @@ void HuffmanStream::readLiteralCodeLengths() if (symbol < 16) { - lengths.push_back(symbol); + addValue(symbol, count, last_value, literal_lengths, mNumLiterals, distance_lengths); } else if(symbol == 16) { unsigned char num_reps{0}; mInputStream->readNextNBits(2, num_reps); - auto last_val = lengths[lengths.size()-1]; std::cout << "Got val 16 doing " << 3 + num_reps << std::endl; for(unsigned idx=0; idx< 3 + num_reps; idx++) { - lengths.push_back(last_val); + addValue(last_value, count, last_value, literal_lengths, mNumLiterals, distance_lengths); } } else if(symbol == 17) @@ -118,7 +133,7 @@ void HuffmanStream::readLiteralCodeLengths() std::cout << "Got val 17 doing " << 3 + num_reps << std::endl; for(unsigned idx=0; idx< 3 + num_reps; idx++) { - lengths.push_back(0); + addValue(0, count, last_value, literal_lengths, mNumLiterals, distance_lengths); } } else if(symbol == 18) @@ -129,10 +144,133 @@ void HuffmanStream::readLiteralCodeLengths() std::cout << "Got val 18 doing " << 11 + num_reps << std::endl; for(unsigned idx=0; idx< 11 + num_reps; idx++) { - lengths.push_back(0); + addValue(0, count, last_value, literal_lengths, mNumLiterals, distance_lengths); } } } + + std::cout << "Got final literal length sequence " << std::endl; + for(unsigned idx=0; idx(literal_lengths[idx]) << "," ; + } + std::cout << std::endl; + + std::cout << "Got final distance length sequence " << std::endl; + for(unsigned idx=0; idx(distance_lengths[idx]) << "," ; + } + std::cout << std::endl; + + mLiteralTable.setInputLengthSequence(literal_lengths, false); + mLiteralTable.buildPrefixCodes(); + + mDistanceTable.setInputLengthSequence(distance_lengths, false); + mDistanceTable.buildPrefixCodes(); +} + +void HuffmanStream::copyFromBuffer(unsigned length, unsigned distance) +{ + unsigned offset = mBuffer.size() - 1 - distance; + for(unsigned idx=0; idxwriteByte(symbol); + mBuffer.push_back(symbol); + } +} + +void HuffmanStream::readSymbols() +{ + bool hit_end_stream{false}; + unsigned symbol{0}; + unsigned distance{0}; + while(!hit_end_stream) + { + const auto valid = readNextLiteralSymbol(symbol); + if (!valid) + { + std::cout << "Hit unknown symbol - bailing out" << std::endl; + break; + } + + std::cout << "Got symbol " << symbol << std::endl; + + if(symbol <= 255) + { + mOutputStream->writeByte(symbol); + mBuffer.push_back(symbol); + } + else if(symbol == 256) + { + hit_end_stream = true; + break; + } + else if (symbol <= 264) + { + auto length = 3 + symbol - 257; + const auto valid_dist = readNextDistanceSymbol(distance); + copyFromBuffer(length, distance); + } + else if (symbol <= 268) + { + unsigned char extra{0}; + mInputStream->readNextNBits(1, extra); + + auto length = 11 + 2*(symbol - 265) + extra; + const auto valid_dist = readNextDistanceSymbol(distance); + copyFromBuffer(length, distance); + } + else if (symbol <= 272) + { + unsigned char extra{0}; + mInputStream->readNextNBits(2, extra); + + auto length = 19 + 4*(symbol - 269) + extra; + const auto valid_dist = readNextDistanceSymbol(distance); + copyFromBuffer(length, distance); + } + else if (symbol <= 276) + { + unsigned char extra{0}; + mInputStream->readNextNBits(3, extra); + + auto length = 35 + 8*(symbol - 273) + extra; + const auto valid_dist = readNextDistanceSymbol(distance); + copyFromBuffer(length, distance); + } + else if (symbol <= 280) + { + unsigned char extra{0}; + mInputStream->readNextNBits(4, extra); + + auto length = 67 + 16*(symbol - 277) + extra; + const auto valid_dist = readNextDistanceSymbol(distance); + copyFromBuffer(length, distance); + } + else if (symbol <= 284) + { + unsigned char extra{0}; + mInputStream->readNextNBits(5, extra); + + auto length = 131 + 32*(symbol - 281) + extra; + const auto valid_dist = readNextDistanceSymbol(distance); + copyFromBuffer(length, distance); + } + else if (symbol == 285) + { + auto length = 258; + const auto valid_dist = readNextDistanceSymbol(distance); + copyFromBuffer(length, distance); + } + } + + if (hit_end_stream) + { + std::cout << "Found end of stream ok" << std::endl; + } } bool HuffmanStream::decode() @@ -140,6 +278,14 @@ bool HuffmanStream::decode() if (!mUsingFixedCodes) { readCodingsTable(); + + readSymbols(); + + std::cout << "Got final buffer size " << mBuffer.size() << std::endl; + for(unsigned idx=0; idx< 100; idx++) + { + //std::cout << idx << " | " << mBuffer[idx] << std::endl; + } } else { @@ -163,8 +309,6 @@ bool HuffmanStream::decode() } } - - return false; } @@ -190,13 +334,13 @@ void HuffmanStream::readCodingsTable() unsigned char buffer{0}; for(unsigned idx = 0; idx< num_code_lengths; idx++) { - std::cout << "After codings " << mInputStream->logLocation(); mInputStream->readNextNBits(3, buffer); + std::cout << "Got coding table value " << idx << " | " << static_cast(buffer) << " | " << ByteUtils::toString(buffer) << std::endl; sequence[idx] = buffer; } mCodeLengthTable.setInputLengthSequence(sequence, true); mCodeLengthTable.buildPrefixCodes(); - readLiteralCodeLengths(); + readCodeLengths(); } diff --git a/src/compression/huffman/HuffmanStream.h b/src/compression/huffman/HuffmanStream.h index 37dd6b3..d636493 100644 --- a/src/compression/huffman/HuffmanStream.h +++ b/src/compression/huffman/HuffmanStream.h @@ -22,17 +22,30 @@ public: private: void readCodingsTable(); - void readLiteralCodeLengths(); + void readCodeLengths(); + + void readSymbols(); + + void copyFromBuffer(unsigned length, unsigned distance); + + bool readNextLiteralSymbol(unsigned& buffer); + + bool readNextDistanceSymbol(unsigned& buffer); bool readNextCodeLengthSymbol(unsigned& buffer); + void addValue(unsigned value, unsigned& count, unsigned& lastValue, std::vector& literals, unsigned numLiterals, std::vector& distances); + BitStream* mInputStream; BitStream* mOutputStream; + std::vector mBuffer; + unsigned mNumLiterals{0}; // HLIT + 257 unsigned mNumDistances{0}; // HDIST + 1 bool mUsingFixedCodes{false}; HuffmanCodeLengthTable mCodeLengthTable; - + HuffmanCodeLengthTable mLiteralTable; + HuffmanCodeLengthTable mDistanceTable; }; diff --git a/src/compression/huffman/HuffmanTree.cpp b/src/compression/huffman/HuffmanTree.cpp index 60976c3..bde4878 100644 --- a/src/compression/huffman/HuffmanTree.cpp +++ b/src/compression/huffman/HuffmanTree.cpp @@ -81,6 +81,21 @@ std::optional HuffmanTree::findMatch(std::size_t treeIndex, return std::nullopt; } +std::optional HuffmanTree::getCode(Symbol symbol) const +{ + for(const auto& entry : mTable) + { + for(const auto& data : entry.second) + { + if (data.second == symbol) + { + return data.first; + } + } + } + return std::nullopt; +} + std::size_t HuffmanTree::getNumCodeLengths() const { return mTable.size(); diff --git a/src/compression/huffman/HuffmanTree.h b/src/compression/huffman/HuffmanTree.h index 56e7f1c..7c9562b 100644 --- a/src/compression/huffman/HuffmanTree.h +++ b/src/compression/huffman/HuffmanTree.h @@ -46,6 +46,8 @@ public: unsigned getCodeLength(std::size_t idx) const; + std::optional getCode(Symbol symbol) const; + void sortTable(); private: std::vector mTable; diff --git a/src/core/ByteUtils.cpp b/src/core/ByteUtils.cpp index 68b3096..20126a6 100644 --- a/src/core/ByteUtils.cpp +++ b/src/core/ByteUtils.cpp @@ -26,9 +26,9 @@ unsigned char ByteUtils::getByteN(uint32_t input, unsigned n) return (input << 8*n) >> 24; } -unsigned char ByteUtils::mirror(unsigned char byte, unsigned length) +uint32_t ByteUtils::mirror(uint32_t byte, unsigned length) { - unsigned char ret{0}; + uint32_t ret{0}; for(unsigned idx=0; idx 8) { - if (idx > 0 && idx % 8 == 0) + unsigned overshoot = length - 8; + for(unsigned idx=0; idxaddValue(out_byte); } - std::cout << "Writing byte " << ByteUtils::toString(out_byte) << " had bitoffset of " << mBitOffset << std::endl; + //std::cout << "Writing byte " << ByteUtils::toString(out_byte) << " had bitoffset of " << mBitOffset << std::endl; mBuffer.push_back(out_byte); } diff --git a/src/image/png/PngFilter.h b/src/image/png/PngFilter.h index fd933a4..1075c66 100644 --- a/src/image/png/PngFilter.h +++ b/src/image/png/PngFilter.h @@ -85,6 +85,10 @@ public: { image_stream->writeByte(*byte); } + else + { + //std::cout << "Got filter type " << static_cast(working_filter_type) << std::endl; + } } count++; } diff --git a/test/compression/TestHuffmanStream.cpp b/test/compression/TestHuffmanStream.cpp index 465d2c1..2f5ad11 100644 --- a/test/compression/TestHuffmanStream.cpp +++ b/test/compression/TestHuffmanStream.cpp @@ -1,7 +1,7 @@ #include #include "HuffmanStream.h" - +#include "BufferBitStream.h" void testHuffmanCodeLengthTable() { @@ -31,13 +31,115 @@ void testHuffmanCodeLengthTable() { std::cout << "Slot " << idx << " length " << compressed_lengths[idx] << std::endl; } +} + +void testLiteralsTable() +{ + std::vector lengths = {7,4,4,7,5,5,7,7,6,6,7,6,6,6,8,6,6,8, + 6,6,7,6,8,7,7,7,7,7,7,6,6,7,7,6,6,7,7,8,8,7,7,7,6,6,7,7,7,7,6,7,7,7, + 7,7,7,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,4,4,5,6,7,7,9,8,8,9,9,8,8,7, + 7,8,7,6,8,9,9,11,10,8,7,7,8,8,7,3,0,9,9,9,9,9,8,6,7,7,7,7,9,5,7,4,7,4,4,4,3,4,4,4,4,4,5,5,6}; + + HuffmanCodeLengthTable table; + table.setInputLengthSequence(lengths, false); + + table.buildCompressedLengthSequence(); + + auto compressed_sequence = table.getCompressedLengthSequence(); + for (auto entry : compressed_sequence) + { + std::cout << "Code " << entry.first << " extra bits " << entry.second << std::endl; + } + + auto compressed_lengths = table.getCompressedLengthCounts(); + for(unsigned idx = 0; idx coding_lengths{4, 0, 6, 7, 3, 2, 4, 2, 7, 4, 6, 3, 0, 6, 0, 0, 0, 0, 0}; + codingTable.setInputLengthSequence(coding_lengths, true); + codingTable.buildPrefixCodes(); + + BufferBitStream out_stream; + out_stream.writeNBits(1, 1); + out_stream.writeNBits(2, 2); + + out_stream.writeNBits(29, 5); + out_stream.writeNBits(29, 5); + out_stream.writeNBits(10, 4); + + /* + std::vector permuted(19, 0); + static constexpr unsigned DEFLATE_PERMUTATION[19]{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + unsigned count = 0; + for (auto length : coding_lengths) + { + permuted[DEFLATE_PERMUTATION[count]] = length; + count++; + } + + unsigned skip_count = 0; + for(unsigned idx=0; idx