// Member-function definitions for HuffmanCodeLengthTable: building a
// run-length-compressed code-length sequence, assigning prefix codes from a
// list of code lengths, and decoding symbols bit-by-bit from a BitStream.
//
// NOTE(review): this text appears to have lost every span that originally sat
// between a '<' and the following '>' (see the bare `#include` directives,
// `static_cast(`, `std::vector&`, and the `for` loops that stop in the middle
// of their condition). Those fragments are kept verbatim and flagged below;
// restore them from the original source before relying on this file.
#include "HuffmanCodeLengthTable.h"
#include "ByteUtils.h"
#include "RunLengthEncoder.h"
#include "BitStream.h"
// NOTE(review): the header names of the next three directives are missing.
#include
#include
#include

// Rebuilds mCompressedLengthSequence and mCompressedLengthCounts from
// mInputLengthSequence.
//
// The input is first run-length encoded with RunLengthEncoder; each run is read
// as (entry.first = length value, entry.second = repeat count). Runs are then
// turned into pairs pushed onto mCompressedLengthSequence. The visible branches
// emit pairs whose first member is 16, 17 or 18 and whose second member is a
// count minus 3 or minus 11.
// NOTE(review): those symbols and offsets look like the code-length alphabet
// of DEFLATE (RFC 1951, section 3.2.7) - confirm against the intact source.
void HuffmanCodeLengthTable::buildCompressedLengthSequence() {
    RunLengthEncoder rl_encoder;
    auto rle_encoded = rl_encoder.encode(mInputLengthSequence);

    // Debug-only pass: the loop body contains nothing but a disabled trace.
    for (const auto& entry : rle_encoded) {
        //std::cout << "Got rle " << static_cast(entry.first) << " | " << entry.second << std::endl;
    }

    // Start from an empty output so repeated calls do not accumulate entries.
    mCompressedLengthSequence.clear();

    for (const auto& entry : rle_encoded) {
        const auto length = entry.first;
        const auto count = entry.second;
        if (count < 3) {
            // NOTE(review): text is missing between `idx` and `10)`. The rest
            // of this loop, the end of the `count < 3` branch and the
            // declaration of remainder_big are not visible.
            for(std::size_t idx=0; idx 10) {
                // Remainder above 10: one pair with symbol 18 and (remainder - 11).
                mCompressedLengthSequence.push_back({18, remainder_big-11});
            }
            else if(remainder_big > 2) {
                // Remainder of 3..10: one pair with symbol 17 and (remainder - 3).
                mCompressedLengthSequence.push_back({17, remainder_big-3});
            }
            else {
                // NOTE(review): text is missing between `idx` and `= 3)`. The
                // loop body and the declaration of remaining_counts are not
                // visible.
                for(std::size_t idx=0; idx= 3) {
                // One pair with symbol 16 and (remaining_counts - 3).
                mCompressedLengthSequence.push_back({16, remaining_counts - 3});
            }
            else {
                // NOTE(review): text is missing between `idx` and `(19, 0);`.
                // The visible tail `(19, 0);` presumably belongs to the
                // statement that resets mCompressedLengthCounts to 19 zeroed
                // slots - confirm.
                for(std::size_t idx=0; idx(19, 0);

    // Tally how often each first member (symbol) occurs in the compressed
    // sequence; the symbol is used directly as the index.
    for (const auto& entry : mCompressedLengthSequence) {
        mCompressedLengthCounts[entry.first]++;
    }
}

// Returns a reference to the sequence produced by
// buildCompressedLengthSequence().
// NOTE(review): the vector's element type is missing from this signature.
const std::vector& HuffmanCodeLengthTable::getCompressedLengthSequence() const {
    return mCompressedLengthSequence;
}

// Returns the per-symbol tallies produced by buildCompressedLengthSequence().
// The visible return type has no `&`, so the caller receives a copy.
// NOTE(review): the vector's element type is missing from this signature.
const std::vector HuffmanCodeLengthTable::getCompressedLengthCounts() const {
    return mCompressedLengthCounts;
}

// Forwards to mTree.getCode(symbol) and returns its result unchanged.
// NOTE(review): the optional's payload type is missing from this signature.
std::optional HuffmanCodeLengthTable::getCodeForSymbol(unsigned symbol) const {
    return mTree.getCode(symbol);
}

// Reads bits from `stream` until they match a code in mTree.
//
// result: receives the decoded symbol; written only when a match is found.
// stream: bit source; dereferenced without a null check.
// Returns true when a symbol was found, false when the table has no code
// lengths or every code-length index was tried without a match.
bool HuffmanCodeLengthTable::readNextSymbol(unsigned& result, BitStream* stream) {
    if (getNumCodeLengths() == 0) {
        return false;
    }

    std::size_t working_index{0};
    auto length = getCodeLength(working_index);  // total bits accumulated after the next read
    auto delta = length;                         // bits still to fetch to reach `length`
    bool found{false};
    unsigned char buffer{0};
    uint32_t working_bits{0};
    unsigned working_symbol{0};

    while(!found) {
        // NOTE(review): `valid` is never inspected, so whatever readNextNBits
        // reports through its return value is ignored here - presumably a
        // read-success flag; confirm whether end-of-stream needs handling.
        auto valid = stream->readNextNBits(delta, buffer);
        //std::cout << "Got buffer " << ByteUtils::toString(buffer) << std::endl;;

        // Place the freshly read bits above the ones already accumulated:
        // (length - delta) equals the number of bits gathered so far.
        unsigned hold = buffer;
        working_bits = working_bits | (hold << (length - delta));
        //std::cout << "Read " << delta << " bits with length " << length << " and value " << ByteUtils::toString(working_bits) << std::endl;

        if (const auto symbol = findMatch(working_index, working_bits)) {
            found = true;
            working_symbol = *symbol;
        }
        else {
            // No code matches at this index: move to the next one and fetch
            // only the extra bits it needs.
            working_index++;
            if (working_index >= getNumCodeLengths()) {
                break;
            }
            auto new_length = getCodeLength(working_index);
            // NOTE(review): assumes getCodeLength() does not decrease as the
            // index grows; getCodeLength returns unsigned, so a shorter length
            // would make this subtraction wrap.
            delta = new_length - length;
            length = new_length;
        }
    }

    if (found) {
        result = working_symbol;
        // std::cout << "Found symbol " << working_symbol << " with bits " << ByteUtils::toString(working_bits) << std::endl;
        // std::cout << "At Byte offset " << stream->getCurrentByteOffset() << " and bit offset " << stream->getCurrentBitOffset() << std::endl;
        return true;
    }
    else {
        //std::cout << "SYMBOL NOT FOUND " << " with bits " << ByteUtils::toString(working_bits) << " and index " << working_index << std::endl;
        return false;
    }
}

// Assigns prefix codes for the lengths in mInputLengthSequence and finishes by
// calling mTree.sortTable(). Does nothing when the input sequence is empty.
//
// The start code for each bit length is computed with the recurrence
// code = (code + counts[bits-1]) << 1, which is the same recurrence RFC 1951
// section 3.2.2 uses for canonical Huffman codes.
void HuffmanCodeLengthTable::buildPrefixCodes() {
    if(mInputLengthSequence.empty()) {
        return;
    }

    unsigned char max_length = *std::max_element(mInputLengthSequence.begin(), mInputLengthSequence.end());

    // counts[n] = number of entries in the input whose length is n.
    // NOTE(review): the vector's element type is missing from this declaration.
    std::vector counts(max_length+1, 0);
    for (const auto length : mInputLengthSequence) {
        counts[length]++;
    }
    // Entries of length 0 must not contribute to the start codes below.
    counts[0] = 0;

    uint32_t code{0};
    // next_code[n] = first code value handed out to entries of length n.
    // NOTE(review): the vector's element type is missing from this declaration.
    std::vector next_code(max_length + 1, 0);
    for (unsigned bits = 1; bits <= max_length; bits++) {
        code = (code + counts[bits-1]) << 1;
        //std::cout << "Start code for bit " << bits << " is " << ByteUtils::toString(code) << " | dec " << code << " count " << counts[bits-1] << std::endl;
        next_code[bits] = code;
    }

    // NOTE(review): text is missing between `idx` and `(idx)});`. The loop
    // condition and the start of the body, including where prefix_code is
    // created, are not visible. The visible tail appends prefix_code to mCodes.
    for(std::size_t idx=0; idx(idx)});
            mCodes.push_back(prefix_code);
        }
    }

    mTree.sortTable();
    //std::cout << dumpPrefixCodes();
}

// Returns mCodes[index]; no range check is performed in this function.
const PrefixCode& HuffmanCodeLengthTable::getCode(std::size_t index) const {
    return mCodes[index];
}

// Returns whatever mTree.dump() produces.
std::string HuffmanCodeLengthTable::dumpPrefixCodes() const {
    return mTree.dump();
}

// Looks `index` up in DEFLATE_PERMUTATION. Returns 0 when `index` is at or
// beyond DEFLATE_PERMUTATION_SIZE.
// NOTE(review): if 0 is also a legitimate table entry, callers cannot tell it
// apart from the out-of-range result - verify against the table definition.
std::size_t HuffmanCodeLengthTable::mapToDeflateIndex(std::size_t index) const {
    if (index>= DEFLATE_PERMUTATION_SIZE) {
        return 0;
    }
    else {
        return DEFLATE_PERMUTATION[index];
    }
}

// Forwards to mTree.getNumCodeLengths().
std::size_t HuffmanCodeLengthTable::getNumCodeLengths() const {
    return mTree.getNumCodeLengths();
}

// Forwards to mTree.findMatch(treeIndex, code) and returns its result
// unchanged. readNextSymbol() treats an engaged result as the decoded symbol.
// NOTE(review): the optional's payload type is missing from this signature.
std::optional HuffmanCodeLengthTable::findMatch(std::size_t treeIndex, uint32_t code) const {
    return mTree.findMatch(treeIndex, code);
}

// Forwards to mTree.getCodeLength(index).
unsigned HuffmanCodeLengthTable::getCodeLength(std::size_t index) const {
    return mTree.getCodeLength(index);
}

// Stores the code lengths later used by buildPrefixCodes() and
// buildCompressedLengthSequence().
//
// sequence:      the caller's code lengths.
// targetDeflate: recorded in mTargetDeflate. When true, mInputLengthSequence is
//                first reset to DEFLATE_PERMUTATION_SIZE zeros and then filled
//                by a loop whose body is not visible; when false, `sequence`
//                is copied as-is.
// NOTE(review): the vector's element type is missing from this signature.
void HuffmanCodeLengthTable::setInputLengthSequence(const std::vector& sequence, bool targetDeflate) {
    mTargetDeflate = targetDeflate;
    if (targetDeflate) {
        mInputLengthSequence = std::vector(DEFLATE_PERMUTATION_SIZE, 0);
        // NOTE(review): text is missing between `idx` and `(sequence[idx])`.
        // The loop condition and body are not visible; the visible tail looks
        // like the end of a debug trace. Presumably the loop places
        // sequence[idx] through mapToDeflateIndex(idx) - confirm.
        for(std::size_t idx=0; idx(sequence[idx]) << std::endl;
        }
    }
    else {
        mInputLengthSequence = sequence;
    }
}