Work on dynamic Huffman coding.

This commit is contained in:
James Grogan 2022-11-28 18:05:39 +00:00
parent 7f5009fb5e
commit a6e31c8d39
16 changed files with 456 additions and 95 deletions

View file

@ -2,6 +2,7 @@
#include "ByteUtils.h"
#include "RunLengthEncoder.h"
#include "BitStream.h"
#include <algorithm>
#include <sstream>
@ -24,20 +25,33 @@ void HuffmanCodeLengthTable::buildCompressedLengthSequence()
const auto count = entry.second;
if (count < 3)
{
for(unsigned idx=0; idx<3; idx++)
for(unsigned idx=0; idx<count; idx++)
{
mCompressedLengthSequence.push_back({length, 0});
}
}
else if (length == 0)
{
if(count >=3 && count <=10)
unsigned num_big = count / 138;
for(unsigned idx=0; idx<num_big; idx++)
{
mCompressedLengthSequence.push_back({17, count-3});
mCompressedLengthSequence.push_back({18, 127});
}
auto remainder_big = count % 138;
if (remainder_big > 10)
{
mCompressedLengthSequence.push_back({18, remainder_big-11});
}
else if(remainder_big > 2)
{
mCompressedLengthSequence.push_back({17, remainder_big-3});
}
else
{
mCompressedLengthSequence.push_back({18, count-11});
for(unsigned idx=0; idx<remainder_big; idx++)
{
mCompressedLengthSequence.push_back({0, 0});
}
}
}
else
@ -80,6 +94,69 @@ const std::vector<unsigned> HuffmanCodeLengthTable::getCompressedLengthCounts()
return mCompressedLengthCounts;
}
/// Look up the prefix code for @p symbol by delegating to the underlying tree.
/// @param symbol Symbol whose code is requested.
/// @return Whatever mTree.getCode() yields for the symbol, as an optional PrefixCode.
std::optional<PrefixCode> HuffmanCodeLengthTable::getCodeForSymbol(unsigned symbol) const
{
    auto code = mTree.getCode(symbol);
    return code;
}
/// Decode the next symbol from @p stream using this table's code lengths.
///
/// The code lengths are visited in index order. For each one, only the extra
/// bits needed to reach that length are read from the stream and merged into
/// the accumulated bit pattern, which is then offered to findMatch().
///
/// @param result Receives the decoded symbol; left untouched on failure.
/// @param stream Bit stream to read from. A null stream is treated as a failure.
/// @return true if a symbol was decoded, false if the table is empty, the
///         stream is null, a read fails, or no code matches.
bool HuffmanCodeLengthTable::readNextSymbol(unsigned& result, BitStream* stream)
{
    if (stream == nullptr || getNumCodeLengths() == 0)
    {
        return false;
    }

    unsigned working_index{0};
    auto length = getCodeLength(working_index);
    // Number of bits still to be read to reach the current code length.
    auto delta = length;

    unsigned char buffer{0};
    uint32_t working_bits{0};

    while (true)
    {
        // The read result was previously ignored, so an exhausted stream kept
        // "decoding" with stale buffer contents. Treat a failed read as a
        // decode failure instead.
        // NOTE(review): assumes a false-like return means the read failed - confirm against BitStream.
        if (!stream->readNextNBits(delta, buffer))
        {
            return false;
        }

        // Widen before shifting so the new bits land above the ones already collected.
        const unsigned hold = buffer;
        working_bits = working_bits | (hold << (length - delta));

        if (const auto symbol = findMatch(working_index, working_bits))
        {
            result = *symbol;
            return true;
        }

        working_index++;
        if (working_index >= getNumCodeLengths())
        {
            break;
        }

        // Only the difference to the next code length needs to be read.
        // NOTE(review): presumably code lengths are non-decreasing with index - confirm.
        const auto new_length = getCodeLength(working_index);
        delta = new_length - length;
        length = new_length;
    }

    std::cout << "SYMBOL NOT FOUND " << " with bits " << ByteUtils::toString(working_bits) << " and index " << working_index << std::endl;
    return false;
}
void HuffmanCodeLengthTable::buildPrefixCodes()
{
if(mInputLengthSequence.empty())
@ -100,6 +177,7 @@ void HuffmanCodeLengthTable::buildPrefixCodes()
for (unsigned bits = 1; bits <= max_length; bits++)
{
code = (code + counts[bits-1]) << 1;
//std::cout << "Start code for bit " << bits << " is " << ByteUtils::toString(code) << " | dec " << code << " count " << counts[bits-1] << std::endl;
next_code[bits] = code;
}
@ -115,7 +193,7 @@ void HuffmanCodeLengthTable::buildPrefixCodes()
}
}
mTree.sortTable();
//std::cout << dumpPrefixCodes();
std::cout << dumpPrefixCodes();
}
const PrefixCode& HuffmanCodeLengthTable::getCode(std::size_t index) const