Work on dynamic huffman coding.
This commit is contained in:
parent
7f5009fb5e
commit
a6e31c8d39
16 changed files with 456 additions and 95 deletions
|
@ -39,7 +39,7 @@ bool DeflateBlock::read()
|
||||||
{
|
{
|
||||||
auto working_byte = *mInputStream->readNextByte();
|
auto working_byte = *mInputStream->readNextByte();
|
||||||
|
|
||||||
std::cout << mInputStream->logNextNBytes(11);
|
std::cout << mInputStream->logNextNBytes(60);
|
||||||
std::cout << "DeflateBlock::read location " << mInputStream->logLocation();
|
std::cout << "DeflateBlock::read location " << mInputStream->logLocation();
|
||||||
|
|
||||||
unsigned char final_block{0};
|
unsigned char final_block{0};
|
||||||
|
|
|
@ -84,6 +84,6 @@ bool DeflateEncoder::decode()
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
#include "ByteUtils.h"
|
#include "ByteUtils.h"
|
||||||
#include "RunLengthEncoder.h"
|
#include "RunLengthEncoder.h"
|
||||||
|
#include "BitStream.h"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
@ -24,20 +25,33 @@ void HuffmanCodeLengthTable::buildCompressedLengthSequence()
|
||||||
const auto count = entry.second;
|
const auto count = entry.second;
|
||||||
if (count < 3)
|
if (count < 3)
|
||||||
{
|
{
|
||||||
for(unsigned idx=0; idx<3; idx++)
|
for(unsigned idx=0; idx<count; idx++)
|
||||||
{
|
{
|
||||||
mCompressedLengthSequence.push_back({length, 0});
|
mCompressedLengthSequence.push_back({length, 0});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (length == 0)
|
else if (length == 0)
|
||||||
{
|
{
|
||||||
if(count >=3 && count <=10)
|
unsigned num_big = count / 138;
|
||||||
|
for(unsigned idx=0; idx<num_big; idx++)
|
||||||
{
|
{
|
||||||
mCompressedLengthSequence.push_back({17, count-3});
|
mCompressedLengthSequence.push_back({18, 127});
|
||||||
|
}
|
||||||
|
auto remainder_big = count % 138;
|
||||||
|
if (remainder_big > 10)
|
||||||
|
{
|
||||||
|
mCompressedLengthSequence.push_back({18, remainder_big-11});
|
||||||
|
}
|
||||||
|
else if(remainder_big > 2)
|
||||||
|
{
|
||||||
|
mCompressedLengthSequence.push_back({17, remainder_big-3});
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
mCompressedLengthSequence.push_back({18, count-11});
|
for(unsigned idx=0; idx<remainder_big; idx++)
|
||||||
|
{
|
||||||
|
mCompressedLengthSequence.push_back({0, 0});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -80,6 +94,69 @@ const std::vector<unsigned> HuffmanCodeLengthTable::getCompressedLengthCounts()
|
||||||
return mCompressedLengthCounts;
|
return mCompressedLengthCounts;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::optional<PrefixCode> HuffmanCodeLengthTable::getCodeForSymbol(unsigned symbol) const
|
||||||
|
{
|
||||||
|
return mTree.getCode(symbol);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool HuffmanCodeLengthTable::readNextSymbol(unsigned& result, BitStream* stream)
|
||||||
|
{
|
||||||
|
if (getNumCodeLengths() == 0)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned working_index{0};
|
||||||
|
auto length = getCodeLength(working_index);
|
||||||
|
auto delta = length;
|
||||||
|
|
||||||
|
bool found{false};
|
||||||
|
unsigned char buffer{0};
|
||||||
|
uint32_t working_bits{0};
|
||||||
|
unsigned working_symbol{0};
|
||||||
|
|
||||||
|
while(!found)
|
||||||
|
{
|
||||||
|
auto valid = stream->readNextNBits(delta, buffer);
|
||||||
|
//std::cout << "Got buffer " << ByteUtils::toString(buffer) << std::endl;;
|
||||||
|
|
||||||
|
unsigned hold = buffer;
|
||||||
|
working_bits = working_bits | (hold << (length - delta));
|
||||||
|
//std::cout << "Read " << delta << " bits with length " << length << " and value " << ByteUtils::toString(working_bits) << std::endl;
|
||||||
|
|
||||||
|
if (const auto symbol = findMatch(working_index, working_bits))
|
||||||
|
{
|
||||||
|
found = true;
|
||||||
|
working_symbol = *symbol;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
working_index++;
|
||||||
|
if (working_index >= getNumCodeLengths())
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto new_length = getCodeLength(working_index);
|
||||||
|
delta = new_length - length;
|
||||||
|
length = new_length;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (found)
|
||||||
|
{
|
||||||
|
result = working_symbol;
|
||||||
|
// std::cout << "Found symbol " << working_symbol << " with bits " << ByteUtils::toString(working_bits) << std::endl;
|
||||||
|
// std::cout << "At Byte offset " << stream->getCurrentByteOffset() << " and bit offset " << stream->getCurrentBitOffset() << std::endl;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
std::cout << "SYMBOL NOT FOUND " << " with bits " << ByteUtils::toString(working_bits) << " and index " << working_index << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void HuffmanCodeLengthTable::buildPrefixCodes()
|
void HuffmanCodeLengthTable::buildPrefixCodes()
|
||||||
{
|
{
|
||||||
if(mInputLengthSequence.empty())
|
if(mInputLengthSequence.empty())
|
||||||
|
@ -100,6 +177,7 @@ void HuffmanCodeLengthTable::buildPrefixCodes()
|
||||||
for (unsigned bits = 1; bits <= max_length; bits++)
|
for (unsigned bits = 1; bits <= max_length; bits++)
|
||||||
{
|
{
|
||||||
code = (code + counts[bits-1]) << 1;
|
code = (code + counts[bits-1]) << 1;
|
||||||
|
//std::cout << "Start code for bit " << bits << " is " << ByteUtils::toString(code) << " | dec " << code << " count " << counts[bits-1] << std::endl;
|
||||||
next_code[bits] = code;
|
next_code[bits] = code;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -115,7 +193,7 @@ void HuffmanCodeLengthTable::buildPrefixCodes()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
mTree.sortTable();
|
mTree.sortTable();
|
||||||
//std::cout << dumpPrefixCodes();
|
std::cout << dumpPrefixCodes();
|
||||||
}
|
}
|
||||||
|
|
||||||
const PrefixCode& HuffmanCodeLengthTable::getCode(std::size_t index) const
|
const PrefixCode& HuffmanCodeLengthTable::getCode(std::size_t index) const
|
||||||
|
|
|
@ -6,6 +6,8 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
|
|
||||||
|
class BitStream;
|
||||||
|
|
||||||
class HuffmanCodeLengthTable
|
class HuffmanCodeLengthTable
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
@ -21,6 +23,8 @@ public:
|
||||||
|
|
||||||
const PrefixCode& getCode(std::size_t index) const;
|
const PrefixCode& getCode(std::size_t index) const;
|
||||||
|
|
||||||
|
std::optional<PrefixCode> getCodeForSymbol(unsigned symbol) const;
|
||||||
|
|
||||||
using CompressedSequenceEntry = std::pair<unsigned, unsigned>;
|
using CompressedSequenceEntry = std::pair<unsigned, unsigned>;
|
||||||
const std::vector<CompressedSequenceEntry>& getCompressedLengthSequence() const;
|
const std::vector<CompressedSequenceEntry>& getCompressedLengthSequence() const;
|
||||||
|
|
||||||
|
@ -34,6 +38,8 @@ public:
|
||||||
|
|
||||||
void setInputLengthSequence(const std::vector<unsigned char>& sequence, bool targetDeflate = true);
|
void setInputLengthSequence(const std::vector<unsigned char>& sequence, bool targetDeflate = true);
|
||||||
|
|
||||||
|
bool readNextSymbol(unsigned& buffer, BitStream* stream);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
HuffmanTree mTree;
|
HuffmanTree mTree;
|
||||||
|
|
|
@ -8,6 +8,13 @@
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
|
||||||
|
// Base back-reference distance for each distance code that carries extra
// bits. Entry i belongs to distance code (i + 4) and is followed by
// (i / 2 + 1) extra bits, so consecutive entries are spaced by
// 2^(i / 2 + 1). Distance codes 0-3 map directly to distances 1-4 and are
// not stored here.
std::vector<unsigned> DISTANCE_OFFSETS
{
    5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
    257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097,
    6145, 8193, 12289, 16385, 24577
};
|
||||||
|
|
||||||
HuffmanStream::HuffmanStream(BitStream* inputStream, BitStream* outputStream)
|
HuffmanStream::HuffmanStream(BitStream* inputStream, BitStream* outputStream)
|
||||||
: mInputStream(inputStream),
|
: mInputStream(inputStream),
|
||||||
mOutputStream(outputStream)
|
mOutputStream(outputStream)
|
||||||
|
@ -22,69 +29,78 @@ void HuffmanStream::generateFixedCodeMapping()
|
||||||
mCodeLengthTable.buildPrefixCodes();
|
mCodeLengthTable.buildPrefixCodes();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool HuffmanStream::readNextCodeLengthSymbol(unsigned& final_symbol)
|
bool HuffmanStream::readNextCodeLengthSymbol(unsigned& buffer)
|
||||||
{
|
{
|
||||||
if (mCodeLengthTable.getNumCodeLengths() == 0)
|
return mCodeLengthTable.readNextSymbol(buffer, mInputStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool HuffmanStream::readNextLiteralSymbol(unsigned& buffer)
|
||||||
|
{
|
||||||
|
return mLiteralTable.readNextSymbol(buffer, mInputStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool HuffmanStream::readNextDistanceSymbol(unsigned& buffer)
|
||||||
|
{
|
||||||
|
unsigned base_symbol{0};
|
||||||
|
unsigned char extra_bits{0};
|
||||||
|
const auto valid = mDistanceTable.readNextSymbol(base_symbol, mInputStream);
|
||||||
|
if (!valid)
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned working_index{0};
|
//std::cout << "Got distance base symbol " << base_symbol << std::endl;
|
||||||
auto length = mCodeLengthTable.getCodeLength(working_index);
|
|
||||||
auto delta = length;
|
|
||||||
|
|
||||||
bool found{false};
|
if (base_symbol <= 3)
|
||||||
unsigned char buffer{0};
|
|
||||||
uint32_t working_bits{0};
|
|
||||||
unsigned working_symbol{0};
|
|
||||||
|
|
||||||
while(!found)
|
|
||||||
{
|
{
|
||||||
auto valid = mInputStream->readNextNBits(delta, buffer);
|
buffer = 1 + base_symbol;
|
||||||
//std::cout << "Got buffer " << ByteUtils::toString(buffer) << std::endl;;
|
|
||||||
|
|
||||||
working_bits = working_bits | (buffer << (length - delta));
|
|
||||||
std::cout << "Read " << delta << " bits with length " << length << " and value " << ByteUtils::toString(working_bits) << std::endl;
|
|
||||||
|
|
||||||
if (const auto symbol = mCodeLengthTable.findMatch(working_index, working_bits))
|
|
||||||
{
|
|
||||||
found = true;
|
|
||||||
working_symbol = *symbol;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
working_index++;
|
|
||||||
if (working_index >= mCodeLengthTable.getNumCodeLengths())
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto new_length = mCodeLengthTable.getCodeLength(working_index);
|
|
||||||
delta = new_length - length;
|
|
||||||
length = new_length;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (found)
|
|
||||||
{
|
|
||||||
final_symbol = working_symbol;
|
|
||||||
std::cout << "Found symbol " << working_symbol << " with bits " << ByteUtils::toString(working_bits) << std::endl;
|
|
||||||
std::cout << "At Byte offset " << mInputStream->getCurrentByteOffset() << " and bit offset " << mInputStream->getCurrentBitOffset() << std::endl;
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
std::cout << "SYMBOL NOT FOUND " << " with bits " << ByteUtils::toString(working_bits) << std::endl;
|
const auto num_extra_bits = (base_symbol - 3 - 1)/2 + 1;
|
||||||
return false;
|
unsigned extra_sum{0};
|
||||||
|
if (num_extra_bits > 8)
|
||||||
|
{
|
||||||
|
auto byte_val = *mInputStream->readNextByte();
|
||||||
|
mInputStream->readNextNBits(num_extra_bits-8, extra_bits);
|
||||||
|
extra_sum = extra_bits;
|
||||||
|
extra_sum = extra_sum << (num_extra_bits - 8);
|
||||||
|
extra_sum |= byte_val;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
mInputStream->readNextNBits(num_extra_bits, extra_bits);
|
||||||
|
extra_sum = extra_bits;
|
||||||
|
}
|
||||||
|
buffer = DISTANCE_OFFSETS[base_symbol - 4] + extra_sum;
|
||||||
}
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HuffmanStream::readLiteralCodeLengths()
|
void HuffmanStream::addValue(unsigned value, unsigned& count, unsigned& lastValue, std::vector<unsigned char>& literals, unsigned numLiterals, std::vector<unsigned char>& distances)
|
||||||
{
|
{
|
||||||
std::vector<unsigned> lengths;
|
if (count < mNumLiterals)
|
||||||
unsigned symbol{0};
|
{
|
||||||
|
literals[count] = value;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
distances[count - mNumLiterals] = value;
|
||||||
|
}
|
||||||
|
lastValue = value;
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
|
||||||
while(lengths.size() < mNumLiterals)
|
void HuffmanStream::readCodeLengths()
|
||||||
|
{
|
||||||
|
std::vector<unsigned char> literal_lengths(288, 0);
|
||||||
|
std::vector<unsigned char> distance_lengths(32, 0);
|
||||||
|
unsigned symbol{0};
|
||||||
|
unsigned count{0};
|
||||||
|
|
||||||
|
unsigned last_value{0};
|
||||||
|
|
||||||
|
while(count < mNumLiterals + mNumDistances)
|
||||||
{
|
{
|
||||||
bool valid = readNextCodeLengthSymbol(symbol);
|
bool valid = readNextCodeLengthSymbol(symbol);
|
||||||
|
|
||||||
|
@ -96,18 +112,17 @@ void HuffmanStream::readLiteralCodeLengths()
|
||||||
|
|
||||||
if (symbol < 16)
|
if (symbol < 16)
|
||||||
{
|
{
|
||||||
lengths.push_back(symbol);
|
addValue(symbol, count, last_value, literal_lengths, mNumLiterals, distance_lengths);
|
||||||
}
|
}
|
||||||
else if(symbol == 16)
|
else if(symbol == 16)
|
||||||
{
|
{
|
||||||
unsigned char num_reps{0};
|
unsigned char num_reps{0};
|
||||||
mInputStream->readNextNBits(2, num_reps);
|
mInputStream->readNextNBits(2, num_reps);
|
||||||
|
|
||||||
auto last_val = lengths[lengths.size()-1];
|
|
||||||
std::cout << "Got val 16 doing " << 3 + num_reps << std::endl;
|
std::cout << "Got val 16 doing " << 3 + num_reps << std::endl;
|
||||||
for(unsigned idx=0; idx< 3 + num_reps; idx++)
|
for(unsigned idx=0; idx< 3 + num_reps; idx++)
|
||||||
{
|
{
|
||||||
lengths.push_back(last_val);
|
addValue(last_value, count, last_value, literal_lengths, mNumLiterals, distance_lengths);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if(symbol == 17)
|
else if(symbol == 17)
|
||||||
|
@ -118,7 +133,7 @@ void HuffmanStream::readLiteralCodeLengths()
|
||||||
std::cout << "Got val 17 doing " << 3 + num_reps << std::endl;
|
std::cout << "Got val 17 doing " << 3 + num_reps << std::endl;
|
||||||
for(unsigned idx=0; idx< 3 + num_reps; idx++)
|
for(unsigned idx=0; idx< 3 + num_reps; idx++)
|
||||||
{
|
{
|
||||||
lengths.push_back(0);
|
addValue(0, count, last_value, literal_lengths, mNumLiterals, distance_lengths);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if(symbol == 18)
|
else if(symbol == 18)
|
||||||
|
@ -129,10 +144,133 @@ void HuffmanStream::readLiteralCodeLengths()
|
||||||
std::cout << "Got val 18 doing " << 11 + num_reps << std::endl;
|
std::cout << "Got val 18 doing " << 11 + num_reps << std::endl;
|
||||||
for(unsigned idx=0; idx< 11 + num_reps; idx++)
|
for(unsigned idx=0; idx< 11 + num_reps; idx++)
|
||||||
{
|
{
|
||||||
lengths.push_back(0);
|
addValue(0, count, last_value, literal_lengths, mNumLiterals, distance_lengths);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::cout << "Got final literal length sequence " << std::endl;
|
||||||
|
for(unsigned idx=0; idx<literal_lengths.size(); idx++)
|
||||||
|
{
|
||||||
|
std::cout << static_cast<int>(literal_lengths[idx]) << "," ;
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
|
|
||||||
|
std::cout << "Got final distance length sequence " << std::endl;
|
||||||
|
for(unsigned idx=0; idx<distance_lengths.size(); idx++)
|
||||||
|
{
|
||||||
|
std::cout << static_cast<int>(distance_lengths[idx]) << "," ;
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
|
|
||||||
|
mLiteralTable.setInputLengthSequence(literal_lengths, false);
|
||||||
|
mLiteralTable.buildPrefixCodes();
|
||||||
|
|
||||||
|
mDistanceTable.setInputLengthSequence(distance_lengths, false);
|
||||||
|
mDistanceTable.buildPrefixCodes();
|
||||||
|
}
|
||||||
|
|
||||||
|
void HuffmanStream::copyFromBuffer(unsigned length, unsigned distance)
|
||||||
|
{
|
||||||
|
unsigned offset = mBuffer.size() - 1 - distance;
|
||||||
|
for(unsigned idx=0; idx<length; idx++)
|
||||||
|
{
|
||||||
|
auto symbol = mBuffer[offset + idx];
|
||||||
|
|
||||||
|
mOutputStream->writeByte(symbol);
|
||||||
|
mBuffer.push_back(symbol);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void HuffmanStream::readSymbols()
|
||||||
|
{
|
||||||
|
bool hit_end_stream{false};
|
||||||
|
unsigned symbol{0};
|
||||||
|
unsigned distance{0};
|
||||||
|
while(!hit_end_stream)
|
||||||
|
{
|
||||||
|
const auto valid = readNextLiteralSymbol(symbol);
|
||||||
|
if (!valid)
|
||||||
|
{
|
||||||
|
std::cout << "Hit unknown symbol - bailing out" << std::endl;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << "Got symbol " << symbol << std::endl;
|
||||||
|
|
||||||
|
if(symbol <= 255)
|
||||||
|
{
|
||||||
|
mOutputStream->writeByte(symbol);
|
||||||
|
mBuffer.push_back(symbol);
|
||||||
|
}
|
||||||
|
else if(symbol == 256)
|
||||||
|
{
|
||||||
|
hit_end_stream = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else if (symbol <= 264)
|
||||||
|
{
|
||||||
|
auto length = 3 + symbol - 257;
|
||||||
|
const auto valid_dist = readNextDistanceSymbol(distance);
|
||||||
|
copyFromBuffer(length, distance);
|
||||||
|
}
|
||||||
|
else if (symbol <= 268)
|
||||||
|
{
|
||||||
|
unsigned char extra{0};
|
||||||
|
mInputStream->readNextNBits(1, extra);
|
||||||
|
|
||||||
|
auto length = 11 + 2*(symbol - 265) + extra;
|
||||||
|
const auto valid_dist = readNextDistanceSymbol(distance);
|
||||||
|
copyFromBuffer(length, distance);
|
||||||
|
}
|
||||||
|
else if (symbol <= 272)
|
||||||
|
{
|
||||||
|
unsigned char extra{0};
|
||||||
|
mInputStream->readNextNBits(2, extra);
|
||||||
|
|
||||||
|
auto length = 19 + 4*(symbol - 269) + extra;
|
||||||
|
const auto valid_dist = readNextDistanceSymbol(distance);
|
||||||
|
copyFromBuffer(length, distance);
|
||||||
|
}
|
||||||
|
else if (symbol <= 276)
|
||||||
|
{
|
||||||
|
unsigned char extra{0};
|
||||||
|
mInputStream->readNextNBits(3, extra);
|
||||||
|
|
||||||
|
auto length = 35 + 8*(symbol - 273) + extra;
|
||||||
|
const auto valid_dist = readNextDistanceSymbol(distance);
|
||||||
|
copyFromBuffer(length, distance);
|
||||||
|
}
|
||||||
|
else if (symbol <= 280)
|
||||||
|
{
|
||||||
|
unsigned char extra{0};
|
||||||
|
mInputStream->readNextNBits(4, extra);
|
||||||
|
|
||||||
|
auto length = 67 + 16*(symbol - 277) + extra;
|
||||||
|
const auto valid_dist = readNextDistanceSymbol(distance);
|
||||||
|
copyFromBuffer(length, distance);
|
||||||
|
}
|
||||||
|
else if (symbol <= 284)
|
||||||
|
{
|
||||||
|
unsigned char extra{0};
|
||||||
|
mInputStream->readNextNBits(5, extra);
|
||||||
|
|
||||||
|
auto length = 131 + 32*(symbol - 281) + extra;
|
||||||
|
const auto valid_dist = readNextDistanceSymbol(distance);
|
||||||
|
copyFromBuffer(length, distance);
|
||||||
|
}
|
||||||
|
else if (symbol == 285)
|
||||||
|
{
|
||||||
|
auto length = 258;
|
||||||
|
const auto valid_dist = readNextDistanceSymbol(distance);
|
||||||
|
copyFromBuffer(length, distance);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (hit_end_stream)
|
||||||
|
{
|
||||||
|
std::cout << "Found end of stream ok" << std::endl;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool HuffmanStream::decode()
|
bool HuffmanStream::decode()
|
||||||
|
@ -140,6 +278,14 @@ bool HuffmanStream::decode()
|
||||||
if (!mUsingFixedCodes)
|
if (!mUsingFixedCodes)
|
||||||
{
|
{
|
||||||
readCodingsTable();
|
readCodingsTable();
|
||||||
|
|
||||||
|
readSymbols();
|
||||||
|
|
||||||
|
std::cout << "Got final buffer size " << mBuffer.size() << std::endl;
|
||||||
|
for(unsigned idx=0; idx< 100; idx++)
|
||||||
|
{
|
||||||
|
//std::cout << idx << " | " << mBuffer[idx] << std::endl;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -163,8 +309,6 @@ bool HuffmanStream::decode()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -190,13 +334,13 @@ void HuffmanStream::readCodingsTable()
|
||||||
unsigned char buffer{0};
|
unsigned char buffer{0};
|
||||||
for(unsigned idx = 0; idx< num_code_lengths; idx++)
|
for(unsigned idx = 0; idx< num_code_lengths; idx++)
|
||||||
{
|
{
|
||||||
std::cout << "After codings " << mInputStream->logLocation();
|
|
||||||
mInputStream->readNextNBits(3, buffer);
|
mInputStream->readNextNBits(3, buffer);
|
||||||
|
std::cout << "Got coding table value " << idx << " | " << static_cast<int>(buffer) << " | " << ByteUtils::toString(buffer) << std::endl;
|
||||||
sequence[idx] = buffer;
|
sequence[idx] = buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
mCodeLengthTable.setInputLengthSequence(sequence, true);
|
mCodeLengthTable.setInputLengthSequence(sequence, true);
|
||||||
mCodeLengthTable.buildPrefixCodes();
|
mCodeLengthTable.buildPrefixCodes();
|
||||||
|
|
||||||
readLiteralCodeLengths();
|
readCodeLengths();
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,17 +22,30 @@ public:
|
||||||
private:
|
private:
|
||||||
void readCodingsTable();
|
void readCodingsTable();
|
||||||
|
|
||||||
void readLiteralCodeLengths();
|
void readCodeLengths();
|
||||||
|
|
||||||
|
void readSymbols();
|
||||||
|
|
||||||
|
void copyFromBuffer(unsigned length, unsigned distance);
|
||||||
|
|
||||||
|
bool readNextLiteralSymbol(unsigned& buffer);
|
||||||
|
|
||||||
|
bool readNextDistanceSymbol(unsigned& buffer);
|
||||||
|
|
||||||
bool readNextCodeLengthSymbol(unsigned& buffer);
|
bool readNextCodeLengthSymbol(unsigned& buffer);
|
||||||
|
|
||||||
|
void addValue(unsigned value, unsigned& count, unsigned& lastValue, std::vector<unsigned char>& literals, unsigned numLiterals, std::vector<unsigned char>& distances);
|
||||||
|
|
||||||
BitStream* mInputStream;
|
BitStream* mInputStream;
|
||||||
BitStream* mOutputStream;
|
BitStream* mOutputStream;
|
||||||
|
|
||||||
|
std::vector<unsigned> mBuffer;
|
||||||
|
|
||||||
unsigned mNumLiterals{0}; // HLIT + 257
|
unsigned mNumLiterals{0}; // HLIT + 257
|
||||||
unsigned mNumDistances{0}; // HDIST + 1
|
unsigned mNumDistances{0}; // HDIST + 1
|
||||||
|
|
||||||
bool mUsingFixedCodes{false};
|
bool mUsingFixedCodes{false};
|
||||||
HuffmanCodeLengthTable mCodeLengthTable;
|
HuffmanCodeLengthTable mCodeLengthTable;
|
||||||
|
HuffmanCodeLengthTable mLiteralTable;
|
||||||
|
HuffmanCodeLengthTable mDistanceTable;
|
||||||
};
|
};
|
||||||
|
|
|
@ -81,6 +81,21 @@ std::optional<HuffmanTree::Symbol> HuffmanTree::findMatch(std::size_t treeIndex,
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Linear search over every code-length group in the table for the entry
// whose symbol equals `symbol`. Returns that entry's prefix code, or an empty
// optional when no entry carries the symbol.
std::optional<PrefixCode> HuffmanTree::getCode(Symbol symbol) const
{
    for(const auto& length_group : mTable)
    {
        const auto& codes = length_group.second;
        for(const auto& code_and_symbol : codes)
        {
            const bool matches = (code_and_symbol.second == symbol);
            if (!matches)
            {
                continue;
            }
            return code_and_symbol.first;
        }
    }
    return std::nullopt;
}
|
||||||
|
|
||||||
std::size_t HuffmanTree::getNumCodeLengths() const
|
std::size_t HuffmanTree::getNumCodeLengths() const
|
||||||
{
|
{
|
||||||
return mTable.size();
|
return mTable.size();
|
||||||
|
|
|
@ -46,6 +46,8 @@ public:
|
||||||
|
|
||||||
unsigned getCodeLength(std::size_t idx) const;
|
unsigned getCodeLength(std::size_t idx) const;
|
||||||
|
|
||||||
|
std::optional<PrefixCode> getCode(Symbol symbol) const;
|
||||||
|
|
||||||
void sortTable();
|
void sortTable();
|
||||||
private:
|
private:
|
||||||
std::vector<CodeLengthData> mTable;
|
std::vector<CodeLengthData> mTable;
|
||||||
|
|
|
@ -26,9 +26,9 @@ unsigned char ByteUtils::getByteN(uint32_t input, unsigned n)
|
||||||
return (input << 8*n) >> 24;
|
return (input << 8*n) >> 24;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned char ByteUtils::mirror(unsigned char byte, unsigned length)
|
uint32_t ByteUtils::mirror(uint32_t byte, unsigned length)
|
||||||
{
|
{
|
||||||
unsigned char ret{0};
|
uint32_t ret{0};
|
||||||
for(unsigned idx=0; idx<length; idx++)
|
for(unsigned idx=0; idx<length; idx++)
|
||||||
{
|
{
|
||||||
if (getBitN(byte, length - 1 - idx))
|
if (getBitN(byte, length - 1 - idx))
|
||||||
|
@ -121,31 +121,25 @@ unsigned char ByteUtils::getFromString(const std::string& string)
|
||||||
std::string ByteUtils::toString(uint32_t input, unsigned length)
|
std::string ByteUtils::toString(uint32_t input, unsigned length)
|
||||||
{
|
{
|
||||||
std::string ret;
|
std::string ret;
|
||||||
std::string working;
|
if (length > 8)
|
||||||
for(unsigned idx=0; idx<length; idx++)
|
|
||||||
{
|
{
|
||||||
if (idx > 0 && idx % 8 == 0)
|
unsigned overshoot = length - 8;
|
||||||
|
for(unsigned idx=0; idx<overshoot; idx++)
|
||||||
{
|
{
|
||||||
if (ret.empty())
|
ret += getBitN(input, length - 1 - idx) ? '1' : '0';
|
||||||
{
|
}
|
||||||
ret = working;
|
ret += "-";
|
||||||
}
|
for(unsigned idx=0; idx<8; idx++)
|
||||||
else
|
{
|
||||||
{
|
ret += getBitN(input, 7 - idx) ? '1' : '0';
|
||||||
ret = working + '-' + ret;
|
|
||||||
}
|
|
||||||
working = "";
|
|
||||||
}
|
}
|
||||||
working += getBitN(input, 7 - idx) ? '1' : '0';
|
|
||||||
}
|
}
|
||||||
|
else
|
||||||
if (length <= 8)
|
|
||||||
{
|
{
|
||||||
ret = working;
|
for(unsigned idx=0; idx<length; idx++)
|
||||||
}
|
{
|
||||||
else if(!working.empty())
|
ret += getBitN(input, 7 - idx) ? '1' : '0';
|
||||||
{
|
}
|
||||||
ret = working + '-' + ret;
|
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,7 +33,7 @@ public:
|
||||||
|
|
||||||
static std::string toString(uint32_t input, unsigned length = 8);
|
static std::string toString(uint32_t input, unsigned length = 8);
|
||||||
|
|
||||||
static unsigned char mirror(unsigned char byte, unsigned length=0);
|
static uint32_t mirror(uint32_t input, unsigned length=0);
|
||||||
|
|
||||||
static void ReverseBuffer(char* buffer, char* reverse, unsigned size, unsigned targetSize);
|
static void ReverseBuffer(char* buffer, char* reverse, unsigned size, unsigned targetSize);
|
||||||
|
|
||||||
|
|
|
@ -24,7 +24,6 @@ void BitStream::write(uint32_t data)
|
||||||
unsigned num_bytes = sizeof(uint32_t);
|
unsigned num_bytes = sizeof(uint32_t);
|
||||||
for(unsigned idx=0; idx<num_bytes;idx++)
|
for(unsigned idx=0; idx<num_bytes;idx++)
|
||||||
{
|
{
|
||||||
std::cout << "Writing byte " << idx << " for multibyte" << std::endl;
|
|
||||||
writeByte(ByteUtils::getByteN(data, idx));
|
writeByte(ByteUtils::getByteN(data, idx));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -71,7 +71,7 @@ void BufferBitStream::writeByte(unsigned char data, bool checkOverflow)
|
||||||
{
|
{
|
||||||
mChecksumCalculator->addValue(out_byte);
|
mChecksumCalculator->addValue(out_byte);
|
||||||
}
|
}
|
||||||
std::cout << "Writing byte " << ByteUtils::toString(out_byte) << " had bitoffset of " << mBitOffset << std::endl;
|
//std::cout << "Writing byte " << ByteUtils::toString(out_byte) << " had bitoffset of " << mBitOffset << std::endl;
|
||||||
mBuffer.push_back(out_byte);
|
mBuffer.push_back(out_byte);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -85,6 +85,10 @@ public:
|
||||||
{
|
{
|
||||||
image_stream->writeByte(*byte);
|
image_stream->writeByte(*byte);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
//std::cout << "Got filter type " << static_cast<int>(working_filter_type) << std::endl;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
count++;
|
count++;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
#include "HuffmanStream.h"
|
#include "HuffmanStream.h"
|
||||||
|
#include "BufferBitStream.h"
|
||||||
|
|
||||||
void testHuffmanCodeLengthTable()
|
void testHuffmanCodeLengthTable()
|
||||||
{
|
{
|
||||||
|
@ -31,13 +31,115 @@ void testHuffmanCodeLengthTable()
|
||||||
{
|
{
|
||||||
std::cout << "Slot " << idx << " length " << compressed_lengths[idx] << std::endl;
|
std::cout << "Slot " << idx << " length " << compressed_lengths[idx] << std::endl;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void testLiteralsTable()
|
||||||
|
{
|
||||||
|
std::vector<unsigned char> lengths = {7,4,4,7,5,5,7,7,6,6,7,6,6,6,8,6,6,8,
|
||||||
|
6,6,7,6,8,7,7,7,7,7,7,6,6,7,7,6,6,7,7,8,8,7,7,7,6,6,7,7,7,7,6,7,7,7,
|
||||||
|
7,7,7,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,4,4,5,6,7,7,9,8,8,9,9,8,8,7,
|
||||||
|
7,8,7,6,8,9,9,11,10,8,7,7,8,8,7,3,0,9,9,9,9,9,8,6,7,7,7,7,9,5,7,4,7,4,4,4,3,4,4,4,4,4,5,5,6};
|
||||||
|
|
||||||
|
HuffmanCodeLengthTable table;
|
||||||
|
table.setInputLengthSequence(lengths, false);
|
||||||
|
|
||||||
|
table.buildCompressedLengthSequence();
|
||||||
|
|
||||||
|
auto compressed_sequence = table.getCompressedLengthSequence();
|
||||||
|
for (auto entry : compressed_sequence)
|
||||||
|
{
|
||||||
|
std::cout << "Code " << entry.first << " extra bits " << entry.second << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto compressed_lengths = table.getCompressedLengthCounts();
|
||||||
|
for(unsigned idx = 0; idx<compressed_lengths.size(); idx++)
|
||||||
|
{
|
||||||
|
std::cout << "Slot " << idx << " length " << compressed_lengths[idx] << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
HuffmanCodeLengthTable codingTable;
|
||||||
|
std::vector<unsigned char> coding_lengths{4, 0, 6, 7, 3, 2, 4, 2, 7, 4, 6, 3, 0, 6, 0, 0, 0, 0, 0};
|
||||||
|
codingTable.setInputLengthSequence(coding_lengths, true);
|
||||||
|
codingTable.buildPrefixCodes();
|
||||||
|
|
||||||
|
BufferBitStream out_stream;
|
||||||
|
out_stream.writeNBits(1, 1);
|
||||||
|
out_stream.writeNBits(2, 2);
|
||||||
|
|
||||||
|
out_stream.writeNBits(29, 5);
|
||||||
|
out_stream.writeNBits(29, 5);
|
||||||
|
out_stream.writeNBits(10, 4);
|
||||||
|
|
||||||
|
/*
|
||||||
|
std::vector<unsigned char> permuted(19, 0);
|
||||||
|
static constexpr unsigned DEFLATE_PERMUTATION[19]{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
|
||||||
|
unsigned count = 0;
|
||||||
|
for (auto length : coding_lengths)
|
||||||
|
{
|
||||||
|
permuted[DEFLATE_PERMUTATION[count]] = length;
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned skip_count = 0;
|
||||||
|
for(unsigned idx=0; idx<permuted.size();idx++)
|
||||||
|
{
|
||||||
|
if (permuted[permuted.size() - 1 - idx] == 0)
|
||||||
|
{
|
||||||
|
skip_count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::cout << "Got skip count " << skip_count << std::endl;
|
||||||
|
|
||||||
|
for(unsigned idx=0; idx<permuted.size() - skip_count;idx++)
|
||||||
|
{
|
||||||
|
out_stream.writeNBits(permuted[idx], 3);
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
for(unsigned idx=0; idx<14;idx++)
|
||||||
|
{
|
||||||
|
out_stream.writeNBits(coding_lengths[idx], 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const auto& entry : compressed_sequence)
|
||||||
|
{
|
||||||
|
auto prefix_code = *codingTable.getCodeForSymbol(entry.first);
|
||||||
|
out_stream.writeNBits(prefix_code.getData(), prefix_code.getLength());
|
||||||
|
|
||||||
|
std::cout << "Stream count " << out_stream.getBuffer().size() << " for entry " << entry.first << std::endl;
|
||||||
|
|
||||||
|
if (entry.first == 16)
|
||||||
|
{
|
||||||
|
out_stream.writeNBits(entry.second, 2);
|
||||||
|
}
|
||||||
|
else if (entry.first == 17)
|
||||||
|
{
|
||||||
|
out_stream.writeNBits(entry.second, 3);
|
||||||
|
}
|
||||||
|
else if (entry.first == 18)
|
||||||
|
{
|
||||||
|
out_stream.writeNBits(entry.second, 7);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out_stream.resetOffsets();
|
||||||
|
|
||||||
|
std::cout << "Output is: " << std::endl;
|
||||||
|
auto dump = out_stream.logNextNBytes(out_stream.getBuffer().size());
|
||||||
|
std::cout << dump << std::endl;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
testHuffmanCodeLengthTable();
|
//testHuffmanCodeLengthTable();
|
||||||
|
|
||||||
|
testLiteralsTable();
|
||||||
//HuffmanStream stream(nullptr, nullptr);
|
//HuffmanStream stream(nullptr, nullptr);
|
||||||
|
|
||||||
//stream.setCodeLengthAlphabetLengths({3, 3, 3, 3, 3, 2, 4, 4});
|
//stream.setCodeLengthAlphabetLengths({3, 3, 3, 3, 3, 2, 4, 4});
|
||||||
|
|
|
@ -29,5 +29,9 @@ int main()
|
||||||
auto out = ByteUtils::mirror(byte);
|
auto out = ByteUtils::mirror(byte);
|
||||||
std::cout << "Mirror is " << ByteUtils::toString(out) << std::endl;
|
std::cout << "Mirror is " << ByteUtils::toString(out) << std::endl;
|
||||||
|
|
||||||
|
unsigned hold = byte;
|
||||||
|
hold = (hold << 5) + 3;
|
||||||
|
std::cout << "Big val is " << ByteUtils::toString(hold, 16) << std::endl;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,9 +7,9 @@
|
||||||
|
|
||||||
void testThirdParty()
|
void testThirdParty()
|
||||||
{
|
{
|
||||||
//const auto path = "/home/jmsgrogan/Downloads/test.png";
|
const auto path = "/home/jmsgrogan/Downloads/test.png";
|
||||||
|
|
||||||
const auto path = "/home/jmsgrogan/Downloads/index.png";
|
//const auto path = "/home/jmsgrogan/Downloads/index.png";
|
||||||
|
|
||||||
//const auto path = "/home/jmsgrogan/code/MediaTool-build/bin/test.png";
|
//const auto path = "/home/jmsgrogan/code/MediaTool-build/bin/test.png";
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue