Add test fixture.

This commit is contained in:
James Grogan 2022-11-29 18:00:19 +00:00
parent af6fad72eb
commit d6d4319e21
37 changed files with 421 additions and 279 deletions

View file

@ -64,11 +64,9 @@ unsigned char Lz77Encoder::getSearchBufferItem(unsigned index) const
unsigned Lz77Encoder::lookAheadForMatchingChars(unsigned distance)
{
unsigned length{0};
std::cout << "In hit check at distance " << distance << " max buffer index is: " << mMaxLookAheadBufferIndex << std::endl;
for(unsigned idx=0; idx<mMaxLookAheadBufferIndex + 1; idx++)
{
int search_offset = int(distance-1) - idx;
std::cout << "Have search offet " << search_offset << std::endl;
unsigned char search_char{0};
if (search_offset < 0)
{
@ -78,10 +76,7 @@ unsigned Lz77Encoder::lookAheadForMatchingChars(unsigned distance)
{
search_char = getSearchBufferItem(static_cast<unsigned>(search_offset));
}
unsigned char lookahead_char = mLookaheadBuffer.getItem(idx);
std::cout << "Checking search char " << static_cast<int>(search_char) << " and lookup char " << static_cast<int>(lookahead_char) << std::endl;
if ((lookahead_char != search_char) || (idx == mMaxLookAheadBufferIndex))
{
if (idx + 1>= mMinLengthMatchSize)
@ -100,9 +95,7 @@ void Lz77Encoder::lookForMatches(unsigned char searchChar, unsigned& hitLength,
{
if (mSearchBuffer.getItem(mSearchBuffer.getNumItems() - 1 - idx) == searchChar)
{
std::cout << "Looking for hits " << std::endl;
auto num_hits = lookAheadForMatchingChars(idx + 1);
if (num_hits > 0 && num_hits >= hitLength)
{
hitLength = num_hits;
@ -197,14 +190,10 @@ bool Lz77Encoder::encode()
}
const auto working_byte = mLookaheadBuffer.getItem(0);
std::cout << "Working byte is " << static_cast<int>(working_byte) << std::endl;
unsigned hit_length{0};
unsigned hit_distance{0};
lookForMatches(working_byte, hit_length, hit_distance);
std::cout << "Got hit length " << hit_length << " and distance " << hit_distance << std::endl;
const Hit hit{hit_length, hit_distance, working_byte};
mHitBuffer.push_back(hit);

View file

@ -2,6 +2,8 @@
#include "RawTree.h"
#include "HuffmanFixedCodes.h"
#include <unordered_map>
#include <queue>
#include <tuple>
@ -40,34 +42,22 @@ void HuffmanEncoder::dumpTree(const RawTree<CountPair>& tree) const
dumpNode(tree.getRootNode(), 0);
}
void HuffmanEncoder::encode(const HuffmanEncoder::DataStream& stream)
void HuffmanEncoder::encode(const std::vector<unsigned>& counts)
{
std::unordered_map<unsigned char, unsigned> counts;
for (auto c : stream)
{
counts[c]++;
}
encode(counts);
}
void HuffmanEncoder::encode(const std::unordered_map<unsigned char, unsigned>& counts)
{
std::cout << "Counts" << std::endl;
for (const auto& data: counts)
{
std::cout << data.first << " | " << data.second << std::endl;
}
std::cout << "*******" << std::endl;
auto cmp = [](RawNode<CountPair>* left, RawNode<CountPair>* right)
{
return left->getData().second > right->getData().second;
};
std::priority_queue<RawNode<CountPair>*, std::vector<RawNode<CountPair>* >, decltype(cmp)> q(cmp);
for (const auto& entry : counts)
unsigned offset{0};
for (auto count : counts)
{
q.push(new RawNode<CountPair>(entry));
if (count > 0)
{
q.push(new RawNode<CountPair>({offset, count}));
}
offset++;
}
while(q.size() > 1)
@ -99,6 +89,18 @@ void HuffmanEncoder::encode(const std::unordered_map<unsigned char, unsigned>& c
std::cout << "********" << std::endl;
}
/**
 * Encode from a symbol -> occurrence-count map.
 *
 * The map is flattened into a plain vector of counts and handed to the
 * vector overload, which identifies every entry by its position only.
 * The symbol belonging to each position is recorded in mSymbolMapping,
 * in the same (unspecified) iteration order as the counts.
 *
 * @param counts Number of occurrences for each symbol.
 */
void HuffmanEncoder::encode(const std::unordered_map<unsigned char, unsigned>& counts)
{
    // Drop the mapping left behind by any earlier call; otherwise the
    // positions used by the vector overload would point at stale symbols.
    mSymbolMapping.clear();
    mSymbolMapping.reserve(counts.size());

    std::vector<unsigned> just_counts;
    just_counts.reserve(counts.size());

    for (const auto& [symbol, count] : counts)
    {
        mSymbolMapping.push_back(symbol);
        just_counts.push_back(count);
    }
    encode(just_counts);
}
void HuffmanEncoder::setUseFixedCode(bool useFixed)
{
mUseFixedCode = useFixed;
@ -129,16 +131,43 @@ std::optional<PrefixCode> HuffmanEncoder::getEndOfStreamValue() const
return mLiteralLengthTable.getCodeForSymbol(256);
}
void HuffmanEncoder::initializeTrees()
void HuffmanEncoder::initializeTrees(const std::vector<Hit>& hits)
{
initializeLiteralLengthTable();
initializeLiteralLengthTable(hits);
}
void HuffmanEncoder::initializeLiteralLengthTable()
/**
 * Prepare the literal/length code table.
 *
 * With the fixed code enabled the table is loaded from
 * HuffmanFixedCodes::getDeflateFixedHuffmanCodes() and the hits are ignored.
 * Otherwise the hits are turned into per-symbol frequencies, which are
 * printed and passed on to encode().
 *
 * @param hits (length, distance, next_char) tuples; a zero length is counted
 *             as the literal next_char, any other length as a length symbol.
 */
void HuffmanEncoder::initializeLiteralLengthTable(const std::vector<Hit>& hits)
{
    if(mUseFixedCode)
    {
        mLiteralLengthTable.setInputLengthSequence(HuffmanFixedCodes::getDeflateFixedHuffmanCodes(), false);
        mLiteralLengthTable.buildPrefixCodes();
        return;
    }

    // The DEFLATE literal/length alphabet runs from 0 to 285 inclusive,
    // so 286 slots are required to be able to count symbol 285.
    constexpr unsigned num_symbols{286};
    constexpr unsigned end_of_stream_symbol{256};

    std::vector<unsigned> counts(num_symbols, 0);
    // The end-of-stream marker is always emitted exactly once.
    counts[end_of_stream_symbol] = 1;

    for (const auto& hit : hits)
    {
        const unsigned length = std::get<0>(hit);
        const unsigned symbol = length > 0
            ? std::get<0>(HuffmanFixedCodes::getCodeForLength(length))
            : static_cast<unsigned>(std::get<2>(hit));

        // Never index outside the table, whatever the helper hands back.
        if (symbol < counts.size())
        {
            counts[symbol]++;
        }
    }

    for(unsigned idx=0; idx<counts.size(); idx++)
    {
        if (counts[idx]>0)
        {
            std::cout << "Count for " << idx << " is " << counts[idx] << std::endl;
        }
    }
    encode(counts);
}

View file

@ -6,6 +6,7 @@
#include "HuffmanFixedCodes.h"
#include <vector>
#include <tuple>
#include <unordered_map>
class PrefixCodeGenerator
@ -21,11 +22,11 @@ public:
class HuffmanEncoder : public PrefixCodeGenerator
{
using DataStream = std::vector<unsigned char>;
using CountPair = std::pair<unsigned char, unsigned>;
using CountPair = std::pair<unsigned, unsigned>;
using Hit = std::tuple<unsigned, unsigned, unsigned char>;
public:
void encode(const DataStream& stream);
void encode(const std::vector<unsigned>& counts);
void encode(const std::unordered_map<unsigned char, unsigned>& counts);
uint32_t getLengthValue(unsigned length);
@ -38,18 +39,19 @@ public:
std::optional<PrefixCode> getEndOfStreamValue() const override;
void initializeTrees();
void initializeTrees(const std::vector<Hit>& hits);
void setUseFixedCode(bool useFixed);
private:
void initializeLiteralLengthTable();
void initializeLiteralLengthTable(const std::vector<Hit>& hits);
void dumpTree(const RawTree<CountPair>& tree) const;
void dumpNode(RawNode<CountPair>* node, unsigned depth) const;
bool mUseFixedCode{false};
bool mTableIsInitialized{false};
std::vector<unsigned char> mSymbolMapping;
HuffmanCodeLengthTable mLiteralLengthTable;
HuffmanCodeLengthTable mDistanceTable;
};

View file

@ -1,6 +1,7 @@
#pragma once
#include <vector>
#include <tuple>
namespace HuffmanFixedCodes
{
@ -17,4 +18,128 @@ namespace HuffmanFixedCodes
}
return sequence;
}
/**
 * Translate a match length into its DEFLATE literal/length symbol
 * (RFC 1951, section 3.2.5).
 *
 * Symbols 257..264 cover lengths 3..10 one-to-one. After that, every group
 * of four symbols carries one more extra bit than the previous group
 * (265..268: 1 bit, 269..272: 2 bits, ... 281..284: 5 bits). Symbol 285 is
 * reserved for the maximum length of 258.
 *
 * The returned symbols span 257..285, so a table indexed by them needs
 * 286 entries.
 *
 * @param length Match length. Values below 3 are treated as 3 and values
 *               above 258 as 258, since the format cannot express them.
 * @return {symbol, value of the extra bits, number of extra bits}
 */
inline std::tuple<unsigned, unsigned char, unsigned char> getCodeForLength(unsigned length)
{
    using LengthCode = std::tuple<unsigned, unsigned char, unsigned char>;

    constexpr unsigned min_length{3};
    constexpr unsigned max_length{258};
    constexpr unsigned first_symbol{257};
    constexpr unsigned last_symbol{285};
    constexpr unsigned char none{0};

    if (length >= max_length)
    {
        return LengthCode{last_symbol, none, none};
    }
    if (length <= min_length)
    {
        // Also catches 0..2, which would otherwise wrap around below.
        return LengthCode{first_symbol, none, none};
    }
    if (length <= 10)
    {
        return LengthCode{first_symbol + (length - min_length), none, none};
    }

    unsigned group_first_length{11};
    unsigned group_first_symbol{265};
    for (unsigned num_extra_bits = 1; num_extra_bits <= 5; num_extra_bits++)
    {
        // Each symbol in this group spans 2^num_extra_bits lengths and
        // each group holds four symbols.
        const unsigned lengths_per_symbol = 1u << num_extra_bits;
        const unsigned next_group_first_length = group_first_length + 4*lengths_per_symbol;
        if (length < next_group_first_length)
        {
            const unsigned offset = length - group_first_length;
            return LengthCode{group_first_symbol + offset/lengths_per_symbol,
                static_cast<unsigned char>(offset % lengths_per_symbol),
                static_cast<unsigned char>(num_extra_bits)};
        }
        group_first_length = next_group_first_length;
        group_first_symbol += 4;
    }

    // Not reachable for length < 258; kept so every path returns a value.
    return LengthCode{last_symbol, none, none};
}
/*
inline std::pair<unsigned, unsigned char> getCodeForLength(unsigned length)
{
if (length <= 10)
{
return {257 + length - 3, 0};
}
else if(length <= 18)
{
auto offset = length - 10;
auto extra = offset/2 + offset % 2;
return {265 + offset/2, extra};
}
else if(length <= 34)
{
auto offset = length - 19;
auto extra = offset/4 + offset % 4;
return {269 + offset/4, extra};
}
else if(length <= 66)
{
auto offset = length - 35;
auto extra = offset/8 + offset % 8;
return {273 + offset/8, extra};
}
else if(length <= 114)
{
auto offset = length - 67;
auto extra = offset/16 + offset % 16;
return {277 + offset/16, extra};
}
else if(length <= 257)
{
auto offset = length - 115;
auto extra = offset/32 + offset % 32;
return {281 + offset/32, extra};
}
else
{
return {258, 0};
}
}
inline unsigned getLengthForCode(unsigned symbol, unsigned extra)
{
if (symbol <= 264)
{
return 3 + symbol - 257;
}
else if (symbol <= 268)
{
return 11 + 2*(symbol - 265) + extra;
const auto valid_dist = readNextDistanceSymbol(distance);
copyFromBuffer(length, distance);
}
else if (symbol <= 272)
{
unsigned char extra{0};
mInputStream->readNextNBits(2, extra);
auto length = 19 + 4*(symbol - 269) + extra;
const auto valid_dist = readNextDistanceSymbol(distance);
copyFromBuffer(length, distance);
}
else if (symbol <= 276)
{
unsigned char extra{0};
mInputStream->readNextNBits(3, extra);
auto length = 35 + 8*(symbol - 273) + extra;
const auto valid_dist = readNextDistanceSymbol(distance);
copyFromBuffer(length, distance);
}
else if (symbol <= 280)
{
unsigned char extra{0};
mInputStream->readNextNBits(4, extra);
auto length = 67 + 16*(symbol - 277) + extra;
const auto valid_dist = readNextDistanceSymbol(distance);
copyFromBuffer(length, distance);
}
else if (symbol <= 284)
{
unsigned char extra{0};
mInputStream->readNextNBits(5, extra);
auto length = 131 + 32*(symbol - 281) + extra;
const auto valid_dist = readNextDistanceSymbol(distance);
copyFromBuffer(length, distance);
}
else if (symbol == 285)
{
auto length = 258;
const auto valid_dist = readNextDistanceSymbol(distance);
copyFromBuffer(length, distance);
}
}
*/
}

View file

@ -200,7 +200,6 @@ void PngWriter::write(const std::unique_ptr<Image<unsigned char> >& image)
{
auto huffman_encoder = std::make_unique<HuffmanEncoder>();
huffman_encoder->setUseFixedCode(true);
huffman_encoder->initializeTrees();
lz77_encoder.setPrefixCodeGenerator(std::move(huffman_encoder));
}
lz77_encoder.encode();