Add test fixture.
This commit is contained in:
parent
af6fad72eb
commit
d6d4319e21
37 changed files with 421 additions and 279 deletions
|
@ -64,11 +64,9 @@ unsigned char Lz77Encoder::getSearchBufferItem(unsigned index) const
|
|||
unsigned Lz77Encoder::lookAheadForMatchingChars(unsigned distance)
|
||||
{
|
||||
unsigned length{0};
|
||||
std::cout << "In hit check at distance " << distance << " max buffer index is: " << mMaxLookAheadBufferIndex << std::endl;
|
||||
for(unsigned idx=0; idx<mMaxLookAheadBufferIndex + 1; idx++)
|
||||
{
|
||||
int search_offset = int(distance-1) - idx;
|
||||
std::cout << "Have search offet " << search_offset << std::endl;
|
||||
unsigned char search_char{0};
|
||||
if (search_offset < 0)
|
||||
{
|
||||
|
@ -78,10 +76,7 @@ unsigned Lz77Encoder::lookAheadForMatchingChars(unsigned distance)
|
|||
{
|
||||
search_char = getSearchBufferItem(static_cast<unsigned>(search_offset));
|
||||
}
|
||||
|
||||
unsigned char lookahead_char = mLookaheadBuffer.getItem(idx);
|
||||
|
||||
std::cout << "Checking search char " << static_cast<int>(search_char) << " and lookup char " << static_cast<int>(lookahead_char) << std::endl;
|
||||
if ((lookahead_char != search_char) || (idx == mMaxLookAheadBufferIndex))
|
||||
{
|
||||
if (idx + 1>= mMinLengthMatchSize)
|
||||
|
@ -100,9 +95,7 @@ void Lz77Encoder::lookForMatches(unsigned char searchChar, unsigned& hitLength,
|
|||
{
|
||||
if (mSearchBuffer.getItem(mSearchBuffer.getNumItems() - 1 - idx) == searchChar)
|
||||
{
|
||||
std::cout << "Looking for hits " << std::endl;
|
||||
auto num_hits = lookAheadForMatchingChars(idx + 1);
|
||||
|
||||
if (num_hits > 0 && num_hits >= hitLength)
|
||||
{
|
||||
hitLength = num_hits;
|
||||
|
@ -197,14 +190,10 @@ bool Lz77Encoder::encode()
|
|||
}
|
||||
|
||||
const auto working_byte = mLookaheadBuffer.getItem(0);
|
||||
std::cout << "Working byte is " << static_cast<int>(working_byte) << std::endl;
|
||||
|
||||
unsigned hit_length{0};
|
||||
unsigned hit_distance{0};
|
||||
lookForMatches(working_byte, hit_length, hit_distance);
|
||||
|
||||
std::cout << "Got hit length " << hit_length << " and distance " << hit_distance << std::endl;
|
||||
|
||||
const Hit hit{hit_length, hit_distance, working_byte};
|
||||
mHitBuffer.push_back(hit);
|
||||
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
#include "RawTree.h"
|
||||
|
||||
#include "HuffmanFixedCodes.h"
|
||||
|
||||
#include <unordered_map>
|
||||
#include <queue>
|
||||
#include <tuple>
|
||||
|
@ -40,34 +42,22 @@ void HuffmanEncoder::dumpTree(const RawTree<CountPair>& tree) const
|
|||
dumpNode(tree.getRootNode(), 0);
|
||||
}
|
||||
|
||||
void HuffmanEncoder::encode(const HuffmanEncoder::DataStream& stream)
|
||||
void HuffmanEncoder::encode(const std::vector<unsigned>& counts)
|
||||
{
|
||||
std::unordered_map<unsigned char, unsigned> counts;
|
||||
for (auto c : stream)
|
||||
{
|
||||
counts[c]++;
|
||||
}
|
||||
encode(counts);
|
||||
}
|
||||
|
||||
void HuffmanEncoder::encode(const std::unordered_map<unsigned char, unsigned>& counts)
|
||||
{
|
||||
std::cout << "Counts" << std::endl;
|
||||
for (const auto& data: counts)
|
||||
{
|
||||
std::cout << data.first << " | " << data.second << std::endl;
|
||||
}
|
||||
std::cout << "*******" << std::endl;
|
||||
|
||||
auto cmp = [](RawNode<CountPair>* left, RawNode<CountPair>* right)
|
||||
{
|
||||
return left->getData().second > right->getData().second;
|
||||
};
|
||||
|
||||
std::priority_queue<RawNode<CountPair>*, std::vector<RawNode<CountPair>* >, decltype(cmp)> q(cmp);
|
||||
for (const auto& entry : counts)
|
||||
unsigned offset{0};
|
||||
for (auto count : counts)
|
||||
{
|
||||
q.push(new RawNode<CountPair>(entry));
|
||||
if (count > 0)
|
||||
{
|
||||
q.push(new RawNode<CountPair>({offset, count}));
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
|
||||
while(q.size() > 1)
|
||||
|
@ -99,6 +89,18 @@ void HuffmanEncoder::encode(const std::unordered_map<unsigned char, unsigned>& c
|
|||
std::cout << "********" << std::endl;
|
||||
}
|
||||
|
||||
void HuffmanEncoder::encode(const std::unordered_map<unsigned char, unsigned>& counts)
|
||||
{
|
||||
std::vector<unsigned> just_counts;
|
||||
for (const auto& data: counts)
|
||||
{
|
||||
mSymbolMapping.push_back(data.first);
|
||||
just_counts.push_back(data.second);
|
||||
}
|
||||
|
||||
encode(just_counts);
|
||||
}
|
||||
|
||||
void HuffmanEncoder::setUseFixedCode(bool useFixed)
|
||||
{
|
||||
mUseFixedCode = useFixed;
|
||||
|
@ -129,16 +131,43 @@ std::optional<PrefixCode> HuffmanEncoder::getEndOfStreamValue() const
|
|||
return mLiteralLengthTable.getCodeForSymbol(256);
|
||||
}
|
||||
|
||||
void HuffmanEncoder::initializeTrees()
|
||||
void HuffmanEncoder::initializeTrees(const std::vector<Hit>& hits)
|
||||
{
|
||||
initializeLiteralLengthTable();
|
||||
initializeLiteralLengthTable(hits);
|
||||
}
|
||||
|
||||
void HuffmanEncoder::initializeLiteralLengthTable()
|
||||
void HuffmanEncoder::initializeLiteralLengthTable(const std::vector<Hit>& hits)
|
||||
{
|
||||
if(mUseFixedCode)
|
||||
{
|
||||
mLiteralLengthTable.setInputLengthSequence(HuffmanFixedCodes::getDeflateFixedHuffmanCodes(), false);
|
||||
mLiteralLengthTable.buildPrefixCodes();
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<unsigned> counts(285, 0);
|
||||
counts[256] = 1;
|
||||
for (const auto& hit : hits)
|
||||
{
|
||||
const auto& [length, distance, next_char] = hit;
|
||||
if (length > 0 )
|
||||
{
|
||||
const auto& [code, extra_bits, num_extra_bits] = HuffmanFixedCodes::getCodeForLength(length);
|
||||
counts[code]++;
|
||||
}
|
||||
else
|
||||
{
|
||||
counts[next_char]++;
|
||||
}
|
||||
}
|
||||
|
||||
for(unsigned idx=0; idx<counts.size(); idx++)
|
||||
{
|
||||
if (counts[idx]>0)
|
||||
{
|
||||
std::cout << "Count for " << idx << " is " << counts[idx] << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
encode(counts);
|
||||
}
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include "HuffmanFixedCodes.h"
|
||||
|
||||
#include <vector>
|
||||
#include <tuple>
|
||||
#include <unordered_map>
|
||||
|
||||
class PrefixCodeGenerator
|
||||
|
@ -21,11 +22,11 @@ public:
|
|||
|
||||
class HuffmanEncoder : public PrefixCodeGenerator
|
||||
{
|
||||
using DataStream = std::vector<unsigned char>;
|
||||
using CountPair = std::pair<unsigned char, unsigned>;
|
||||
using CountPair = std::pair<unsigned, unsigned>;
|
||||
using Hit = std::tuple<unsigned, unsigned, unsigned char>;
|
||||
|
||||
public:
|
||||
void encode(const DataStream& stream);
|
||||
void encode(const std::vector<unsigned>& counts);
|
||||
void encode(const std::unordered_map<unsigned char, unsigned>& counts);
|
||||
|
||||
uint32_t getLengthValue(unsigned length);
|
||||
|
@ -38,18 +39,19 @@ public:
|
|||
|
||||
std::optional<PrefixCode> getEndOfStreamValue() const override;
|
||||
|
||||
void initializeTrees();
|
||||
void initializeTrees(const std::vector<Hit>& hits);
|
||||
|
||||
void setUseFixedCode(bool useFixed);
|
||||
|
||||
private:
|
||||
void initializeLiteralLengthTable();
|
||||
void initializeLiteralLengthTable(const std::vector<Hit>& hits);
|
||||
|
||||
void dumpTree(const RawTree<CountPair>& tree) const;
|
||||
void dumpNode(RawNode<CountPair>* node, unsigned depth) const;
|
||||
|
||||
bool mUseFixedCode{false};
|
||||
bool mTableIsInitialized{false};
|
||||
|
||||
std::vector<unsigned char> mSymbolMapping;
|
||||
HuffmanCodeLengthTable mLiteralLengthTable;
|
||||
HuffmanCodeLengthTable mDistanceTable;
|
||||
};
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <tuple>
|
||||
|
||||
namespace HuffmanFixedCodes
|
||||
{
|
||||
|
@ -17,4 +18,128 @@ namespace HuffmanFixedCodes
|
|||
}
|
||||
return sequence;
|
||||
}
|
||||
|
||||
// Maps a DEFLATE match length to its length symbol.
//
// length: match length in bytes. DEFLATE can represent 3..258; values outside
//         that range are clamped to the nearest representable length instead
//         of wrapping around (unsigned underflow) or producing a bogus symbol.
//
// Returns {symbol, extra_bits_value, num_extra_bits} where
//   symbol           is the literal/length alphabet symbol, 257..285,
//   extra_bits_value is the offset of `length` within the symbol's range,
//   num_extra_bits   is how many extra bits carry that offset (0..5).
//
// Layout (RFC 1951, section 3.2.5):
//   3..10    -> 257..264, no extra bits
//   11..18   -> 265..268, 1 extra bit   (2 lengths per symbol)
//   19..34   -> 269..272, 2 extra bits  (4 lengths per symbol)
//   35..66   -> 273..276, 3 extra bits  (8 lengths per symbol)
//   67..130  -> 277..280, 4 extra bits  (16 lengths per symbol)
//   131..257 -> 281..284, 5 extra bits  (32 lengths per symbol)
//   258      -> 285,      no extra bits
inline std::tuple<unsigned, unsigned char, unsigned char> getCodeForLength(unsigned length)
{
    constexpr unsigned min_length = 3;
    constexpr unsigned max_length = 258;
    constexpr unsigned first_length_symbol = 257;
    constexpr unsigned last_length_symbol = 285;
    constexpr unsigned char no_extra = 0;

    if (length < min_length)
    {
        length = min_length;
    }

    // Length 258 has its own dedicated symbol without extra bits.
    if (length >= max_length)
    {
        return {last_length_symbol, no_extra, no_extra};
    }

    // The first eight lengths map one-to-one onto symbols.
    if (length <= 10)
    {
        return {first_length_symbol + (length - min_length), no_extra, no_extra};
    }

    // Remaining lengths come in five groups of four symbols each; every group
    // doubles the number of lengths covered by one symbol.
    unsigned group_first_length = 11;
    unsigned group_first_symbol = 265;
    for (unsigned num_extra_bits = 1; num_extra_bits <= 5; ++num_extra_bits)
    {
        const unsigned lengths_per_symbol = 1u << num_extra_bits;
        const unsigned group_span = 4 * lengths_per_symbol;
        const unsigned offset = length - group_first_length;

        if (offset < group_span)
        {
            const unsigned symbol = group_first_symbol + offset / lengths_per_symbol;
            const auto extra_value = static_cast<unsigned char>(offset % lengths_per_symbol);
            const auto extra_count = static_cast<unsigned char>(num_extra_bits);
            return {symbol, extra_value, extra_count};
        }

        group_first_length += group_span;
        group_first_symbol += 4;
    }

    // Not reachable: every length in 11..257 falls into one of the groups above.
    return {last_length_symbol, no_extra, no_extra};
}
|
||||
|
||||
/*
|
||||
inline std::pair<unsigned, unsigned char> getCodeForLength(unsigned length)
|
||||
{
|
||||
if (length <= 10)
|
||||
{
|
||||
return {257 + length - 3, 0};
|
||||
}
|
||||
else if(length <= 18)
|
||||
{
|
||||
auto offset = length - 10;
|
||||
auto extra = offset/2 + offset % 2;
|
||||
return {265 + offset/2, extra};
|
||||
}
|
||||
else if(length <= 34)
|
||||
{
|
||||
auto offset = length - 19;
|
||||
auto extra = offset/4 + offset % 4;
|
||||
return {269 + offset/4, extra};
|
||||
}
|
||||
else if(length <= 66)
|
||||
{
|
||||
auto offset = length - 35;
|
||||
auto extra = offset/8 + offset % 8;
|
||||
return {273 + offset/8, extra};
|
||||
}
|
||||
else if(length <= 114)
|
||||
{
|
||||
auto offset = length - 67;
|
||||
auto extra = offset/16 + offset % 16;
|
||||
return {277 + offset/16, extra};
|
||||
}
|
||||
else if(length <= 257)
|
||||
{
|
||||
auto offset = length - 115;
|
||||
auto extra = offset/32 + offset % 32;
|
||||
return {281 + offset/32, extra};
|
||||
}
|
||||
else
|
||||
{
|
||||
return {258, 0};
|
||||
}
|
||||
}
|
||||
|
||||
inline unsigned getLengthForCode(unsigned symbol, unsigned extra)
|
||||
{
|
||||
if (symbol <= 264)
|
||||
{
|
||||
return 3 + symbol - 257;
|
||||
}
|
||||
else if (symbol <= 268)
|
||||
{
|
||||
return 11 + 2*(symbol - 265) + extra;
|
||||
const auto valid_dist = readNextDistanceSymbol(distance);
|
||||
copyFromBuffer(length, distance);
|
||||
}
|
||||
else if (symbol <= 272)
|
||||
{
|
||||
unsigned char extra{0};
|
||||
mInputStream->readNextNBits(2, extra);
|
||||
|
||||
auto length = 19 + 4*(symbol - 269) + extra;
|
||||
const auto valid_dist = readNextDistanceSymbol(distance);
|
||||
copyFromBuffer(length, distance);
|
||||
}
|
||||
else if (symbol <= 276)
|
||||
{
|
||||
unsigned char extra{0};
|
||||
mInputStream->readNextNBits(3, extra);
|
||||
|
||||
auto length = 35 + 8*(symbol - 273) + extra;
|
||||
const auto valid_dist = readNextDistanceSymbol(distance);
|
||||
copyFromBuffer(length, distance);
|
||||
}
|
||||
else if (symbol <= 280)
|
||||
{
|
||||
unsigned char extra{0};
|
||||
mInputStream->readNextNBits(4, extra);
|
||||
|
||||
auto length = 67 + 16*(symbol - 277) + extra;
|
||||
const auto valid_dist = readNextDistanceSymbol(distance);
|
||||
copyFromBuffer(length, distance);
|
||||
}
|
||||
else if (symbol <= 284)
|
||||
{
|
||||
unsigned char extra{0};
|
||||
mInputStream->readNextNBits(5, extra);
|
||||
|
||||
auto length = 131 + 32*(symbol - 281) + extra;
|
||||
const auto valid_dist = readNextDistanceSymbol(distance);
|
||||
copyFromBuffer(length, distance);
|
||||
}
|
||||
else if (symbol == 285)
|
||||
{
|
||||
auto length = 258;
|
||||
const auto valid_dist = readNextDistanceSymbol(distance);
|
||||
copyFromBuffer(length, distance);
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
|
|
@ -200,7 +200,6 @@ void PngWriter::write(const std::unique_ptr<Image<unsigned char> >& image)
|
|||
{
|
||||
auto huffman_encoder = std::make_unique<HuffmanEncoder>();
|
||||
huffman_encoder->setUseFixedCode(true);
|
||||
huffman_encoder->initializeTrees();
|
||||
lz77_encoder.setPrefixCodeGenerator(std::move(huffman_encoder));
|
||||
}
|
||||
lz77_encoder.encode();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue