stuff-from-scratch/src/compression/huffman/HuffmanEncoder.cpp
2022-11-30 18:28:50 +00:00

173 lines
4.1 KiB
C++

#include "HuffmanEncoder.h"
#include "RawTree.h"
#include "HuffmanFixedCodes.h"
#include <unordered_map>
#include <queue>
#include <tuple>
#include <iostream>
/// Debug helper: walks the subtree rooted at `node` depth-first, visiting the
/// left child before the right child.
///
/// All diagnostic output is currently commented out, so the walk produces no
/// visible output; the statements are kept so they can be re-enabled.
///
/// @param node  Subtree root; a null pointer ends the recursion.
/// @param depth Nesting level of `node`, used to size the output prefix.
void HuffmanEncoder::dumpNode(RawNode<CountPair>* node, unsigned depth) const
{
    if (node == nullptr)
    {
        return;
    }

    // Only consumed by the (disabled) diagnostic output below.
    auto data = node->getData();
    std::string prefix(depth, '_');

    if (node->isLeaf())
    {
        //std::cout << prefix << "Leaf with value: " << data.first << " and sum " << data.second << std::endl;
        return;
    }

    const unsigned childDepth = depth + 1;

    //std::cout << prefix << "Intermediate with sum " << data.second << std::endl;
    //std::cout << prefix << "Doing Left.." << std::endl;
    dumpNode(node->getLeftChild(), childDepth);
    //std::cout << prefix << "Doing Right.." << std::endl;
    dumpNode(node->getRightChild(), childDepth);
    //std::cout << prefix << "*****" << std::endl;
}
/// Debug helper: walks the whole tree by handing its root node to dumpNode
/// at depth zero.
///
/// @param tree Tree whose nodes are visited.
void HuffmanEncoder::dumpTree(const RawTree<CountPair>& tree) const
{
    constexpr unsigned rootDepth = 0;
    auto* rootNode = tree.getRootNode();
    dumpNode(rootNode, rootDepth);
}
/// Builds a Huffman tree from per-symbol occurrence counts.
///
/// Each position in `counts` is treated as a symbol; positions with a zero
/// count get no leaf. The two lowest-count nodes are merged repeatedly until
/// one root remains, which is then handed to a local RawTree.
///
/// The tree is local to this function and is not stored or converted into a
/// code table yet, so the call currently has no lasting effect.
///
/// @param counts Occurrence count per symbol, indexed by symbol position.
void HuffmanEncoder::encode(const std::vector<unsigned>& counts)
{
    // Order the queue so that the node with the smallest count is on top.
    auto cmp = [](RawNode<CountPair>* left, RawNode<CountPair>* right)
    {
        return left->getData().second > right->getData().second;
    };
    std::priority_queue<RawNode<CountPair>*, std::vector<RawNode<CountPair>* >, decltype(cmp)> q(cmp);

    // One leaf per symbol that actually occurs; `offset` is the symbol's position.
    unsigned offset{0};
    for (auto count : counts)
    {
        if (count > 0)
        {
            q.push(new RawNode<CountPair>({offset, count}));
        }
        ++offset;
    }

    // No symbol has a non-zero count: there is no tree to build, and calling
    // top()/pop() on an empty priority_queue is undefined behaviour.
    if (q.empty())
    {
        return;
    }

    // Merge the two least frequent nodes under a new parent until one remains.
    while (q.size() > 1)
    {
        auto node0 = q.top();
        q.pop();
        auto node1 = q.top();
        q.pop();

        const auto sum = node0->getData().second + node1->getData().second;
        auto new_node = new RawNode<CountPair>(CountPair{0, sum});
        new_node->addChild(node0);
        new_node->addChild(node1);
        q.push(new_node);
    }

    auto root = q.top();
    q.pop();

    // NOTE(review): nodes are allocated with raw `new`; this presumes RawNode and
    // RawTree take ownership of children / root - confirm, otherwise they leak.
    RawTree<CountPair> tree;
    tree.addRootNode(root);

    //using TableEntry = std::tuple<>
    //dumpTree(tree);
    //std::cout << "********" << std::endl;
}
/// Builds a Huffman tree from a symbol -> count map.
///
/// The map is flattened in its iteration order (unspecified for
/// std::unordered_map): each symbol is appended to mSymbolMapping and its count
/// is appended, at the matching position, to the vector passed on to the
/// vector overload of encode().
///
/// NOTE(review): mSymbolMapping is appended to, not reset, so entries from an
/// earlier call remain - confirm this is intended.
///
/// @param counts Occurrence count keyed by byte value.
void HuffmanEncoder::encode(const std::unordered_map<unsigned char, unsigned>& counts)
{
    std::vector<unsigned> frequencies;
    for (const auto& [symbol, frequency] : counts)
    {
        mSymbolMapping.push_back(symbol);
        frequencies.push_back(frequency);
    }
    encode(frequencies);
}
void HuffmanEncoder::setUseFixedCode(bool useFixed)
{
mUseFixedCode = useFixed;
}
/// Placeholder: ignores `length` and always returns 0.
///
/// @param length Unused.
/// @return Always 0.
uint32_t HuffmanEncoder::getLengthValue(unsigned length)
{
    static_cast<void>(length);
    constexpr uint32_t placeholderValue{0};
    return placeholderValue;
}
/// Looks up the prefix code for a literal byte in the literal/length table.
///
/// @param value Literal byte to look up.
/// @return The result of the table lookup (presumably empty when the symbol
///         has no code - confirm against the table implementation).
std::optional<PrefixCode> HuffmanEncoder::getLiteralValue(unsigned char value) const
{
    auto code = mLiteralLengthTable.getCodeForSymbol(value);
    return code;
}
/// Looks up the prefix code for `length` in the literal/length table.
///
/// The argument is passed to the table unchanged, i.e. it is used directly as
/// the symbol to look up.
///
/// @param length Symbol to look up.
/// @return The result of the table lookup (presumably empty when the symbol
///         has no code - confirm against the table implementation).
std::optional<PrefixCode> HuffmanEncoder::getLengthValue(unsigned length) const
{
    auto code = mLiteralLengthTable.getCodeForSymbol(length);
    return code;
}
/// Looks up the prefix code for `distance` in the distance table.
///
/// The argument is passed to the table unchanged, i.e. it is used directly as
/// the symbol to look up.
///
/// @param distance Symbol to look up.
/// @return The result of the table lookup (presumably empty when the symbol
///         has no code - confirm against the table implementation).
std::optional<PrefixCode> HuffmanEncoder::getDistanceValue(unsigned distance) const
{
    auto code = mDistanceTable.getCodeForSymbol(distance);
    return code;
}
/// Looks up the prefix code for symbol 256 in the literal/length table.
///
/// @return The result of the table lookup for symbol 256.
std::optional<PrefixCode> HuffmanEncoder::getEndOfStreamValue() const
{
    // Presumably the DEFLATE end-of-block symbol (RFC 1951) - confirm.
    constexpr auto endOfStreamSymbol = 256;
    return mLiteralLengthTable.getCodeForSymbol(endOfStreamSymbol);
}
void HuffmanEncoder::initializeTrees(const std::vector<Hit>& hits)
{
initializeLiteralLengthTable(hits);
}
/// Sets up the literal/length table.
///
/// With the fixed-code option enabled, the table is loaded from
/// HuffmanFixedCodes and its prefix codes are built directly. Otherwise the
/// hits are scanned to count how often each literal/length symbol occurs, and
/// those counts are passed to encode().
///
/// @param hits Sequence of (length, distance, next_char) entries. An entry with
///             length > 0 is counted under the code returned by
///             HuffmanFixedCodes::getCodeForLength(); any other entry is
///             counted under its next_char.
void HuffmanEncoder::initializeLiteralLengthTable(const std::vector<Hit>& hits)
{
    if (mUseFixedCode)
    {
        mLiteralLengthTable.setInputLengthSequence(HuffmanFixedCodes::getDeflateFixedHuffmanCodes(), false);
        mLiteralLengthTable.buildPrefixCodes();
        return;
    }

    // The DEFLATE literal/length alphabet (RFC 1951) has symbols 0..285, i.e.
    // 286 entries. A 285-entry table cannot index symbol 285 (the code for the
    // maximum match length), which would be an out-of-bounds write below.
    constexpr unsigned numLiteralLengthSymbols = 286;
    constexpr unsigned endOfBlockSymbol = 256;

    std::vector<unsigned> counts(numLiteralLengthSymbols, 0);

    // Symbol 256 always gets a count of one so that it receives a leaf.
    counts[endOfBlockSymbol] = 1;

    for (const auto& hit : hits)
    {
        const auto& [length, distance, next_char] = hit;
        if (length > 0)
        {
            // Only the code is needed for counting; the extra-bit fields are unused here.
            const auto& [code, extra_bits, num_extra_bits] = HuffmanFixedCodes::getCodeForLength(length);
            counts[code]++;
        }
        else
        {
            counts[next_char]++;
        }
    }

    encode(counts);
}