173 lines
4.1 KiB
C++
173 lines
4.1 KiB
C++
#include "HuffmanEncoder.h"
|
|
|
|
#include "RawTree.h"
|
|
|
|
#include "HuffmanFixedCodes.h"
|
|
|
|
#include <unordered_map>
|
|
#include <queue>
|
|
#include <tuple>
|
|
#include <iostream>
|
|
|
|
// Recursively walks the subtree rooted at `node`, descending into the left
// child before the right child. All trace output is currently commented out,
// so the walk prints nothing.
//
// node:  root of the subtree to walk; a null pointer ends the recursion.
// depth: nesting level of `node`; it determines the length of the '_' indent
//        used by the (disabled) trace lines and grows by one per level.
void HuffmanEncoder::dumpNode(RawNode<CountPair>* node, unsigned depth) const
{
    if (node == nullptr)
    {
        return;
    }

    // Both locals are only consumed by the disabled trace statements below.
    auto payload = node->getData();
    std::string indent(depth, '_');

    if (node->isLeaf())
    {
        //std::cout << indent << "Leaf with value: " << payload.first << " and sum " << payload.second << std::endl;
        return;
    }

    const unsigned child_depth = depth + 1;

    //std::cout << indent << "Intermediate with sum " << payload.second << std::endl;
    //std::cout << indent << "Doing Left.." << std::endl;
    dumpNode(node->getLeftChild(), child_depth);

    //std::cout << indent << "Doing Right.." << std::endl;
    dumpNode(node->getRightChild(), child_depth);

    //std::cout << indent << "*****" << std::endl;
}
|
|
|
|
void HuffmanEncoder::dumpTree(const RawTree<CountPair>& tree) const
|
|
{
|
|
dumpNode(tree.getRootNode(), 0);
|
|
}
|
|
|
|
void HuffmanEncoder::encode(const std::vector<unsigned>& counts)
|
|
{
|
|
auto cmp = [](RawNode<CountPair>* left, RawNode<CountPair>* right)
|
|
{
|
|
return left->getData().second > right->getData().second;
|
|
};
|
|
|
|
std::priority_queue<RawNode<CountPair>*, std::vector<RawNode<CountPair>* >, decltype(cmp)> q(cmp);
|
|
unsigned offset{0};
|
|
for (auto count : counts)
|
|
{
|
|
if (count > 0)
|
|
{
|
|
q.push(new RawNode<CountPair>({offset, count}));
|
|
}
|
|
offset++;
|
|
}
|
|
|
|
while(q.size() > 1)
|
|
{
|
|
auto node0 = q.top();
|
|
q.pop();
|
|
|
|
auto node1 = q.top();
|
|
q.pop();
|
|
|
|
const auto sum = node0->getData().second + node1->getData().second;
|
|
auto new_node = new RawNode<CountPair>(CountPair{0, sum});
|
|
|
|
new_node->addChild(node0);
|
|
new_node->addChild(node1);
|
|
q.push(new_node);
|
|
}
|
|
|
|
auto root = q.top();
|
|
q.pop();
|
|
|
|
RawTree<CountPair> tree;
|
|
tree.addRootNode(root);
|
|
|
|
//using TableEntry = std::tuple<>
|
|
|
|
//dumpTree(tree);
|
|
|
|
//std::cout << "********" << std::endl;
|
|
}
|
|
|
|
void HuffmanEncoder::encode(const std::unordered_map<unsigned char, unsigned>& counts)
|
|
{
|
|
std::vector<unsigned> just_counts;
|
|
for (const auto& data: counts)
|
|
{
|
|
mSymbolMapping.push_back(data.first);
|
|
just_counts.push_back(data.second);
|
|
}
|
|
|
|
encode(just_counts);
|
|
}
|
|
|
|
void HuffmanEncoder::setUseFixedCode(bool useFixed)
|
|
{
|
|
mUseFixedCode = useFixed;
|
|
}
|
|
|
|
// Non-const overload of getLengthValue. It is a placeholder: `length` is
// ignored and the result is always 0.
uint32_t HuffmanEncoder::getLengthValue(unsigned length)
{
    const uint32_t placeholder{0};
    return placeholder;
}
|
|
|
|
std::optional<PrefixCode> HuffmanEncoder::getLiteralValue(unsigned char value) const
|
|
{
|
|
return mLiteralLengthTable.getCodeForSymbol(value);
|
|
}
|
|
|
|
std::optional<PrefixCode> HuffmanEncoder::getLengthValue(unsigned length) const
|
|
{
|
|
return mLiteralLengthTable.getCodeForSymbol(length);
|
|
}
|
|
|
|
std::optional<PrefixCode> HuffmanEncoder::getDistanceValue(unsigned distance) const
|
|
{
|
|
return mDistanceTable.getCodeForSymbol(distance);
|
|
}
|
|
|
|
std::optional<PrefixCode> HuffmanEncoder::getEndOfStreamValue() const
|
|
{
|
|
return mLiteralLengthTable.getCodeForSymbol(256);
|
|
}
|
|
|
|
void HuffmanEncoder::initializeTrees(const std::vector<Hit>& hits)
|
|
{
|
|
initializeLiteralLengthTable(hits);
|
|
}
|
|
|
|
void HuffmanEncoder::initializeLiteralLengthTable(const std::vector<Hit>& hits)
|
|
{
|
|
if(mUseFixedCode)
|
|
{
|
|
mLiteralLengthTable.setInputLengthSequence(HuffmanFixedCodes::getDeflateFixedHuffmanCodes(), false);
|
|
mLiteralLengthTable.buildPrefixCodes();
|
|
return;
|
|
}
|
|
|
|
std::vector<unsigned> counts(285, 0);
|
|
counts[256] = 1;
|
|
for (const auto& hit : hits)
|
|
{
|
|
const auto& [length, distance, next_char] = hit;
|
|
if (length > 0 )
|
|
{
|
|
const auto& [code, extra_bits, num_extra_bits] = HuffmanFixedCodes::getCodeForLength(length);
|
|
counts[code]++;
|
|
}
|
|
else
|
|
{
|
|
counts[next_char]++;
|
|
}
|
|
}
|
|
|
|
for(unsigned idx=0; idx<counts.size(); idx++)
|
|
{
|
|
if (counts[idx]>0)
|
|
{
|
|
//std::cout << "Count for " << idx << " is " << counts[idx] << std::endl;
|
|
}
|
|
}
|
|
|
|
encode(counts);
|
|
}
|