Some encode/decode practice.

This commit is contained in:
James Grogan 2022-11-21 17:45:12 +00:00
parent 8a41337e2d
commit ff962a6b16
29 changed files with 727 additions and 305 deletions

View file

@ -2,6 +2,8 @@
# Source files that make up the compression library.
list(APPEND compression_LIB_INCLUDES
StreamCompressor.cpp
HuffmanEncoder.cpp
RunLengthEncoder.cpp
ZlibData.cpp
)
# Build the sources collected above into a shared library target named "compression".
add_library(compression SHARED ${compression_LIB_INCLUDES})

View file

@ -1,77 +1,100 @@
#include "HuffmanEncoder.h"
#include "Tree.h"
#include "RawTree.h"
#include <unordered_map>
#include <queue>
#include <tuple>
#include <iostream>
/// Recursively prints `node` and everything below it to std::cout.
///
/// Each line is prefixed with `depth` underscore characters so nesting is
/// visible in the output. A null `node` prints nothing.
///
/// @param node  Sub-tree to print; may be null.
/// @param depth Nesting level of `node`, used only for the line prefix.
void HuffmanEncoder::dumpNode(RawNode<CountPair>* node, unsigned depth) const
{
    if (node == nullptr)
    {
        return;
    }

    const auto payload = node->getData();
    const std::string indent(depth, '_');

    // Leaves carry a symbol and its count; nothing to recurse into.
    if (node->isLeaf())
    {
        std::cout << indent << "Leaf with value: " << payload.first << " and sum " << payload.second << std::endl;
        return;
    }

    // Intermediate nodes only carry the combined count of their children.
    const unsigned childDepth = depth + 1;
    std::cout << indent << "Intermediate with sum " << payload.second << std::endl;

    std::cout << indent << "Doing Left.." << std::endl;
    dumpNode(node->getLeftChild(), childDepth);

    std::cout << indent << "Doing Right.." << std::endl;
    dumpNode(node->getRightChild(), childDepth);

    std::cout << indent << "*****" << std::endl;
}
void HuffmanEncoder::dumpTree(const RawTree<CountPair>& tree) const
{
dumpNode(tree.getRootNode(), 0);
}
/// Counts how often each byte value occurs in `stream` and hands the
/// resulting frequency table to the `encode(counts)` overload.
///
/// The tree construction lives entirely in the frequency-table overload; this
/// function used to carry a second, stale copy of it which redeclared
/// `counts`, called `q.top()` on a possibly empty queue and dropped the new
/// leaf's weight when summing.
///
/// @param stream Raw bytes to analyse.
void HuffmanEncoder::encode(const HuffmanEncoder::DataStream& stream)
{
    std::unordered_map<unsigned char, unsigned> counts;
    for (const auto c : stream)
    {
        // operator[] value-initialises a missing entry to 0 before the increment.
        ++counts[c];
    }
    encode(counts);
}
/// Builds a Huffman tree from a symbol -> frequency table and prints it.
///
/// The table is echoed to std::cout first. The two lowest-weight nodes are then
/// merged repeatedly under a new parent whose weight is their sum, until a
/// single root remains. The finished tree is printed with dumpTree().
///
/// An empty table only produces the "Counts" header: there is no tree to
/// build, and calling top() on an empty priority queue is undefined behaviour.
///
/// @param counts Number of occurrences for each byte value.
void HuffmanEncoder::encode(const std::unordered_map<unsigned char, unsigned>& counts)
{
    std::cout << "Counts" << std::endl;
    for (const auto& data : counts)
    {
        std::cout << data.first << " | " << data.second << std::endl;
    }
    std::cout << "*******" << std::endl;

    if (counts.empty())
    {
        return;
    }

    // Min-heap on weight: the comparator is inverted because
    // std::priority_queue keeps the "largest" element on top.
    auto cmp = [](RawNode<CountPair>* left, RawNode<CountPair>* right)
    {
        return left->getData().second > right->getData().second;
    };
    std::priority_queue<RawNode<CountPair>*, std::vector<RawNode<CountPair>*>, decltype(cmp)> q(cmp);

    // NOTE(review): nodes are allocated with raw `new`; this assumes addChild()
    // and addRootNode() take ownership and free them - confirm in RawTree.h.
    for (const auto& entry : counts)
    {
        q.push(new RawNode<CountPair>(entry));
    }

    while (q.size() > 1)
    {
        auto node0 = q.top();
        q.pop();
        auto node1 = q.top();
        q.pop();

        // Intermediate nodes carry symbol 0 and the combined weight.
        const auto sum = node0->getData().second + node1->getData().second;
        auto new_node = new RawNode<CountPair>(CountPair{0, sum});
        new_node->addChild(node0);
        new_node->addChild(node1);
        q.push(new_node);
    }

    // Exactly one node is left here because the table was non-empty.
    auto root = q.top();
    q.pop();

    RawTree<CountPair> tree;
    tree.addRootNode(root);

    //using TableEntry = std::tuple<>

    dumpTree(tree);
    std::cout << "********" << std::endl;
}

View file

@ -1,11 +1,21 @@
#pragma once
#include "RawTree.h"
#include <vector>
#include <unordered_map>
/// Huffman coding experiment: derives symbol frequencies from a byte stream
/// and builds the corresponding tree.
class HuffmanEncoder
{
    using DataStream = std::vector<unsigned char>;
    /// (symbol, occurrence count) payload stored in every tree node.
    using CountPair = std::pair<unsigned char, unsigned>;

public:
    /// Counts byte frequencies in `stream` and encodes from those counts.
    void encode(const DataStream& stream);

    /// Builds the tree from a ready-made symbol -> frequency table.
    void encode(const std::unordered_map<unsigned char, unsigned>& counts);

private:
    /// Prints the whole tree to standard output.
    void dumpTree(const RawTree<CountPair>& tree) const;

    /// Prints `node` and its descendants, using `depth` for the line prefix.
    void dumpNode(RawNode<CountPair>* node, unsigned depth) const;
};

View file

View file

@ -0,0 +1,110 @@
#pragma once
#include "StringUtils.h"
#include <vector>
#include <string>
/// Text run-length codec.
///
/// A run is written as `<delimiter><decimal count><character>` (for example
/// "aaaa" -> "@4a"); anything else is copied through unchanged.
///
/// Rules that keep decode(encode(s)) == s for every input:
///  - runs shorter than 3 characters are left as literals (a token would not
///    be shorter),
///  - runs of decimal digits are always left as literals, because a digit
///    cannot follow the count inside a token without becoming part of it,
///  - the delimiter itself is always written as a token (even for a single
///    occurrence) so that it can never be mistaken for the start of one.
class RunLengthEncoder
{
public:
    /// Compresses `string`.
    ///
    /// @param string Text to compress; may be empty.
    /// @return The encoded text (empty for empty input).
    std::string encode(const std::string& string) const
    {
        std::string ret;
        std::size_t pos = 0;
        while (pos < string.size())
        {
            // Find the end of the run of identical characters starting at pos.
            const char c = string[pos];
            std::size_t run_end = pos + 1;
            while (run_end < string.size() && string[run_end] == c)
            {
                ++run_end;
            }
            insertCharacter(ret, c, run_end - pos);
            pos = run_end;
        }
        return ret;
    }

    /// Expands text produced by encode().
    ///
    /// A delimiter that is not followed by at least one digit and then one more
    /// character is not a valid token; it is copied through as a literal
    /// instead of raising an error.
    ///
    /// @param string Encoded text; may be empty.
    /// @return The expanded text.
    /// @throws std::out_of_range if a repeat count does not fit in unsigned long.
    std::string decode(const std::string& string) const
    {
        std::string ret;
        std::size_t pos = 0;
        while (pos < string.size())
        {
            const char c = string[pos];
            if (c != mDelimiter)
            {
                ret += c;
                ++pos;
                continue;
            }

            // Collect the decimal repeat count that follows the delimiter.
            std::size_t cursor = pos + 1;
            std::string digits;
            while (cursor < string.size() && isDigit(string[cursor]))
            {
                digits += string[cursor];
                ++cursor;
            }

            const bool has_payload = cursor < string.size();
            if (digits.empty() || !has_payload)
            {
                // Malformed token: keep the delimiter and rescan what follows.
                ret += c;
                ++pos;
                continue;
            }

            // Parse the count once, then emit the repeated character.
            ret.append(std::stoul(digits), string[cursor]);
            pos = cursor + 1;
        }
        return ret;
    }

private:
    /// True for the ASCII decimal digits '0'..'9'.
    static bool isDigit(char c)
    {
        return c >= '0' && c <= '9';
    }

    /// Appends `count` copies of `c` to `output`, as a token when that is both
    /// decodable and worthwhile, otherwise as literal characters.
    void insertCharacter(std::string& output, char c, std::size_t count) const
    {
        const bool must_escape = (c == mDelimiter);
        const bool can_tokenise = !isDigit(c);
        if (must_escape || (can_tokenise && count >= 3))
        {
            output += mDelimiter;
            output += std::to_string(count);
            output += c;
        }
        else
        {
            output.append(count, c);
        }
    }

    char mDelimiter {'@'};
};

View file

View file

@ -0,0 +1,97 @@
#pragma once
#include "ByteUtils.h"
#include <vector>
#include <iostream>
/// Holds the header bytes and payload of a zlib stream and prints what it
/// finds while inspecting them.
///
/// NOTE(review): the field split (CMF -> method/info, FLG -> check/dict/level)
/// appears to follow the zlib header layout; the exact bit semantics depend on
/// the ByteUtils helpers - confirm there.
class ZlibData
{
public:
    /// Stores `data` at position `idx` of the payload buffer.
    /// No bounds check is performed; call setDataSize() first.
    void setByte(unsigned idx, unsigned char data)
    {
        mData[idx] = data;
    }

    /// Replaces the payload buffer with `size` zero bytes.
    void setDataSize(std::size_t size)
    {
        mData.assign(size, static_cast<unsigned char>(0));
    }

    /// Records the first header byte and splits it into its lower and upper
    /// four bits (compression method and compression info).
    void setCompressionMethod(unsigned char method)
    {
        std::cout << "Got compression input " << static_cast<int>(method) << std::endl;

        mCmf = method;
        mCompressionMethod = ByteUtils::getLowerNBits(method, 4);
        mCompressionInfo = ByteUtils::getHigherNBits(method, 4);

        const int methodValue = static_cast<int>(mCompressionMethod);
        const int infoValue = static_cast<int>(mCompressionInfo);
        std::cout << "Got compression method " << methodValue << " and info " << infoValue << std::endl;
    }

    /// Records the second header byte and splits it into the check bits
    /// (lower 5), the dictionary flag (bit 5) and the level (upper 2 bits).
    void setExtraFlags(unsigned char extraFlags)
    {
        std::cout << "Got flags " << static_cast<int>(extraFlags) << std::endl;

        mFlg = extraFlags;
        mFlagCheck = ByteUtils::getLowerNBits(extraFlags, 5);
        mFlagDict = ByteUtils::getBitN(extraFlags, 5);
        mFlagLevel = ByteUtils::getHigherNBits(extraFlags, 2);

        const int checkValue = static_cast<int>(mFlagCheck);
        const int dictValue = static_cast<int>(mFlagDict);
        const int levelValue = static_cast<int>(mFlagLevel);
        std::cout << "Got flag check " << checkValue << " and dict " << dictValue << " and level " << levelValue << std::endl;
    }

    /// Inspects the first payload byte only: reports whether bit 0 (final
    /// block) is set and which block type the two bits starting at bit 1
    /// select. Does nothing when the payload is empty.
    void processData()
    {
        const unsigned char kNoCompression = 0x00;
        const unsigned char kFixedHuffman = 0x01;
        const unsigned char kDynamicHuffman = 0x02;
        const unsigned char kReserved = 0x03;

        if (mData.empty())
        {
            return;
        }

        const auto header_byte = mData[0];
        std::cout << "Into process data, byte is: " << static_cast<int>(header_byte) << std::endl;

        if (ByteUtils::getBitN(header_byte, 0))
        {
            std::cout << "Got final block" << std::endl;
        }

        const auto block_type = ByteUtils::getTwoBitsAtN(header_byte, 1);
        std::cout << "Compress type byte is: " << static_cast<int>(block_type) << std::endl;

        // Pick the message for the recognised block type, if any.
        const char* message = nullptr;
        if (block_type == kNoCompression)
        {
            message = "Got NO_COMPRESSION";
        }
        else if (block_type == kFixedHuffman)
        {
            message = "Got FIXED_HUFFMAN";
        }
        else if (block_type == kDynamicHuffman)
        {
            message = "Got DYNAMIC_HUFFMAN";
        }
        else if (block_type == kReserved)
        {
            message = "Got ERROR";
        }

        if (message != nullptr)
        {
            std::cout << message << std::endl;
        }
    }

private:
    std::vector<unsigned char> mData;    // payload bytes
    unsigned char mCmf{0};               // first header byte as received
    unsigned char mFlg{0};               // second header byte as received
    unsigned char mCompressionMethod{0}; // lower 4 bits of mCmf
    unsigned char mCompressionInfo{0};   // upper 4 bits of mCmf
    unsigned char mFlagCheck{0};         // lower 5 bits of mFlg
    unsigned char mFlagDict{0};          // bit 5 of mFlg
    unsigned char mFlagLevel{0};         // upper 2 bits of mFlg
    unsigned char mCheckValue{0};        // not written by any method in this class
};