Some encode/decode practice.
This commit is contained in:
parent
8a41337e2d
commit
ff962a6b16
29 changed files with 727 additions and 305 deletions
|
@ -2,6 +2,8 @@
|
|||
# Source files that are appended to the compression_LIB_INCLUDES list.
list(APPEND compression_LIB_INCLUDES
|
||||
StreamCompressor.cpp
|
||||
HuffmanEncoder.cpp
|
||||
RunLengthEncoder.cpp
|
||||
ZlibData.cpp
|
||||
)
|
||||
|
||||
# Build everything in compression_LIB_INCLUDES as a shared library target named `compression`.
add_library(compression SHARED ${compression_LIB_INCLUDES})
|
||||
|
|
|
@ -1,77 +1,100 @@
|
|||
#include "HuffmanEncoder.h"
|
||||
|
||||
#include "Tree.h"
|
||||
#include "RawTree.h"
|
||||
|
||||
#include <unordered_map>
|
||||
#include <queue>
|
||||
#include <tuple>
|
||||
#include <iostream>
|
||||
|
||||
// Recursively prints `node` and its subtree to std::cout.
// Each line is prefixed with `depth` underscore characters; a null node
// prints nothing. Leaves print their value and sum, inner nodes print their
// sum followed by the left and then the right subtree.
void HuffmanEncoder::dumpNode(RawNode<CountPair>* node, unsigned depth) const
{
    if (node == nullptr)
    {
        return;
    }

    const auto payload = node->getData();
    const std::string indent(depth, '_');

    if (node->isLeaf())
    {
        std::cout << indent << "Leaf with value: " << payload.first << " and sum " << payload.second << std::endl;
        return;
    }

    // Inner node: report the sum, then descend one level into each child.
    const unsigned childDepth = depth + 1;

    std::cout << indent << "Intermediate with sum " << payload.second << std::endl;

    std::cout << indent << "Doing Left.." << std::endl;
    dumpNode(node->getLeftChild(), childDepth);

    std::cout << indent << "Doing Right.." << std::endl;
    dumpNode(node->getRightChild(), childDepth);

    std::cout << indent << "*****" << std::endl;
}
|
||||
|
||||
void HuffmanEncoder::dumpTree(const RawTree<CountPair>& tree) const
|
||||
{
|
||||
dumpNode(tree.getRootNode(), 0);
|
||||
}
|
||||
|
||||
// Tallies how often each byte value occurs in `stream` and passes the
// resulting frequency table to the count-based encode() overload.
//
// The previous inline tree construction is removed: it declared `counts`
// twice, read q.top() from an empty queue when only one distinct symbol was
// present, and produced a tree that was never used.
void HuffmanEncoder::encode(const HuffmanEncoder::DataStream& stream)
{
    std::unordered_map<unsigned char, unsigned> counts;
    for (const auto c : stream)
    {
        // operator[] value-initialises a missing entry to 0 before the increment.
        counts[c]++;
    }

    encode(counts);
}
|
||||
|
||||
void HuffmanEncoder::encode(const std::unordered_map<unsigned char, unsigned>& counts)
|
||||
{
|
||||
std::cout << "Counts" << std::endl;
|
||||
for (const auto& data: counts)
|
||||
{
|
||||
std::cout << data.first << " | " << data.second << std::endl;
|
||||
}
|
||||
std::cout << "*******" << std::endl;
|
||||
|
||||
auto cmp = [](RawNode<CountPair>* left, RawNode<CountPair>* right)
|
||||
{
|
||||
return left->getData().second > right->getData().second;
|
||||
};
|
||||
|
||||
std::priority_queue<RawNode<CountPair>*, std::vector<RawNode<CountPair>* >, decltype(cmp)> q(cmp);
|
||||
for (const auto& entry : counts)
|
||||
{
|
||||
q.push(new RawNode<CountPair>(entry));
|
||||
}
|
||||
|
||||
while(q.size() > 1)
|
||||
{
|
||||
auto node0 = q.top();
|
||||
q.pop();
|
||||
|
||||
auto node1 = q.top();
|
||||
q.pop();
|
||||
|
||||
const auto sum = node0->getData().second + node1->getData().second;
|
||||
auto new_node = new RawNode<CountPair>(CountPair{0, sum});
|
||||
|
||||
new_node->addChild(node0);
|
||||
new_node->addChild(node1);
|
||||
q.push(new_node);
|
||||
}
|
||||
|
||||
auto root = q.top();
|
||||
q.pop();
|
||||
|
||||
RawTree<CountPair> tree;
|
||||
tree.addRootNode(root);
|
||||
|
||||
//using TableEntry = std::tuple<>
|
||||
|
||||
dumpTree(tree);
|
||||
|
||||
std::cout << "********" << std::endl;
|
||||
}
|
||||
|
|
|
@ -1,11 +1,21 @@
|
|||
#pragma once
|
||||
|
||||
#include "RawTree.h"
|
||||
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
|
||||
class HuffmanEncoder
|
||||
{
|
||||
using DataStream = std::vector<unsigned char>;
|
||||
using CountPair = std::pair<unsigned char, unsigned>;
|
||||
|
||||
public:
|
||||
void encode(const DataStream& stream);
|
||||
void encode(const DataStream& stream);
|
||||
|
||||
void encode(const std::unordered_map<unsigned char, unsigned>& counts);
|
||||
|
||||
private:
|
||||
void dumpTree(const RawTree<CountPair>& tree) const;
|
||||
void dumpNode(RawNode<CountPair>* node, unsigned depth) const;
|
||||
};
|
||||
|
|
0
src/compression/RunLengthEncoder.cpp
Normal file
0
src/compression/RunLengthEncoder.cpp
Normal file
110
src/compression/RunLengthEncoder.h
Normal file
110
src/compression/RunLengthEncoder.h
Normal file
|
@ -0,0 +1,110 @@
|
|||
#pragma once
|
||||
|
||||
#include "StringUtils.h"
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
// Simple text run-length encoder.
//
// Encoded format: a run is written as <delimiter><decimal count><character>,
// e.g. "aaaa" -> "@4a". Everything else is copied through literally.
//
// To keep the format unambiguous:
//  - digit characters are never run-length encoded (a digit directly after
//    the count could not be told apart from the count itself);
//  - the delimiter character is always written in run form, even for a run
//    of one, so a literal delimiter is never mistaken for the start of a run.
class RunLengthEncoder
{
public:
    // Returns the run-length encoded form of `string` (empty in, empty out).
    std::string encode(const std::string& string)
    {
        std::string ret;

        std::string::size_type run_begin = 0;
        while (run_begin < string.size())
        {
            const char run_char = string[run_begin];

            // Find the end of the run of identical characters.
            std::string::size_type run_end = run_begin + 1;
            while (run_end < string.size() && string[run_end] == run_char)
            {
                ++run_end;
            }

            insertCharacter(ret, run_char, static_cast<unsigned>(run_end - run_begin));
            run_begin = run_end;
        }

        return ret;
    }

    // Reverses encode(). The repeated symbol may be any character, not only
    // a letter. A delimiter that is not followed by "<digits><character>" is
    // malformed; it is copied through unchanged instead of aborting.
    // std::stoul may still throw std::out_of_range for an absurdly large count.
    std::string decode(const std::string& string)
    {
        std::string ret;

        std::string::size_type pos = 0;
        while (pos < string.size())
        {
            const char c = string[pos];
            ++pos;

            if (c != mDelimiter)
            {
                ret += c;
                continue;
            }

            // Collect the decimal repeat count that follows the delimiter.
            std::string::size_type digits_end = pos;
            while (digits_end < string.size() && isDigit(string[digits_end]))
            {
                ++digits_end;
            }

            const bool has_count = digits_end > pos;
            const bool has_symbol = digits_end < string.size();
            if (!has_count || !has_symbol)
            {
                // Malformed escape: keep the delimiter and resume right after it.
                ret += c;
                continue;
            }

            // Parse the count once, then append the whole run in one call.
            const unsigned long reps = std::stoul(string.substr(pos, digits_end - pos));
            ret.append(reps, string[digits_end]);
            pos = digits_end + 1;
        }

        return ret;
    }

private:
    // Locale-independent check for '0'..'9'.
    static bool isDigit(char c)
    {
        return c >= '0' && c <= '9';
    }

    // Appends a run of `count` copies of `c` to `output`, in run form when
    // that is shorter (count >= 3) or required (c is the delimiter), and
    // literally otherwise.
    void insertCharacter(std::string& output, char c, unsigned count)
    {
        const bool must_escape = (c == mDelimiter);
        const bool can_compress = (count >= 3) && !isDigit(c);

        if (must_escape || can_compress)
        {
            output += mDelimiter;
            output += std::to_string(count);
            output += c;
        }
        else
        {
            output.append(count, c);
        }
    }

    // Marks the start of an encoded run.
    char mDelimiter {'@'};
};
|
0
src/compression/ZlibData.cpp
Normal file
0
src/compression/ZlibData.cpp
Normal file
97
src/compression/ZlibData.h
Normal file
97
src/compression/ZlibData.h
Normal file
|
@ -0,0 +1,97 @@
|
|||
#pragma once
|
||||
|
||||
#include "ByteUtils.h"
|
||||
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
|
||||
class ZlibData
|
||||
{
|
||||
public:
|
||||
void setByte(unsigned idx, unsigned char data)
|
||||
{
|
||||
mData[idx] = data;
|
||||
}
|
||||
|
||||
void setDataSize(std::size_t size)
|
||||
{
|
||||
mData = std::vector<unsigned char>(size);
|
||||
}
|
||||
|
||||
void setCompressionMethod(unsigned char method)
|
||||
{
|
||||
std::cout << "Got compression input " << static_cast<int>(method) << std::endl;
|
||||
mCmf = method;
|
||||
mCompressionMethod = ByteUtils::getLowerNBits(method, 4);
|
||||
mCompressionInfo = ByteUtils::getHigherNBits(method, 4);
|
||||
|
||||
std::cout << "Got compression method " << static_cast<int>(mCompressionMethod) << " and info " << static_cast<int>(mCompressionInfo) << std::endl;
|
||||
}
|
||||
|
||||
void setExtraFlags(unsigned char extraFlags)
|
||||
{
|
||||
std::cout << "Got flags " << static_cast<int>(extraFlags) << std::endl;
|
||||
|
||||
mFlg = extraFlags;
|
||||
mFlagCheck = ByteUtils::getLowerNBits(extraFlags, 5);
|
||||
mFlagDict = ByteUtils::getBitN(extraFlags, 5);
|
||||
mFlagLevel = ByteUtils::getHigherNBits(extraFlags, 2);
|
||||
|
||||
std::cout << "Got flag check " << static_cast<int>(mFlagCheck) << " and dict " << static_cast<int>(mFlagDict) << " and level " << static_cast<int>(mFlagLevel) << std::endl;
|
||||
}
|
||||
|
||||
void processData()
|
||||
{
|
||||
unsigned char NO_COMPRESSION = 0x00;
|
||||
unsigned char FIXED_HUFFMAN = 0x01;
|
||||
unsigned char DYNAMIC_HUFFMAN = 0x02;
|
||||
unsigned char ERROR = 0x03;
|
||||
|
||||
bool in_final_block = false;
|
||||
unsigned working_byte_id = 0;
|
||||
for (unsigned idx=0; idx<mData.size(); idx++)
|
||||
{
|
||||
auto working_byte = mData[working_byte_id];
|
||||
std::cout << "Into process data, byte is: " << static_cast<int>(working_byte) << std::endl;
|
||||
|
||||
auto final_block = ByteUtils::getBitN(working_byte, 0);
|
||||
if (final_block)
|
||||
{
|
||||
std::cout << "Got final block" << std::endl;
|
||||
in_final_block = true;
|
||||
}
|
||||
|
||||
auto compress_type = ByteUtils::getTwoBitsAtN(working_byte, 1);
|
||||
std::cout << "Compress type byte is: " << static_cast<int>(compress_type) << std::endl;
|
||||
|
||||
if (compress_type == NO_COMPRESSION)
|
||||
{
|
||||
std::cout << "Got NO_COMPRESSION" << std::endl;
|
||||
}
|
||||
else if (compress_type == FIXED_HUFFMAN)
|
||||
{
|
||||
std::cout << "Got FIXED_HUFFMAN" << std::endl;
|
||||
}
|
||||
else if (compress_type == DYNAMIC_HUFFMAN)
|
||||
{
|
||||
std::cout << "Got DYNAMIC_HUFFMAN" << std::endl;
|
||||
}
|
||||
else if (compress_type == ERROR)
|
||||
{
|
||||
std::cout << "Got ERROR" << std::endl;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<unsigned char> mData;
|
||||
unsigned char mCmf{0};
|
||||
unsigned char mFlg{0};
|
||||
unsigned char mCompressionMethod{0};
|
||||
unsigned char mCompressionInfo{0};
|
||||
unsigned char mFlagCheck{0};
|
||||
unsigned char mFlagDict{0};
|
||||
unsigned char mFlagLevel{0};
|
||||
unsigned char mCheckValue{0};
|
||||
};
|
Loading…
Add table
Add a link
Reference in a new issue