Add some bit utils and initial LZ77 encoder.
This commit is contained in:
parent
ff962a6b16
commit
318b481ccc
12 changed files with 508 additions and 117 deletions
0
src/compression/Lz77Encoder.cpp
Normal file
0
src/compression/Lz77Encoder.cpp
Normal file
168
src/compression/Lz77Encoder.h
Normal file
168
src/compression/Lz77Encoder.h
Normal file
|
@ -0,0 +1,168 @@
|
|||
#pragma once
|
||||
|
||||
#include "StringUtils.h"

#include <cstddef>
#include <stdexcept>
#include <string>
#include <vector>
|
||||
|
||||
/**
 * Minimal text-based LZ77-style codec.
 *
 * encode() emits characters that have no earlier occurrence verbatim and
 * replaces every other run with a back-reference token "@<offset>L<length>",
 * where <offset> is the distance back from the current end of the output and
 * <length> is the number of characters to copy. A reference may be longer than
 * its offset, in which case the copy overlaps the characters it produces.
 *
 * Known limitations of the text format (unchanged here so that previously
 * encoded data still decodes):
 *  - a literal '@' in the input is indistinguishable from the start of a token;
 *  - a literal digit emitted directly after a token is read as part of that
 *    token's length.
 * Inputs that hit either case do not round-trip.
 */
class Lz77Encoder
{
public:
    using DataStream = std::vector<char>;

    /**
     * Extends a match whose first character is already known to agree.
     *
     * @param matchBuffer  Receives every additionally matched character.
     * @param searchIndex  Index in mSearchBuffer of the first matched character;
     *                     must be smaller than mSearchBuffer.size().
     * @param hitOffset    Unused; kept so existing callers keep compiling.
     * @param stream       The input being encoded.
     * @param streamLoc    Index in stream of the first matched character.
     * @return Total match length, counting the first character (always >= 1).
     */
    unsigned lookAheadForMatchingChars(std::vector<char>& matchBuffer, unsigned searchIndex, [[maybe_unused]] unsigned hitOffset, const std::string& stream, unsigned streamLoc)
    {
        const std::size_t window_size = mSearchBuffer.size();
        const std::size_t remaining_size = streamLoc < stream.size() ? stream.size() - streamLoc : 0;

        unsigned num_hits{1};
        for (std::size_t jdx = 1; jdx < remaining_size; ++jdx)
        {
            // Position of the candidate character in the virtual sequence
            // "search buffer followed by the lookahead matched so far".
            const std::size_t source_index = static_cast<std::size_t>(searchIndex) + jdx;

            // Once the match runs off the end of the search buffer it continues
            // into the lookahead itself, i.e. into stream starting at streamLoc.
            const char buffer_char = source_index < window_size
                ? mSearchBuffer[source_index]
                : stream[streamLoc + (source_index - window_size)];

            if (stream[streamLoc + jdx] != buffer_char)
            {
                break;
            }
            matchBuffer.push_back(buffer_char);
            ++num_hits;
        }
        return num_hits;
    }

    /**
     * Scans mSearchBuffer from its newest entry to its oldest for occurrences of
     * searchChar and records the longest match found.
     *
     * @param searchChar  Character at stream[streamLoc].
     * @param hitLength   In/out: best length so far; overwritten whenever a
     *                    candidate is at least as long, so among equally long
     *                    matches the one furthest back wins.
     * @param hitOffset   Out: distance back from the end of the search buffer
     *                    (1 = most recent character) of the recorded match.
     * @param stream      The input being encoded.
     * @param streamLoc   Current position in stream.
     */
    void lookThroughSearchBuffer(char searchChar, unsigned& hitLength, unsigned& hitOffset, const std::string& stream, unsigned streamLoc)
    {
        const std::size_t window_size = mSearchBuffer.size();
        std::vector<char> match_buffer; // reused across candidates to avoid reallocating

        for (std::size_t idx = 0; idx < window_size; ++idx)
        {
            const std::size_t search_index = window_size - idx - 1;
            if (mSearchBuffer[search_index] != searchChar)
            {
                continue;
            }

            match_buffer.assign(1, searchChar);
            const unsigned num_hits = lookAheadForMatchingChars(
                match_buffer, static_cast<unsigned>(search_index), static_cast<unsigned>(idx), stream, streamLoc);

            if (num_hits >= hitLength)
            {
                hitLength = num_hits;
                hitOffset = static_cast<unsigned>(idx + 1);
            }
        }
    }

    /**
     * Encodes stream into literals and "@<offset>L<length>" tokens.
     *
     * The search buffer is reset first so that every call is independent and
     * its result can be handed to decode() on its own.
     */
    std::string encode(const std::string& stream)
    {
        mSearchBuffer.clear();

        std::string ret;
        std::size_t loc{0};
        while (loc < stream.size())
        {
            const char search_char = stream[loc];

            unsigned hit_length{0};
            unsigned hit_offset{0};
            lookThroughSearchBuffer(search_char, hit_length, hit_offset, stream, static_cast<unsigned>(loc));

            if (hit_length == 0)
            {
                // First occurrence of this character: emit it as a literal.
                ret += search_char;
                mSearchBuffer.push_back(search_char);
                ++loc;
                continue;
            }

            ret += '@';
            ret += std::to_string(hit_offset);
            ret += 'L';
            ret += std::to_string(hit_length);
            loc += hit_length;

            // Mirror what the decoder will do: append the referenced run one
            // character at a time so overlapping references read the
            // characters appended earlier in this same loop.
            const std::size_t hit_loc = mSearchBuffer.size() - hit_offset;
            for (unsigned copied = 0; copied < hit_length; ++copied)
            {
                const char repeated = mSearchBuffer[hit_loc + copied];
                mSearchBuffer.push_back(repeated);
            }
        }

        return ret;
    }

    /**
     * Reverses encode().
     *
     * @throws std::invalid_argument if a token is malformed (missing offset,
     *         missing 'L', or missing length).
     * @throws std::out_of_range if a number does not fit or a token refers to
     *         data before the start of the decoded output.
     */
    std::string decode(const std::string& stream)
    {
        std::string ret;

        std::size_t loc{0};
        while (loc < stream.size())
        {
            const char working_char = stream[loc];
            if (working_char != '@')
            {
                ret += working_char;
                ++loc;
                continue;
            }

            ++loc; // step over '@'
            const std::size_t offset_amount = readNumber(stream, loc, "offset");

            if (loc >= stream.size() || stream[loc] != 'L')
            {
                throw std::invalid_argument("Lz77Encoder::decode: expected 'L' after back-reference offset");
            }
            ++loc; // step over 'L'

            // Only digits belong to the length; any other character is the
            // next literal or token and must be left in place.
            const std::size_t length_amount = readNumber(stream, loc, "length");

            if (offset_amount == 0 || offset_amount > ret.size())
            {
                throw std::out_of_range("Lz77Encoder::decode: back-reference offset outside decoded data");
            }

            // Character-by-character copy so that a length larger than the
            // offset repeats the characters being produced.
            const std::size_t buffer_index = ret.size() - offset_amount;
            for (std::size_t copied = 0; copied < length_amount; ++copied)
            {
                const char repeated = ret[buffer_index + copied];
                ret += repeated;
            }
        }
        return ret;
    }

    DataStream mSearchBuffer;
    DataStream mLookaheadBuffer;

private:
    /**
     * Reads a run of decimal digits starting at stream[loc], advances loc past
     * it and returns its value. Throws std::invalid_argument when no digit is
     * present; std::stoul throws std::out_of_range when the value is too large.
     */
    static std::size_t readNumber(const std::string& stream, std::size_t& loc, const char* what)
    {
        const std::size_t start = loc;
        while (loc < stream.size() && stream[loc] >= '0' && stream[loc] <= '9')
        {
            ++loc;
        }
        if (loc == start)
        {
            throw std::invalid_argument(std::string("Lz77Encoder::decode: missing ") + what + " in back-reference");
        }
        return std::stoul(stream.substr(start, loc - start));
    }
};
|
|
@ -1,6 +1,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "ByteUtils.h"
|
||||
#include "BitStream.h"
|
||||
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
|
@ -10,12 +11,12 @@ class ZlibData
|
|||
public:
|
||||
// Writes one byte at position idx: the statements shown store it in mData and
// forward it to mBitStream.setByte().
// NOTE(review): this listing is a rendered diff hunk without +/- markers; the
// mData line and the mBitStream line are presumably the removed and the added
// form of the same statement - confirm against the checked-in header.
void setByte(unsigned idx, unsigned char data)
|
||||
{
|
||||
mData[idx] = data;
|
||||
mBitStream.setByte(idx, data);
|
||||
}
|
||||
|
||||
// Sizes the storage for `size` bytes: the statements shown replace mData with a
// value-initialised vector of that length and call mBitStream.setBufferSize().
// NOTE(review): this listing is a rendered diff hunk without +/- markers; the
// mData line and the mBitStream line are presumably the removed and the added
// form of the same statement - confirm against the checked-in header.
void setDataSize(std::size_t size)
|
||||
{
|
||||
mData = std::vector<unsigned char>(size);
|
||||
mBitStream.setBufferSize(size);
|
||||
}
|
||||
|
||||
void setCompressionMethod(unsigned char method)
|
||||
|
@ -48,22 +49,22 @@ public:
|
|||
unsigned char ERROR = 0x03;
|
||||
|
||||
bool in_final_block = false;
|
||||
unsigned working_byte_id = 0;
|
||||
for (unsigned idx=0; idx<mData.size(); idx++)
|
||||
while(mBitStream.loadNextByte())
|
||||
{
|
||||
auto working_byte = mData[working_byte_id];
|
||||
std::cout << "Into process data, byte is: " << static_cast<int>(working_byte) << std::endl;
|
||||
auto working_byte = mBitStream.getCurrentByte();
|
||||
std::cout << "Into process data, byte is: " << static_cast<unsigned>(working_byte) << std::endl;
|
||||
|
||||
auto final_block = ByteUtils::getBitN(working_byte, 0);
|
||||
unsigned char final_block{0};
|
||||
mBitStream.getNextNBits(1, final_block);
|
||||
if (final_block)
|
||||
{
|
||||
std::cout << "Got final block" << std::endl;
|
||||
in_final_block = true;
|
||||
}
|
||||
|
||||
auto compress_type = ByteUtils::getTwoBitsAtN(working_byte, 1);
|
||||
std::cout << "Compress type byte is: " << static_cast<int>(compress_type) << std::endl;
|
||||
|
||||
unsigned char compress_type{0};
|
||||
mBitStream.getNextNBits(2, compress_type);
|
||||
std::cout << "Compress type byte is: " << static_cast<unsigned>(compress_type) << std::endl;
|
||||
if (compress_type == NO_COMPRESSION)
|
||||
{
|
||||
std::cout << "Got NO_COMPRESSION" << std::endl;
|
||||
|
@ -75,6 +76,12 @@ public:
|
|||
else if (compress_type == DYNAMIC_HUFFMAN)
|
||||
{
|
||||
std::cout << "Got DYNAMIC_HUFFMAN" << std::endl;
|
||||
|
||||
unsigned char h_list{0};
|
||||
mBitStream.getNextNBits(5, h_list);
|
||||
mHlist = h_list + 257;
|
||||
std::cout << "Got HLIST " << mHlist << std::endl;
|
||||
|
||||
}
|
||||
else if (compress_type == ERROR)
|
||||
{
|
||||
|
@ -85,7 +92,10 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
std::vector<unsigned char> mData;
|
||||
BitStream mBitStream;
|
||||
|
||||
unsigned mHlist{0};
|
||||
|
||||
unsigned char mCmf{0};
|
||||
unsigned char mFlg{0};
|
||||
unsigned char mCompressionMethod{0};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue