Add some bit utils and an initial LZ77 encoder.

This commit is contained in:
James Grogan 2022-11-22 17:37:06 +00:00
parent ff962a6b16
commit 318b481ccc
12 changed files with 508 additions and 117 deletions

View file

View file

@ -0,0 +1,168 @@
#pragma once
#include "StringUtils.h"
#include <cstddef>
#include <stdexcept>
#include <string>
#include <vector>
// Minimal LZ77-style text encoder/decoder.
//
// Encoded format: characters are emitted verbatim until a character that
// already occurs in the search buffer is found; the longest match is then
// emitted as a back-reference "@<offset>L<length>", where <offset> is the
// distance back from the current end of the output and <length> is the number
// of characters to copy (the copy may overlap the characters it produces).
//
// NOTE(review): the textual format is ambiguous for some inputs - a literal
// '@' in the input, or a literal digit immediately following a back-reference,
// cannot be told apart from reference syntax by decode(). Inputs containing
// those do not round-trip.
class Lz77Encoder
{
public:
    using DataStream = std::vector<char>;

    // Extends a one-character match that starts at mSearchBuffer[searchIndex]
    // and at stream[streamLoc], comparing subsequent characters one by one.
    //
    // matchBuffer: every additionally matched character is appended to it.
    // searchIndex: index in mSearchBuffer of the first matched character.
    // hitOffset:   distance of searchIndex from the end of the search buffer
    //              minus one; not needed since the overlap position is derived
    //              from the buffer size, kept for interface compatibility.
    // stream:      the full input being encoded.
    // streamLoc:   index in stream of the first matched character.
    // Returns the total match length, counting the initial character (>= 1).
    unsigned lookAheadForMatchingChars(std::vector<char>& matchBuffer, unsigned searchIndex, unsigned hitOffset, const std::string& stream, unsigned streamLoc)
    {
        static_cast<void>(hitOffset);

        unsigned num_hits{1};
        if (streamLoc >= stream.size())
        {
            return num_hits;
        }

        const std::size_t remaining_size = stream.size() - streamLoc;
        const std::size_t buffer_size = mSearchBuffer.size();
        for (std::size_t jdx = 1; jdx < remaining_size; ++jdx)
        {
            const std::size_t candidate = static_cast<std::size_t>(searchIndex) + jdx;

            // Once the candidate runs past the end of the search buffer the
            // match overlaps the lookahead itself: the character to compare
            // against is the one already matched (candidate - buffer_size)
            // positions into the lookahead.
            const char buffer_char = candidate < buffer_size
                ? mSearchBuffer[candidate]
                : stream[streamLoc + (candidate - buffer_size)];

            if (stream[streamLoc + jdx] != buffer_char)
            {
                break;
            }
            matchBuffer.push_back(buffer_char);
            ++num_hits;
        }
        return num_hits;
    }

    // Scans the search buffer from its newest to its oldest entry for
    // occurrences of searchChar and records the longest match found.
    //
    // hitLength: in/out; updated when a match of at least this length is found
    //            (ties therefore resolve to the oldest occurrence).
    // hitOffset: out; distance back from the end of the search buffer to the
    //            start of the recorded match (1 == last buffered character).
    void lookThroughSearchBuffer(char searchChar, unsigned& hitLength, unsigned& hitOffset, const std::string& stream, unsigned streamLoc)
    {
        const std::size_t buffer_size = mSearchBuffer.size();
        for (std::size_t idx = 0; idx < buffer_size; ++idx)
        {
            const std::size_t search_index = buffer_size - idx - 1;
            const char buffer_char = mSearchBuffer[search_index];
            if (buffer_char != searchChar)
            {
                continue;
            }

            std::vector<char> match_buffer{buffer_char};
            const auto num_hits = lookAheadForMatchingChars(match_buffer, static_cast<unsigned>(search_index), static_cast<unsigned>(idx), stream, streamLoc);
            if (num_hits >= hitLength)
            {
                hitLength = num_hits;
                hitOffset = static_cast<unsigned>(idx + 1);
            }
        }
    }

    // Encodes stream into the "@<offset>L<length>" text format described on
    // the class. The search buffer is reset first so that every call is
    // self-contained and its output can be decoded on its own.
    std::string encode(const std::string& stream)
    {
        mSearchBuffer.clear();

        std::string ret;
        unsigned loc{0};
        while (loc < stream.size())
        {
            const char search_char = stream[loc];
            unsigned hit_length{0};
            unsigned hit_offset{0};
            lookThroughSearchBuffer(search_char, hit_length, hit_offset, stream, loc);

            if (hit_length == 0)
            {
                // No earlier occurrence: emit the character as a literal.
                ret += search_char;
                mSearchBuffer.push_back(search_char);
                ++loc;
                continue;
            }

            ret += '@';
            ret += std::to_string(hit_offset);
            ret += 'L';
            ret += std::to_string(hit_length);

            // Mirror the matched characters into the search buffer. The buffer
            // grows while copying, so overlapping matches read characters that
            // were appended earlier in this same loop.
            const std::size_t hit_loc = mSearchBuffer.size() - hit_offset;
            for (std::size_t idx = hit_loc; idx < hit_loc + hit_length; ++idx)
            {
                const char copied = mSearchBuffer[idx];
                mSearchBuffer.push_back(copied);
            }
            loc += hit_length;
        }
        return ret;
    }

    // Decodes text produced by encode().
    //
    // Throws std::invalid_argument when a back-reference is malformed (missing
    // or non-numeric offset/length, missing 'L') or points outside the data
    // decoded so far; std::out_of_range (from std::stoul) when a number does
    // not fit in unsigned long.
    std::string decode(const std::string& stream)
    {
        const auto is_digit = [](char c) { return c >= '0' && c <= '9'; };

        std::string ret;
        std::size_t loc{0};
        while (loc < stream.size())
        {
            const char working_char = stream[loc];
            if (working_char != '@')
            {
                ret += working_char;
                ++loc;
                continue;
            }
            ++loc; // consume '@'

            std::string offset;
            while (loc < stream.size() && is_digit(stream[loc]))
            {
                offset += stream[loc];
                ++loc;
            }
            if (loc >= stream.size() || stream[loc] != 'L')
            {
                throw std::invalid_argument("Lz77Encoder::decode: malformed back-reference, expected 'L'");
            }
            ++loc; // consume 'L'

            // Only digits belong to the length; anything else (letters,
            // whitespace, punctuation, '@') is the start of the next token.
            std::string length;
            while (loc < stream.size() && is_digit(stream[loc]))
            {
                length += stream[loc];
                ++loc;
            }

            // std::stoul throws std::invalid_argument on an empty string.
            const std::size_t offset_amount = std::stoul(offset);
            const std::size_t length_amount = std::stoul(length);
            if (offset_amount == 0 || offset_amount > ret.size())
            {
                throw std::invalid_argument("Lz77Encoder::decode: back-reference offset out of range");
            }

            // ret grows while copying, which makes overlapping references
            // (length > offset) repeat the referenced run.
            const std::size_t buffer_index = ret.size() - offset_amount;
            for (std::size_t jdx = 0; jdx < length_amount; ++jdx)
            {
                const char copied = ret[buffer_index + jdx];
                ret += copied;
            }
        }
        return ret;
    }

    // History of everything emitted by the current encode() call.
    DataStream mSearchBuffer;
    // Not used by any method of this class.
    DataStream mLookaheadBuffer;
};

View file

@ -1,6 +1,7 @@
#pragma once
#include "ByteUtils.h"
#include "BitStream.h"
#include <vector>
#include <iostream>
@ -10,12 +11,12 @@ class ZlibData
public:
void setByte(unsigned idx, unsigned char data)
{
mData[idx] = data;
mBitStream.setByte(idx, data);
}
void setDataSize(std::size_t size)
{
mData = std::vector<unsigned char>(size);
mBitStream.setBufferSize(size);
}
void setCompressionMethod(unsigned char method)
@ -48,22 +49,22 @@ public:
unsigned char ERROR = 0x03;
bool in_final_block = false;
unsigned working_byte_id = 0;
for (unsigned idx=0; idx<mData.size(); idx++)
while(mBitStream.loadNextByte())
{
auto working_byte = mData[working_byte_id];
std::cout << "Into process data, byte is: " << static_cast<int>(working_byte) << std::endl;
auto working_byte = mBitStream.getCurrentByte();
std::cout << "Into process data, byte is: " << static_cast<unsigned>(working_byte) << std::endl;
auto final_block = ByteUtils::getBitN(working_byte, 0);
unsigned char final_block{0};
mBitStream.getNextNBits(1, final_block);
if (final_block)
{
std::cout << "Got final block" << std::endl;
in_final_block = true;
}
auto compress_type = ByteUtils::getTwoBitsAtN(working_byte, 1);
std::cout << "Compress type byte is: " << static_cast<int>(compress_type) << std::endl;
unsigned char compress_type{0};
mBitStream.getNextNBits(2, compress_type);
std::cout << "Compress type byte is: " << static_cast<unsigned>(compress_type) << std::endl;
if (compress_type == NO_COMPRESSION)
{
std::cout << "Got NO_COMPRESSION" << std::endl;
@ -75,6 +76,12 @@ public:
else if (compress_type == DYNAMIC_HUFFMAN)
{
std::cout << "Got DYNAMIC_HUFFMAN" << std::endl;
unsigned char h_list{0};
mBitStream.getNextNBits(5, h_list);
mHlist = h_list + 257;
std::cout << "Got HLIST " << mHlist << std::endl;
}
else if (compress_type == ERROR)
{
@ -85,7 +92,10 @@ public:
}
private:
std::vector<unsigned char> mData;
BitStream mBitStream;
unsigned mHlist{0};
unsigned char mCmf{0};
unsigned char mFlg{0};
unsigned char mCompressionMethod{0};