Add some bit utils and initial LZ77 encoder.

This commit is contained in:
James Grogan 2022-11-22 17:37:06 +00:00
parent ff962a6b16
commit 318b481ccc
12 changed files with 508 additions and 117 deletions

View file

View file

@ -0,0 +1,168 @@
#pragma once
#include "StringUtils.h"
#include <cstddef>
#include <stdexcept>
#include <string>
#include <vector>
/// Minimal text-based LZ77 encoder/decoder.
///
/// Encoded form: characters that have not been seen before are emitted
/// verbatim; a repeat is emitted as the token `@<offset>L<length>`, meaning
/// "copy <length> characters starting <offset> characters back in the output
/// produced so far" (the copy may overlap the characters it is producing).
///
/// Known limitation of this textual format: a literal '@' in the input, or a
/// literal decimal digit that directly follows a token, cannot be told apart
/// from token syntax by decode().
class Lz77Encoder
{
public:
    using DataStream = std::vector<char>;

    /// Extends a match whose first character has already been found.
    ///
    /// @param matchBuffer  Receives every additional matching character.
    /// @param searchIndex  Index in mSearchBuffer of the first matched character.
    /// @param hitOffset    Unused; kept so existing callers keep compiling.
    /// @param stream       Full input being encoded.
    /// @param streamLoc    Index in `stream` of the character being encoded.
    /// @return Total match length including the first character (always >= 1).
    unsigned lookAheadForMatchingChars(std::vector<char>& matchBuffer, unsigned searchIndex, unsigned hitOffset, const std::string& stream, unsigned streamLoc)
    {
        static_cast<void>(hitOffset);
        unsigned num_hits{1};
        if (streamLoc >= stream.size())
        {
            return num_hits;
        }
        const std::size_t buffer_size = mSearchBuffer.size();
        const std::size_t remaining_size = stream.size() - streamLoc;
        for (std::size_t jdx = 1; jdx < remaining_size; jdx++)
        {
            const std::size_t candidate = static_cast<std::size_t>(searchIndex) + jdx;
            char buffer_char{0};
            if (candidate < buffer_size)
            {
                buffer_char = mSearchBuffer[candidate];
            }
            else
            {
                // The match runs past the end of the search buffer. Those
                // characters are the ones this very match is producing, i.e.
                // the look-ahead characters starting at streamLoc.
                const std::size_t stream_index = streamLoc + (candidate - buffer_size);
                if (stream_index >= stream.size())
                {
                    break;
                }
                buffer_char = stream[stream_index];
            }
            if (stream[streamLoc + jdx] != buffer_char)
            {
                break;
            }
            matchBuffer.push_back(buffer_char);
            num_hits++;
        }
        return num_hits;
    }

    /// Scans the search buffer from its newest to its oldest character for
    /// occurrences of `searchChar` and records the longest match found.
    ///
    /// @param hitLength  In/out: best match length so far (0 means none).
    /// @param hitOffset  Out: distance back from the end of the search buffer
    ///                   to the start of the best match (1 = last character).
    ///                   On equal lengths the farther match is kept.
    void lookThroughSearchBuffer(char searchChar, unsigned& hitLength, unsigned& hitOffset, const std::string& stream, unsigned streamLoc)
    {
        const std::size_t buffer_size = mSearchBuffer.size();
        for (unsigned idx = 0; idx < buffer_size; idx++)
        {
            const std::size_t search_index = buffer_size - idx - 1;
            const char buffer_char = mSearchBuffer[search_index];
            if (buffer_char != searchChar)
            {
                continue;
            }
            std::vector<char> match_buffer{buffer_char};
            const auto num_hits = lookAheadForMatchingChars(match_buffer, static_cast<unsigned>(search_index), idx, stream, streamLoc);
            if (num_hits >= hitLength)
            {
                hitLength = num_hits;
                hitOffset = idx + 1;
            }
        }
    }

    /// Encodes `stream` into the textual token format described on the class.
    ///
    /// The search buffer is reset first so that repeated calls on one encoder
    /// never emit back-references into data from a previous call (decode()
    /// has no access to that data).
    std::string encode(const std::string& stream)
    {
        mSearchBuffer.clear();
        std::string ret;
        unsigned loc{0};
        while (loc < stream.size())
        {
            const char search_char = stream[loc];
            unsigned hit_length{0};
            unsigned hit_offset{0};
            lookThroughSearchBuffer(search_char, hit_length, hit_offset, stream, loc);
            if (hit_length > 0)
            {
                ret += '@';
                ret += std::to_string(hit_offset);
                ret += 'L';
                ret += std::to_string(hit_length);
                // Append the matched text one character at a time so that an
                // overlapping match can read characters it has just written.
                const std::size_t hit_loc = mSearchBuffer.size() - hit_offset;
                for (std::size_t idx = hit_loc; idx < hit_loc + hit_length; idx++)
                {
                    const char copied = mSearchBuffer[idx];
                    mSearchBuffer.push_back(copied);
                }
                loc += hit_length;
            }
            else
            {
                ret += search_char;
                mSearchBuffer.push_back(search_char);
                loc++;
            }
        }
        return ret;
    }

    /// Decodes text produced by encode().
    ///
    /// @throws std::invalid_argument if an '@' is not followed by
    ///         `<digits>L<digits>`.
    /// @throws std::out_of_range if a token's offset is zero or points before
    ///         the start of the decoded output (or a number does not fit).
    std::string decode(const std::string& stream)
    {
        std::string ret;
        std::size_t loc{0};
        while (loc < stream.size())
        {
            const char working_char = stream[loc];
            loc++;
            if (working_char != '@')
            {
                ret += working_char;
                continue;
            }

            const std::size_t offset_amount = readDecimal(stream, loc);
            if (loc >= stream.size() || stream[loc] != 'L')
            {
                throw std::invalid_argument("Lz77Encoder::decode: expected 'L' after back-reference offset");
            }
            loc++;
            // Only digits belong to the length; whatever follows (space,
            // punctuation, ...) is the next literal and must not be consumed.
            const std::size_t length_amount = readDecimal(stream, loc);

            if (offset_amount == 0 || offset_amount > ret.size())
            {
                throw std::out_of_range("Lz77Encoder::decode: back-reference offset outside decoded data");
            }
            const std::size_t buffer_index = ret.size() - offset_amount;
            for (std::size_t jdx = 0; jdx < length_amount; jdx++)
            {
                const char copied = ret[buffer_index + jdx];
                ret += copied;
            }
        }
        return ret;
    }

    DataStream mSearchBuffer;
    DataStream mLookaheadBuffer;

private:
    /// Reads the run of decimal digits starting at `loc`, advances `loc` past
    /// it and returns its value. std::stoul throws std::invalid_argument when
    /// the run is empty and std::out_of_range when the value is too large.
    static std::size_t readDecimal(const std::string& stream, std::size_t& loc)
    {
        std::string digits;
        while (loc < stream.size() && stream[loc] >= '0' && stream[loc] <= '9')
        {
            digits += stream[loc];
            loc++;
        }
        return std::stoul(digits);
    }
};

View file

@ -1,6 +1,7 @@
#pragma once
#include "ByteUtils.h"
#include "BitStream.h"
#include <vector>
#include <iostream>
@ -10,12 +11,12 @@ class ZlibData
public:
void setByte(unsigned idx, unsigned char data)
{
mData[idx] = data;
mBitStream.setByte(idx, data);
}
void setDataSize(std::size_t size)
{
mData = std::vector<unsigned char>(size);
mBitStream.setBufferSize(size);
}
void setCompressionMethod(unsigned char method)
@ -48,22 +49,22 @@ public:
unsigned char ERROR = 0x03;
bool in_final_block = false;
unsigned working_byte_id = 0;
for (unsigned idx=0; idx<mData.size(); idx++)
while(mBitStream.loadNextByte())
{
auto working_byte = mData[working_byte_id];
std::cout << "Into process data, byte is: " << static_cast<int>(working_byte) << std::endl;
auto working_byte = mBitStream.getCurrentByte();
std::cout << "Into process data, byte is: " << static_cast<unsigned>(working_byte) << std::endl;
auto final_block = ByteUtils::getBitN(working_byte, 0);
unsigned char final_block{0};
mBitStream.getNextNBits(1, final_block);
if (final_block)
{
std::cout << "Got final block" << std::endl;
in_final_block = true;
}
auto compress_type = ByteUtils::getTwoBitsAtN(working_byte, 1);
std::cout << "Compress type byte is: " << static_cast<int>(compress_type) << std::endl;
unsigned char compress_type{0};
mBitStream.getNextNBits(2, compress_type);
std::cout << "Compress type byte is: " << static_cast<unsigned>(compress_type) << std::endl;
if (compress_type == NO_COMPRESSION)
{
std::cout << "Got NO_COMPRESSION" << std::endl;
@ -75,6 +76,12 @@ public:
else if (compress_type == DYNAMIC_HUFFMAN)
{
std::cout << "Got DYNAMIC_HUFFMAN" << std::endl;
unsigned char h_list{0};
mBitStream.getNextNBits(5, h_list);
mHlist = h_list + 257;
std::cout << "Got HLIST " << mHlist << std::endl;
}
else if (compress_type == ERROR)
{
@ -85,7 +92,10 @@ public:
}
private:
std::vector<unsigned char> mData;
BitStream mBitStream;
unsigned mHlist{0};
unsigned char mCmf{0};
unsigned char mFlg{0};
unsigned char mCompressionMethod{0};

163
src/core/ByteUtils.cpp Normal file
View file

@ -0,0 +1,163 @@
#include "ByteUtils.h"
/// Reports whether bit 7 of `c` is set.
bool ByteUtils::MostSignificantBitIsOne(char c)
{
    const int top_bit_mask = 1 << 7;
    return (c & top_bit_mask) != 0;
}
/// Returns `word` masked with ByteUtils::WORD_FIRST_BIT.
ByteUtils::Word ByteUtils::GetWordFirstBit(const Word word)
{
    const auto masked = word & ByteUtils::WORD_FIRST_BIT;
    return masked;
}
/// Returns `word` masked with ByteUtils::WORD_LAST_BYTE.
ByteUtils::Word ByteUtils::GetWordLastByte(const Word word)
{
    const auto masked = word & ByteUtils::WORD_LAST_BYTE;
    return masked;
}
/// Returns the `num` most significant bits of `input`, shifted down so they
/// occupy the low end of the result.
///
/// @param input Byte to read from.
/// @param num   Number of high bits wanted, 0..8. 0 yields 0 and 8 yields
///              `input` unchanged.
/// @return The selected bits, or 0 when `num` is greater than 8 (the same
///         answer getLowerNBits gives for an invalid count).
unsigned char ByteUtils::getHigherNBits(unsigned char input, unsigned num)
{
    // Without this guard `8 - num` wraps around for num > 8 and the shift
    // count becomes far larger than the operand width, which is undefined.
    if (num > 8)
    {
        return 0;
    }
    return static_cast<unsigned char>(input >> (8 - num));
}
/// Returns the `num` least significant bits of `input`.
///
/// @param num Number of low bits to keep, 1..8; any other value yields 0.
unsigned char ByteUtils::getLowerNBits(unsigned char input, unsigned num)
{
    if (num == 0 || num > 8)
    {
        return 0;
    }
    // (1 << num) - 1 has exactly the lowest `num` bits set (0x01 ... 0xFF).
    const unsigned low_mask = (1u << num) - 1u;
    return static_cast<unsigned char>(input & low_mask);
}
/// Extracts the two bits of `input` at positions n and n+1 and returns them
/// shifted down to bit positions 0 and 1.
unsigned char ByteUtils::getTwoBitsAtN(unsigned char input, unsigned n)
{
    const int positioned_mask = 0x03 << n;
    const int selected = input & positioned_mask;
    return selected >> n;
}
/// Extracts `m` bits of `input` starting at bit position `n` and returns them
/// shifted down to start at bit 0.
///
/// @param m Number of bits, 1..8. For m == 8 the whole byte is returned and
///          `n` is ignored; any other value outside 1..8 yields 0.
/// @param n Position of the lowest bit to extract.
unsigned char ByteUtils::getMBitsAtN(unsigned char input, unsigned m, unsigned n)
{
    if (m == 8)
    {
        return input;
    }
    if (m == 0 || m > 8)
    {
        return 0;
    }
    // (1 << m) - 1 is the m-bit mask 0x01, 0x03, ..., 0x7F.
    const int positioned_mask = ((1 << m) - 1) << n;
    return (input & positioned_mask) >> n;
}
/// Masks out bit `n` of `input`.
///
/// The bit is returned in place (e.g. 0x80 for a set bit 7), not shifted down
/// to 0/1, so callers should treat the result as zero / non-zero.
unsigned char ByteUtils::getBitN(unsigned char input, unsigned n)
{
    const int single_bit = 1 << n;
    return input & single_bit;
}
/// Builds a byte from the first eight characters of `string`, most
/// significant bit first: a '1' sets the bit, any other character clears it.
///
/// @return The assembled byte, or 0 when `string` has fewer than 8 characters.
unsigned char ByteUtils::getFromString(const std::string& string)
{
    unsigned char value{0};
    if (string.length() >= 8)
    {
        for (unsigned pos = 0; pos < 8; ++pos)
        {
            // Shift what we have up by one and drop the new bit in at the bottom.
            const unsigned bit = (string[pos] == '1') ? 1u : 0u;
            value = static_cast<unsigned char>((value << 1) | bit);
        }
    }
    return value;
}
/// Renders `c` as eight '0'/'1' characters, most significant bit first.
std::string ByteUtils::toString(unsigned char c)
{
    std::string bits(8, '0');
    for (unsigned pos = 0; pos < 8; ++pos)
    {
        if (getBitN(c, 7 - pos))
        {
            bits[pos] = '1';
        }
    }
    return bits;
}
/// Writes the first `size` bytes of `buffer` into `reverse` in reversed order
/// and fills the rest of `reverse`, up to `targetSize` bytes, with zeros.
/// When `targetSize` is smaller than `size` only the first `targetSize`
/// reversed bytes are written.
void ByteUtils::ReverseBuffer(char* buffer, char* reverse, unsigned size, unsigned targetSize)
{
    for (unsigned out = 0; out < targetSize; ++out)
    {
        reverse[out] = (out < size) ? buffer[size - 1 - out] : 0;
    }
}
/// Converts the bytes at `buffer` to a Word by forwarding to ToType<Word>,
/// passing `reverse` through unchanged.
ByteUtils::Word ByteUtils::ToWord(char* buffer, bool reverse)
{
    const Word converted = ToType<Word>(buffer, reverse);
    return converted;
}
/// Converts the bytes at `buffer` to a DWord by forwarding to ToType<DWord>,
/// passing `reverse` through unchanged.
ByteUtils::DWord ByteUtils::ToDWord(char* buffer, bool reverse)
{
    const DWord converted = ToType<DWord>(buffer, reverse);
    return converted;
}
/// Converts the bytes at `buffer` to a QWord by forwarding to ToType<QWord>,
/// passing `reverse` through unchanged.
ByteUtils::QWord ByteUtils::ToQWord(char* buffer, bool reverse)
{
    const QWord converted = ToType<QWord>(buffer, reverse);
    return converted;
}
/// Returns true when the first `size` bytes of `buffer` and `tag` are equal.
/// A `size` of 0 compares equal without reading either pointer.
bool ByteUtils::Compare(char* buffer, const char* tag, unsigned size)
{
    unsigned pos = 0;
    // Walk forward while the bytes agree; stopping early means a mismatch.
    while (pos < size && tag[pos] == buffer[pos])
    {
        ++pos;
    }
    return pos == size;
}
/// Compares the first sizeof(DWord) bytes of `buffer` and `tag` via Compare().
bool ByteUtils::CompareDWords(char* buffer, const char* tag)
{
    const unsigned num_bytes = sizeof(DWord);
    return Compare(buffer, tag, num_bytes);
}
/// Compares the first sizeof(Word) bytes of `buffer` and `tag` via Compare().
bool ByteUtils::CompareWords(char* buffer, const char* tag)
{
    const unsigned num_bytes = sizeof(Word);
    return Compare(buffer, tag, num_bytes);
}

View file

@ -2,6 +2,7 @@
#include <cstring>
#include <stdint.h>
#include <string>
class ByteUtils
{
@ -10,75 +11,27 @@ public:
using DWord = int32_t;
using QWord = int64_t;
static bool MostSignificantBitIsOne(char c)
{
return c & (1 << 7);
}
static bool MostSignificantBitIsOne(char c);
static Word GetWordFirstBit(const Word word)
{
return word & ByteUtils::WORD_FIRST_BIT;
};
static Word GetWordFirstBit(const Word word);
static Word GetWordLastByte(const Word word)
{
return word & ByteUtils::WORD_LAST_BYTE;
}
static Word GetWordLastByte(const Word word);
static unsigned char getHigherNBits(unsigned char input, unsigned num)
{
return input >> 8 - num;
}
static unsigned char getHigherNBits(unsigned char input, unsigned num);
static unsigned char getLowerNBits(unsigned char input, unsigned num)
{
switch (num)
{
case 1:
return input & 0x01;
case 2:
return input & 0x03;
case 3:
return input & 0x07;
case 4:
return input & 0x0F;
case 5:
return input & 0x1F;
case 6:
return input & 0x3F;
case 7:
return input & 0x7F;
case 8:
return input;
default:
return 0;
}
}
static unsigned char getLowerNBits(unsigned char input, unsigned num);
static unsigned char getTwoBitsAtN(unsigned char input, unsigned n)
{
return (input & (0x03 << n)) >> n;
}
static unsigned char getTwoBitsAtN(unsigned char input, unsigned n);
static unsigned char getBitN(unsigned char input, unsigned n)
{
return input & (1 << n);
}
static unsigned char getMBitsAtN(unsigned char input, unsigned m, unsigned n);
static void ReverseBuffer(char* buffer, char* reverse, unsigned size, unsigned targetSize)
{
for(unsigned idx=0; idx<targetSize; idx++)
{
if (idx < size)
{
reverse[idx] = buffer[size - 1 -idx];
}
else
{
reverse[idx] = 0;
}
}
}
static unsigned char getBitN(unsigned char input, unsigned n);
static unsigned char getFromString(const std::string& string);
static std::string toString(unsigned char c);
static void ReverseBuffer(char* buffer, char* reverse, unsigned size, unsigned targetSize);
template<typename T>
static T ToType(char* buffer, bool reverse = true)
@ -97,42 +50,17 @@ public:
return result;
}
static Word ToWord(char* buffer, bool reverse = true)
{
return ToType<Word>(buffer, reverse);
}
static Word ToWord(char* buffer, bool reverse = true);
static DWord ToDWord(char* buffer, bool reverse = true)
{
return ToType<DWord>(buffer, reverse);
}
static DWord ToDWord(char* buffer, bool reverse = true);
static QWord ToQWord(char* buffer, bool reverse = true)
{
return ToType<QWord>(buffer, reverse);
}
static QWord ToQWord(char* buffer, bool reverse = true);
static bool Compare(char* buffer, const char* tag, unsigned size)
{
for(unsigned idx=0; idx<size; idx++)
{
if(tag[idx] != buffer[idx])
{
return false;
}
}
return true;
}
static bool Compare(char* buffer, const char* tag, unsigned size);
static bool CompareDWords(char* buffer, const char* tag)
{
return Compare(buffer, tag, sizeof(DWord));
}
static bool CompareDWords(char* buffer, const char* tag);
static bool CompareWords(char* buffer, const char* tag)
{
return Compare(buffer, tag, sizeof(Word));
}
static bool CompareWords(char* buffer, const char* tag);
static const int BYTE_FIRST_BIT = 0x40; // NOTE(review): 0x40 is 0100 0000; the pattern 1000 0000 would be 0x80 - confirm which is intended
static const Word WORD_FIRST_BIT = 0x8000; // 1000 0000 - 0000 0000

View file

@ -14,6 +14,7 @@ list(APPEND core_HEADERS
)
list(APPEND core_LIB_INCLUDES
ByteUtils.cpp
Event.cpp
Dictionary.cpp
Color.cpp
@ -28,6 +29,7 @@ list(APPEND core_LIB_INCLUDES
RandomUtils.cpp
StringUtils.cpp
streams/BinaryStream.cpp
streams/BitStream.cpp
http/HttpResponse.cpp
http/HttpHeader.cpp
http/HttpRequest.cpp

View file

@ -0,0 +1,56 @@
#include "BitStream.h"
#include "ByteUtils.h"
/// Advances to the next byte of the buffer and makes it the current byte.
///
/// @return true when a byte was loaded; false when the stream is already on
///         its last byte or the buffer is empty, in which case no state is
///         changed.
///
/// NOTE(review): mByteOffset starts at 0 in the class declaration and is
/// incremented before the read, so the first call loads index 1 and index 0
/// never becomes the current byte through this method - confirm intended.
bool BitStream::loadNextByte()
{
    // `>=` rather than `==`: with an empty buffer `mByteOffset + 1` is
    // already past the end, and an equality test would fall through to an
    // out-of-range read.
    if (mByteOffset + 1 >= mBuffer.size())
    {
        return false;
    }
    mByteOffset++;
    mCurrentByte = mBuffer[mByteOffset];
    return true;
}
// Reads the next `n` bits of the stream into `buffer` and advances the bit
// position. Returns false, leaving `buffer` and the bit position untouched,
// when the read needs another byte and loadNextByte() cannot supply one;
// returns true otherwise.
bool BitStream::getNextNBits(unsigned n, unsigned char& buffer)
{
// Positive when the request is treated as running past the current byte.
// NOTE(review): the threshold is 7, not 8, so a read that ends exactly on
// bit 7 (e.g. n = 5 at bit offset 3) also takes the straddling branch -
// confirm this is intended.
int overshoot = n + mBitOffset - 7;
if (overshoot > 0)
{
// Keep the byte we are leaving; part of the result comes from it.
unsigned char last_byte = mCurrentByte;
if (!loadNextByte())
{
return false;
}
auto num_lower = 7 - mBitOffset;
// NOTE(review): both pieces are held in plain `char`, so a value with the
// top bit set may be negative on platforms where char is signed before it
// is shifted below - verify.
char lower_bits = ByteUtils::getHigherNBits(last_byte, num_lower);
char higher_bits = ByteUtils::getLowerNBits(mCurrentByte, overshoot);
// Combine the piece from the new byte with the piece from the previous byte.
buffer = (higher_bits << (8 - num_lower)) | (lower_bits >> mBitOffset);
// The bit position now refers to the newly loaded byte.
mBitOffset = overshoot;
return true;
}
else
{
// Whole request is served from the current byte.
buffer = ByteUtils::getMBitsAtN(mCurrentByte, n, mBitOffset);
mBitOffset += n;
return true;
}
}
/// Stores `data` at position `idx` of the buffer. No bounds check is made;
/// `idx` must be below the size given to setBufferSize().
void BitStream::setByte(unsigned idx, unsigned char data)
{
    auto& slot = mBuffer[idx];
    slot = data;
}
/// Replaces the buffer with `size` zero-initialised bytes, discarding any
/// previous contents.
void BitStream::setBufferSize(std::size_t size)
{
    std::vector<unsigned char> fresh(size);
    mBuffer.swap(fresh);
}

View file

@ -0,0 +1,27 @@
#pragma once
#include <vector>
/// Reader over an in-memory byte buffer that hands out its contents a few
/// bits at a time. The buffer is sized with setBufferSize() and filled with
/// setByte() before reading.
class BitStream
{
public:
    /// Reads the next `n` bits into `buffer`; returns false when another byte
    /// was required but none was left.
    bool getNextNBits(unsigned n, unsigned char& buffer);

    /// Advances to the next byte of the buffer; returns false when there is
    /// no further byte.
    bool loadNextByte();

    /// Stores `data` at index `idx` of the buffer (no bounds check).
    void setByte(unsigned idx, unsigned char data);

    /// Replaces the buffer with `size` zero-initialised bytes.
    void setBufferSize(std::size_t size);

    /// Returns the byte most recently loaded by loadNextByte(), or 0 if none
    /// has been loaded yet.
    unsigned char getCurrentByte() const
    {
        return mCurrentByte;
    }

private:
    unsigned mByteOffset{0};
    unsigned mBitOffset{0};
    // Same element type as mBuffer and getCurrentByte(), so values above 127
    // are never routed through a possibly signed `char`.
    unsigned char mCurrentByte{0};
    std::vector<unsigned char> mBuffer;
};

View file

@ -10,6 +10,7 @@ target_include_directories(test_utils PUBLIC
list(APPEND TestFiles
audio/TestAudioWriter.cpp
audio/TestMidiReader.cpp
core/TestByteUtils.cpp
core/TestBinaryStream.cpp
core/TestTomlReader.cpp
compiler/TestLexer.cpp

View file

@ -2,6 +2,7 @@
#include "HuffmanEncoder.h"
#include "RunLengthEncoder.h"
#include "Lz77Encoder.h"
void test_run_length_encoder()
{
@ -34,13 +35,30 @@ void test_huffman_encoder()
HuffmanEncoder encoder;
encoder.encode(counts);
}
void test_lz77_encoder()
{
std::string test_data = "sir sid eastman easily teases sea sick seals";
//std::string test_data = "sir sid eastman";
Lz77Encoder encoder;
auto encoded = encoder.encode(test_data);
std::cout << "Encoded: " << encoded << std::endl;
//auto decoded = encoder.decode(encoded);
//std::cout << "Decoded: " << decoded << std::endl;
}
int main()
{
test_huffman_encoder();
//test_huffman_encoder();
//test_run_length_encoder();
test_lz77_encoder();
return 0;
}

View file

@ -0,0 +1,18 @@
#include "ByteUtils.h"
#include <iostream>
/// Manual check of ByteUtils: parses a bit string into a byte, prints its
/// numeric value and its bit-string form, then prints a 3-bit slice taken
/// at bit position 3.
int main()
{
    const auto parsed = ByteUtils::getFromString("00110101");
    std::cout << "Value is " << static_cast<unsigned>(parsed) << std::endl;

    const auto parsed_bits = ByteUtils::toString(parsed);
    std::cout << "String rep is " << parsed_bits << std::endl;

    const auto middle_bits = ByteUtils::getMBitsAtN(parsed, 3, 3);
    const auto middle_bits_text = ByteUtils::toString(middle_bits);
    std::cout << "Slice is " << middle_bits_text << std::endl;

    return 0;
}

View file

@ -5,17 +5,17 @@
int main()
{
const auto data_loc = std::filesystem::path(__FILE__) / "../../data";
const auto sample_toml_file = data_loc / "sample_toml.toml";
const auto data_loc = std::filesystem::path(__FILE__) / "../../data";
const auto sample_toml_file = data_loc / "sample_toml.toml";
auto reader = TomlReader();
reader.read(sample_toml_file);
auto reader = TomlReader();
reader.read(sample_toml_file);
auto themes_table = reader.getContent()->getTable("themes");
for (const auto& items : themes_table->getKeyValuePairs())
{
std::cout << "Got entry with key: " << items.first << " and val " << items.second << std::endl;
}
auto themes_table = reader.getContent()->getTable("themes");
for (const auto& items : themes_table->getKeyValuePairs())
{
std::cout << "Got entry with key: " << items.first << " and val " << items.second << std::endl;
}
return 0;
}
return 0;
}