330 lines
8.4 KiB
C++
330 lines
8.4 KiB
C++
#include "Lz77Encoder.h"
|
|
|
|
#include "StringUtils.h"
|
|
#include "BitStream.h"
|
|
#include "Bits.h"
|
|
#include "HuffmanEncoder.h"
|
|
|
|
#include <iostream>
|
|
|
|
// Constructs an encoder that reads from inputStream and writes to
// outputStream. Both stream pointers are forwarded to AbstractEncoder; the
// search and lookahead windows are constructed from mSearchBufferSize and
// mLookAheadBufferSize respectively.
Lz77Encoder::Lz77Encoder(BitStream* inputStream, BitStream* outputStream)
    : AbstractEncoder(inputStream, outputStream)
    , mSearchBuffer(mSearchBufferSize)
    , mLookaheadBuffer(mLookAheadBufferSize)
{
}
|
|
|
|
// Replaces the prefix-code generator used by this encoder.
//
// The supplied pointer is moved into mCodeGenerator, so the caller's handle
// is left in its moved-from state after this call.
void Lz77Encoder::setPrefixCodeGenerator(Ptr<PrefixCodeGenerator> generator)
{
    mCodeGenerator = std::move(generator);
}
|
|
|
|
bool Lz77Encoder::hitBufferFull() const
|
|
{
|
|
return mHitBuffer.size() == mMaxHitBufferSize;
|
|
}
|
|
|
|
void Lz77Encoder::populateSearchBuffer(const Hit& hit)
|
|
{
|
|
const auto& [length, distance, next_char] = hit;
|
|
|
|
if (length == 0)
|
|
{
|
|
mSearchBuffer.addItem(next_char);
|
|
}
|
|
else
|
|
{
|
|
Vector<unsigned char> new_items(distance, 0);
|
|
for(unsigned idx=0 ;idx<distance; idx++)
|
|
{
|
|
new_items[idx] = getSearchBufferItem(idx);
|
|
}
|
|
for(auto item : new_items)
|
|
{
|
|
mSearchBuffer.addItem(item);
|
|
}
|
|
|
|
int difference = int(length) - distance;
|
|
if (difference > 0)
|
|
{
|
|
for(unsigned idx=0; idx<unsigned(difference); idx++)
|
|
{
|
|
mSearchBuffer.addItem(mLookaheadBuffer.getItem(idx));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
unsigned char Lz77Encoder::getSearchBufferItem(unsigned index) const
|
|
{
|
|
return mSearchBuffer.getItem(mSearchBuffer.getNumItems() - 1 - index);
|
|
}
|
|
|
|
unsigned Lz77Encoder::lookAheadForMatchingChars(unsigned distance)
|
|
{
|
|
unsigned length{0};
|
|
for(unsigned idx=0; idx<unsigned(mMaxLookAheadBufferIndex + 1); idx++)
|
|
{
|
|
int search_offset = int(distance-1) - idx;
|
|
unsigned char search_char{0};
|
|
if (search_offset < 0)
|
|
{
|
|
search_char = mLookaheadBuffer.getItem(-search_offset - 1);
|
|
}
|
|
else
|
|
{
|
|
search_char = getSearchBufferItem(static_cast<unsigned>(search_offset));
|
|
}
|
|
unsigned char lookahead_char = mLookaheadBuffer.getItem(idx);
|
|
if ((lookahead_char != search_char) || (static_cast<int>(idx) == mMaxLookAheadBufferIndex))
|
|
{
|
|
if (idx + 1>= mMinLengthMatchSize)
|
|
{
|
|
length = idx + 1;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
return length;
|
|
}
|
|
|
|
void Lz77Encoder::lookForMatches(unsigned char searchChar, unsigned& hitLength, unsigned& hitOffset)
|
|
{
|
|
for (unsigned idx = 0; idx< mSearchBuffer.getNumItems(); idx++)
|
|
{
|
|
if (mSearchBuffer.getItem(mSearchBuffer.getNumItems() - 1 - idx) == searchChar)
|
|
{
|
|
auto num_hits = lookAheadForMatchingChars(idx + 1);
|
|
if (num_hits > 0 && num_hits >= hitLength)
|
|
{
|
|
hitLength = num_hits;
|
|
hitOffset = idx + 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
bool Lz77Encoder::lookAheadSourceEmpty() const
|
|
{
|
|
if (mLookaheadBuffer.getNumItems() < mLookAheadBufferSize)
|
|
{
|
|
return true;
|
|
}
|
|
|
|
if (mMaxLookAheadBufferIndex < int(mLookAheadBufferSize) - 1)
|
|
{
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
void Lz77Encoder::populateLookaheadBuffer(unsigned size, bool firstPass)
|
|
{
|
|
if (!firstPass && lookAheadSourceEmpty())
|
|
{
|
|
for(unsigned idx=0; idx<size; idx++)
|
|
{
|
|
mLookaheadBuffer.addItem(0);
|
|
mMaxLookAheadBufferIndex--;
|
|
}
|
|
return;
|
|
}
|
|
|
|
bool stream_finished{false};
|
|
unsigned stream_end_id{0};
|
|
for(unsigned idx=0; idx<size; idx++)
|
|
{
|
|
if (!stream_finished)
|
|
{
|
|
auto byte = mInputStream->readNextByte();
|
|
if (!byte)
|
|
{
|
|
stream_finished = true;
|
|
stream_end_id = idx -1;
|
|
mLookaheadBuffer.addItem(0);
|
|
mMaxLookAheadBufferIndex--;
|
|
continue;
|
|
}
|
|
else
|
|
{
|
|
mLookaheadBuffer.addItem(*byte);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
mLookaheadBuffer.addItem(0);
|
|
mMaxLookAheadBufferIndex--;
|
|
}
|
|
}
|
|
|
|
if (stream_finished && firstPass)
|
|
{
|
|
mMaxLookAheadBufferIndex = stream_end_id;
|
|
}
|
|
}
|
|
|
|
bool Lz77Encoder::encode()
|
|
{
|
|
if (!mCodeGenerator)
|
|
{
|
|
mCodeGenerator = std::make_unique<HuffmanEncoder>();
|
|
}
|
|
|
|
// Fill the lookahead buffer
|
|
mMaxLookAheadBufferIndex = mLookAheadBufferSize - 1;
|
|
populateLookaheadBuffer(mLookAheadBufferSize, true);
|
|
if(mMaxLookAheadBufferIndex < 0)
|
|
{
|
|
return true;
|
|
}
|
|
|
|
bool input_stream_ended{false};
|
|
while(!hitBufferFull())
|
|
{
|
|
if (mMaxLookAheadBufferIndex < 0)
|
|
{
|
|
input_stream_ended = true;
|
|
break;
|
|
}
|
|
|
|
const auto working_byte = mLookaheadBuffer.getItem(0);
|
|
unsigned hit_length{0};
|
|
unsigned hit_distance{0};
|
|
lookForMatches(working_byte, hit_length, hit_distance);
|
|
|
|
const Hit hit{hit_length, hit_distance, working_byte};
|
|
mHitBuffer.push_back(hit);
|
|
|
|
populateSearchBuffer(hit);
|
|
if (hit_length == 0)
|
|
{
|
|
populateLookaheadBuffer(1);
|
|
}
|
|
else
|
|
{
|
|
populateLookaheadBuffer(hit_length);
|
|
}
|
|
}
|
|
|
|
return input_stream_ended;
|
|
}
|
|
|
|
// Read-only access to the hits accumulated by encode(). The reference refers
// to the encoder's own mHitBuffer member.
const Vector<Lz77Encoder::Hit>& Lz77Encoder::getHitBuffer() const
{
    return mHitBuffer;
}
|
|
|
|
/*
|
|
void Lz77Encoder::flushHitBuffer()
|
|
{
|
|
// If dynamic huffman build trees
|
|
if (!mCodeGenerator)
|
|
{
|
|
mCodeGenerator = std::make_unique<HuffmanEncoder>();
|
|
}
|
|
|
|
// Convert hit buffer to prefix codes and write to output stream
|
|
for (const auto& hit : mHitBuffer)
|
|
{
|
|
const auto& [length, distance, next_char] = hit;
|
|
|
|
PrefixCode code;
|
|
if (length == 0)
|
|
{
|
|
code = *mCodeGenerator->getLiteralValue(next_char);
|
|
std::cout << "Writing symbol " << static_cast<int>(next_char) << " with code " << Bits::toString(code.getData(), code.getLength()) << "\n";
|
|
|
|
mOutputStream->writeNBits(code.getData(), code.getLength());
|
|
}
|
|
else
|
|
{
|
|
code = *mCodeGenerator->getLengthValue(length);
|
|
const auto distance_code = mCodeGenerator->getDistanceValue(distance);
|
|
|
|
std::cout << "Writing length " << length << " with code " << Bits::toString(code.getData(), code.getLength()) << "\n";
|
|
mOutputStream->writeNBits(code.getData(), code.getLength());
|
|
|
|
std::cout << "Writing distance " << distance << " with code " << Bits::toString(distance_code.getData(), distance_code.getLength()) << "\n";
|
|
mOutputStream->writeNBits(distance_code.getData(), distance_code.getLength());
|
|
}
|
|
}
|
|
|
|
auto eos_code = mCodeGenerator->getEndOfStreamValue();
|
|
std::cout << "Writing EOS value with code " << Bits::toString(eos_code->getData(), eos_code->getLength()) << "\n";
|
|
|
|
mOutputStream->writeNBits(eos_code->getData(), eos_code->getLength());
|
|
}
|
|
*/
|
|
|
|
// Decoding is not implemented; this always returns false.
//
// The disabled code inside the function is an earlier prototype kept for
// reference. It worked on a textual encoding in which a back-reference was
// written as '@<offset>L<length>' and every other character was a literal.
// It refers to a `stream` variable that this function does not have.
bool Lz77Encoder::decode()
{
    /*
    String ret;

    unsigned loc{0};
    while(loc < stream.size())
    {
        auto working_char = stream[loc];
        if (working_char == '@')
        {
            unsigned loc_working = loc;

            auto remainder = stream.size() - loc;
            String offset;

            unsigned length_loc{0};
            for(unsigned jdx=0; jdx< remainder; jdx++)
            {
                loc++;

                auto offset_char = stream[loc];
                if (offset_char == 'L')
                {
                    loc++;
                    break;
                }
                else
                {
                    offset += offset_char;
                }
            }
            unsigned offset_amount = std::stoul(offset);

            String length;
            remainder = stream.size() - loc;

            for(unsigned jdx=0; jdx< remainder; jdx++)
            {
                auto length_char = stream[loc];
                if (StringUtils::IsAlphabetical(length_char) || length_char == '@')
                {
                    break;
                }
                else
                {
                    loc++;
                    length += length_char;
                }
            }

            unsigned length_amount = std::stoul(length);

            auto buffer_index = ret.size() - offset_amount;
            for(unsigned jdx=buffer_index;jdx<buffer_index+length_amount; jdx++)
            {
                ret += ret[jdx];
            }
        }
        else
        {
            loc++;
            ret += working_char;
        }
    }
    return ret;
    */
    return false;
}
|
|
|