Convert lz77 to use fixed buffer sizes.
This commit is contained in:
parent
a6e31c8d39
commit
af6fad72eb
9 changed files with 362 additions and 110 deletions
|
@ -8,55 +8,102 @@
|
|||
#include <iostream>
|
||||
|
||||
// Builds an encoder that reads from inputStream and writes to outputStream.
// Both working buffers are constructed with the sizes held in
// mSearchBufferSize and mLookAheadBufferSize.
Lz77Encoder::Lz77Encoder(BitStream* inputStream, BitStream* outputStream)
    : AbstractEncoder(inputStream, outputStream)
    , mSearchBuffer(mSearchBufferSize)
    , mLookaheadBuffer(mLookAheadBufferSize)
{
}
|
||||
|
||||
|
||||
unsigned Lz77Encoder::lookAheadForMatchingChars(std::vector<char>& matchBuffer, unsigned searchIndex, unsigned hitOffset, const std::string& stream, unsigned streamLoc)
|
||||
void Lz77Encoder::setPrefixCodeGenerator(std::unique_ptr<PrefixCodeGenerator> generator)
|
||||
{
|
||||
auto remaining_size = stream.size() - streamLoc;
|
||||
mCodeGenerator = std::move(generator);
|
||||
}
|
||||
|
||||
unsigned num_hits{1};
|
||||
for (unsigned jdx=1; jdx< remaining_size; jdx++)
|
||||
bool Lz77Encoder::hitBufferFull() const
|
||||
{
|
||||
return mHitBuffer.size() == mMaxHitBufferSize;
|
||||
}
|
||||
|
||||
void Lz77Encoder::populateSearchBuffer(const Hit& hit)
|
||||
{
|
||||
const auto& [length, distance, next_char] = hit;
|
||||
|
||||
if (length == 0)
|
||||
{
|
||||
char buffer_char{0};
|
||||
if (searchIndex + jdx < mSearchBuffer.size())
|
||||
mSearchBuffer.addItem(next_char);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<unsigned char> new_items(distance, 0);
|
||||
for(unsigned idx=0 ;idx<distance; idx++)
|
||||
{
|
||||
buffer_char = mSearchBuffer[searchIndex + jdx];
|
||||
new_items[idx] = getSearchBufferItem(idx);
|
||||
}
|
||||
else
|
||||
for(auto item : new_items)
|
||||
{
|
||||
buffer_char = stream[jdx - hitOffset];
|
||||
mSearchBuffer.addItem(item);
|
||||
}
|
||||
|
||||
auto lookahead_char = stream[streamLoc + jdx];
|
||||
if (lookahead_char == buffer_char)
|
||||
int difference = int(length) - distance;
|
||||
if (difference > 0)
|
||||
{
|
||||
matchBuffer.push_back(buffer_char);
|
||||
num_hits++;
|
||||
for(unsigned idx=0; idx<difference; idx++)
|
||||
{
|
||||
mSearchBuffer.addItem(mLookaheadBuffer.getItem(idx));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsigned char Lz77Encoder::getSearchBufferItem(unsigned index) const
|
||||
{
|
||||
return mSearchBuffer.getItem(mSearchBuffer.getNumItems() - 1 - index);
|
||||
}
|
||||
|
||||
unsigned Lz77Encoder::lookAheadForMatchingChars(unsigned distance)
|
||||
{
|
||||
unsigned length{0};
|
||||
std::cout << "In hit check at distance " << distance << " max buffer index is: " << mMaxLookAheadBufferIndex << std::endl;
|
||||
for(unsigned idx=0; idx<mMaxLookAheadBufferIndex + 1; idx++)
|
||||
{
|
||||
int search_offset = int(distance-1) - idx;
|
||||
std::cout << "Have search offet " << search_offset << std::endl;
|
||||
unsigned char search_char{0};
|
||||
if (search_offset < 0)
|
||||
{
|
||||
search_char = mLookaheadBuffer.getItem(-search_offset - 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
search_char = getSearchBufferItem(static_cast<unsigned>(search_offset));
|
||||
}
|
||||
|
||||
unsigned char lookahead_char = mLookaheadBuffer.getItem(idx);
|
||||
|
||||
std::cout << "Checking search char " << static_cast<int>(search_char) << " and lookup char " << static_cast<int>(lookahead_char) << std::endl;
|
||||
if ((lookahead_char != search_char) || (idx == mMaxLookAheadBufferIndex))
|
||||
{
|
||||
if (idx + 1>= mMinLengthMatchSize)
|
||||
{
|
||||
length = idx + 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return num_hits;
|
||||
return length;
|
||||
}
|
||||
|
||||
void Lz77Encoder::lookThroughSearchBuffer(char searchChar, unsigned& hitLength, unsigned& hitOffset, const std::string& stream, unsigned streamLoc)
|
||||
void Lz77Encoder::lookForMatches(unsigned char searchChar, unsigned& hitLength, unsigned& hitOffset)
|
||||
{
|
||||
for(unsigned idx=0; idx<mSearchBuffer.size(); idx++)
|
||||
for (unsigned idx = 0; idx< mSearchBuffer.getNumItems(); idx++)
|
||||
{
|
||||
auto search_index = mSearchBuffer.size() - idx - 1;
|
||||
|
||||
if (auto buffer_char = mSearchBuffer[search_index]; buffer_char == searchChar)
|
||||
if (mSearchBuffer.getItem(mSearchBuffer.getNumItems() - 1 - idx) == searchChar)
|
||||
{
|
||||
std::vector<char> match_buffer{buffer_char};
|
||||
auto num_hits = lookAheadForMatchingChars(match_buffer, search_index, idx, stream, streamLoc);
|
||||
std::cout << "Looking for hits " << std::endl;
|
||||
auto num_hits = lookAheadForMatchingChars(idx + 1);
|
||||
|
||||
if (num_hits >= hitLength)
|
||||
if (num_hits > 0 && num_hits >= hitLength)
|
||||
{
|
||||
hitLength = num_hits;
|
||||
hitOffset = idx + 1;
|
||||
|
@ -65,75 +112,163 @@ void Lz77Encoder::lookThroughSearchBuffer(char searchChar, unsigned& hitLength,
|
|||
}
|
||||
}
|
||||
|
||||
void Lz77Encoder::setPrefixCodeGenerator(std::unique_ptr<PrefixCodeGenerator> generator)
|
||||
bool Lz77Encoder::lookAheadSourceEmpty() const
|
||||
{
|
||||
mCodeGenerator = std::move(generator);
|
||||
if (mLookaheadBuffer.getNumItems() < mLookAheadBufferSize)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
if (mMaxLookAheadBufferIndex < mLookAheadBufferSize - 1)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void Lz77Encoder::populateLookaheadBuffer(unsigned size, bool firstPass)
|
||||
{
|
||||
if (!firstPass && lookAheadSourceEmpty())
|
||||
{
|
||||
for(unsigned idx=0; idx<size; idx++)
|
||||
{
|
||||
mLookaheadBuffer.addItem(0);
|
||||
mMaxLookAheadBufferIndex--;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
bool stream_finished{false};
|
||||
unsigned stream_end_id{0};
|
||||
for(unsigned idx=0; idx<size; idx++)
|
||||
{
|
||||
if (!stream_finished)
|
||||
{
|
||||
auto byte = mInputStream->readNextByte();
|
||||
if (!byte)
|
||||
{
|
||||
stream_finished = true;
|
||||
stream_end_id = idx -1;
|
||||
mLookaheadBuffer.addItem(0);
|
||||
mMaxLookAheadBufferIndex--;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
mLookaheadBuffer.addItem(*byte);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
mLookaheadBuffer.addItem(0);
|
||||
mMaxLookAheadBufferIndex--;
|
||||
}
|
||||
}
|
||||
|
||||
if (stream_finished && firstPass)
|
||||
{
|
||||
mMaxLookAheadBufferIndex = stream_end_id;
|
||||
}
|
||||
}
|
||||
|
||||
bool Lz77Encoder::encode()
|
||||
{
|
||||
if (!mCodeGenerator)
|
||||
{
|
||||
auto code_generator = std::make_unique<HuffmanEncoder>();
|
||||
auto huffman_encoder = code_generator.get();
|
||||
|
||||
mCodeGenerator = std::move(code_generator);
|
||||
huffman_encoder->setUseFixedCode(true);
|
||||
huffman_encoder->initializeLiteralLengthTable();
|
||||
mCodeGenerator = std::make_unique<HuffmanEncoder>();
|
||||
}
|
||||
|
||||
while(auto byte = mInputStream->readNextByte())
|
||||
// Fill the lookahead buffer
|
||||
mMaxLookAheadBufferIndex = mLookAheadBufferSize - 1;
|
||||
populateLookaheadBuffer(mLookAheadBufferSize, true);
|
||||
if(mMaxLookAheadBufferIndex < 0)
|
||||
{
|
||||
const auto code = mCodeGenerator->getLiteralValue(*byte);
|
||||
|
||||
std::cout << "Writing value " << static_cast<int>(*byte) << " with code " << ByteUtils::toString(code.getData(), code.getLength()) << "\n";
|
||||
|
||||
mOutputStream->writeNBits(code.getData(), code.getLength());
|
||||
return true;
|
||||
}
|
||||
|
||||
auto eos_code = mCodeGenerator->getEndOfStreamValue();
|
||||
std::cout << "Writing EOS value with code " << ByteUtils::toString(eos_code.getData(), eos_code.getLength()) << "\n";
|
||||
|
||||
mOutputStream->writeNBits(eos_code.getData(), eos_code.getLength());
|
||||
|
||||
|
||||
|
||||
/*
|
||||
unsigned loc{0};
|
||||
std::string ret;
|
||||
|
||||
while(!mInputStream->isFinished())
|
||||
bool input_stream_ended{false};
|
||||
while(!hitBufferFull())
|
||||
{
|
||||
auto search_char = stream[loc];
|
||||
if (mMaxLookAheadBufferIndex < 0)
|
||||
{
|
||||
input_stream_ended = true;
|
||||
break;
|
||||
}
|
||||
|
||||
const auto working_byte = mLookaheadBuffer.getItem(0);
|
||||
std::cout << "Working byte is " << static_cast<int>(working_byte) << std::endl;
|
||||
|
||||
unsigned hit_length{0};
|
||||
unsigned hit_offset{0};
|
||||
lookThroughSearchBuffer(search_char, hit_length, hit_offset, stream, loc);
|
||||
unsigned hit_distance{0};
|
||||
lookForMatches(working_byte, hit_length, hit_distance);
|
||||
|
||||
if (hit_length > 0)
|
||||
std::cout << "Got hit length " << hit_length << " and distance " << hit_distance << std::endl;
|
||||
|
||||
const Hit hit{hit_length, hit_distance, working_byte};
|
||||
mHitBuffer.push_back(hit);
|
||||
|
||||
populateSearchBuffer(hit);
|
||||
if (hit_length == 0)
|
||||
{
|
||||
ret += "@" + std::to_string(hit_offset) + "L" + std::to_string(hit_length);
|
||||
loc+=hit_length;
|
||||
|
||||
auto hit_loc = mSearchBuffer.size() - hit_offset;
|
||||
for(unsigned idx=hit_loc; idx<hit_loc + hit_length; idx++)
|
||||
{
|
||||
mSearchBuffer.push_back(mSearchBuffer[idx]);
|
||||
}
|
||||
populateLookaheadBuffer(1);
|
||||
}
|
||||
else
|
||||
{
|
||||
ret += search_char;
|
||||
mSearchBuffer.push_back(search_char);
|
||||
loc++;
|
||||
populateLookaheadBuffer(hit_length);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
*/
|
||||
return false;
|
||||
return input_stream_ended;
|
||||
}
|
||||
|
||||
const std::vector<Lz77Encoder::Hit>& Lz77Encoder::getHitBuffer() const
|
||||
{
|
||||
return mHitBuffer;
|
||||
}
|
||||
|
||||
/*
|
||||
void Lz77Encoder::flushHitBuffer()
|
||||
{
|
||||
// If dynamic huffman build trees
|
||||
if (!mCodeGenerator)
|
||||
{
|
||||
mCodeGenerator = std::make_unique<HuffmanEncoder>();
|
||||
}
|
||||
|
||||
// Convert hit buffer to prefix codes and write to output stream
|
||||
for (const auto& hit : mHitBuffer)
|
||||
{
|
||||
const auto& [length, distance, next_char] = hit;
|
||||
|
||||
PrefixCode code;
|
||||
if (length == 0)
|
||||
{
|
||||
code = *mCodeGenerator->getLiteralValue(next_char);
|
||||
std::cout << "Writing symbol " << static_cast<int>(next_char) << " with code " << ByteUtils::toString(code.getData(), code.getLength()) << "\n";
|
||||
|
||||
mOutputStream->writeNBits(code.getData(), code.getLength());
|
||||
}
|
||||
else
|
||||
{
|
||||
code = *mCodeGenerator->getLengthValue(length);
|
||||
const auto distance_code = mCodeGenerator->getDistanceValue(distance);
|
||||
|
||||
std::cout << "Writing length " << length << " with code " << ByteUtils::toString(code.getData(), code.getLength()) << "\n";
|
||||
mOutputStream->writeNBits(code.getData(), code.getLength());
|
||||
|
||||
std::cout << "Writing distance " << distance << " with code " << ByteUtils::toString(distance_code.getData(), distance_code.getLength()) << "\n";
|
||||
mOutputStream->writeNBits(distance_code.getData(), distance_code.getLength());
|
||||
}
|
||||
}
|
||||
|
||||
auto eos_code = mCodeGenerator->getEndOfStreamValue();
|
||||
std::cout << "Writing EOS value with code " << ByteUtils::toString(eos_code->getData(), eos_code->getLength()) << "\n";
|
||||
|
||||
mOutputStream->writeNBits(eos_code->getData(), eos_code->getLength());
|
||||
}
|
||||
*/
|
||||
|
||||
bool Lz77Encoder::decode()
|
||||
{
|
||||
/*
|
||||
|
@ -203,3 +338,4 @@ bool Lz77Encoder::decode()
|
|||
*/
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue