stuff-from-scratch/src/web/markdown/MarkdownParser.cpp
2022-12-07 10:21:28 +00:00

397 lines
10 KiB
C++

#include "MarkdownParser.h"
#include "MarkdownDocument.h"
#include "MarkdownComponents.h"
#include "Lexer.h"
#include "StringUtils.h"
#include <sstream>
namespace
{
// Fence that opens and closes a multi-line quote block.
constexpr char MULTILINE_QUOTE_DELIMITER[] = "```";

// Character whose leading run length determines a heading's level.
constexpr char HEADING_DELIMITER = '#';
}
// Constructs a parser with its default custom delimiters registered:
// "$$" for custom multi-line blocks and "$" for custom inline spans.
MarkdownParser::MarkdownParser()
{
// Delimiters checked by startsWithCustomMultilineBlock().
mCustomMultilineDelimiters = {{"$$"}};
// Delimiters checked by checkForCustomInline().
mCustomInlineDelimiters = {{"$"}};
}
// Nothing to release by hand; the members clean up after themselves.
MarkdownParser::~MarkdownParser() = default;
// Reports whether the element currently being built is a multi-line block.
//
// @return true when a working element exists and its type is either
//         MULTILINE_QUOTE or CUSTOM_MULTILINE; false otherwise (including
//         when there is no working element at all).
bool MarkdownParser::isInMultilineBlock() const
{
    if (mWorkingElement)
    {
        const auto kind = mWorkingElement->getType();
        const bool is_quote = (kind == MarkdownElement::Type::MULTILINE_QUOTE);
        const bool is_custom = (kind == MarkdownElement::Type::CUSTOM_MULTILINE);
        return is_quote || is_custom;
    }
    return false;
}
unsigned MarkdownParser::checkForLink(const std::string& lineSection)
{
if (lineSection.empty())
{
return 0;
}
std::vector<std::string> hits;
unsigned hit_size{0};
if (Lexer::matchPattern("[@](@)", lineSection, '@', hits))
{
if (hits.size() == 2)
{
auto tag = hits[0];
auto target = hits[1];
onTextSpanFinished();
auto element = std::make_unique<MarkdownLink>(target);
element->appendTextContent(tag);
addChildToWorkingElement(std::move(element));
hit_size = 4 + tag.size() + target.size();
}
}
return hit_size;
}
unsigned MarkdownParser::checkForImage(const std::string& lineSection)
{
if (lineSection.empty())
{
return 0;
}
std::vector<std::string> hits;
unsigned hit_size{0};
if (Lexer::matchPattern("![@](@)", lineSection, '@', hits))
{
if (hits.size() == 2)
{
auto alt = hits[0];
auto source = hits[1];
onTextSpanFinished();
auto element = std::make_unique<MarkdownImage>(source, alt);
addChildToWorkingElement(std::move(element));
hit_size = 5 + alt.size() + source.size();
}
}
return hit_size;
}
// Tries to match a backtick-delimited inline quote against lineSection and,
// on success, appends a MarkdownInlineQuote to the working element.
//
// Any pending plain text is flushed (onTextSpanFinished) before the quote is
// added so that children keep their source order.
//
// @param lineSection Remaining part of the line being scanned.
// @return Length of the matched quote (two backticks plus the capture), or 0
//         when nothing was added.
// NOTE(review): the returned length assumes the match begins at the first
// character of lineSection - confirm Lexer::matchPattern is anchored.
unsigned MarkdownParser::checkForInlineQuote(const std::string& lineSection)
{
    if (lineSection.empty())
    {
        return 0;
    }

    std::vector<std::string> captures;
    if (!Lexer::matchPattern("`@`", lineSection, '@', captures))
    {
        return 0;
    }
    if (captures.size() != 1)
    {
        return 0;
    }

    const std::string& content = captures[0];

    onTextSpanFinished();

    auto quote = std::make_unique<MarkdownInlineQuote>();
    quote->appendTextContent(content);
    addChildToWorkingElement(std::move(quote));

    // Opening and closing backtick account for the constant 2.
    return static_cast<unsigned>(2 + content.size());
}
// Tries each registered custom inline delimiter in order and, for the first
// one that yields exactly one capture, appends a MarkdownCustomInline to the
// working element.
//
// Any pending plain text is flushed (onTextSpanFinished) before the element
// is added so that children keep their source order.
//
// @param lineSection Remaining part of the line being scanned.
// @return Length of the matched span (both delimiters plus the capture), or
//         0 when no delimiter produced a match.
// NOTE(review): the returned length assumes the match begins at the first
// character of lineSection - confirm Lexer::matchPattern is anchored.
unsigned MarkdownParser::checkForCustomInline(const std::string& lineSection)
{
    if (lineSection.empty())
    {
        return 0;
    }

    // Bind by reference: the delimiter strings live in the member container,
    // so there is no need to copy one per iteration.
    for (const auto& delimiter : mCustomInlineDelimiters)
    {
        // A fresh capture list per delimiter, so results left behind by an
        // earlier attempt can never be counted against a later delimiter.
        std::vector<std::string> hits;
        if (!Lexer::matchPattern(delimiter + "@" + delimiter, lineSection, '@', hits))
        {
            continue;
        }
        if (hits.size() != 1)
        {
            continue;
        }

        const std::string& content = hits[0];

        onTextSpanFinished();

        auto element = std::make_unique<MarkdownCustomInline>(delimiter);
        element->appendTextContent(content);
        addChildToWorkingElement(std::move(element));

        return static_cast<unsigned>(2 * delimiter.size() + content.size());
    }
    return 0;
}
// Flushes the plain text accumulated in mWorkingLine.
//
// When a working text span is set, the text is appended to it; otherwise a
// new MarkdownTextSpan holding the text is added to the working element.
// Afterwards both mWorkingLine and mWorkingTextSpan are cleared. Does
// nothing when mWorkingLine is empty.
void MarkdownParser::onTextSpanFinished()
{
    if (mWorkingLine.empty())
    {
        return;
    }

    if (!mWorkingTextSpan)
    {
        auto fresh_span = std::make_unique<MarkdownTextSpan>();
        fresh_span->addLine(mWorkingLine);
        addChildToWorkingElement(std::move(fresh_span));
    }
    else
    {
        mWorkingTextSpan->appendTextContent(mWorkingLine);
    }

    // The span is only continued once; later text starts a new span.
    mWorkingTextSpan = nullptr;
    mWorkingLine.clear();
}
// Appends an inline child to the element currently being built.
//
// @param child Inline element to hand over; ownership passes to the working
//              element on success.
//
// The working element is only known by its base type here, so it is
// down-cast to MarkdownElementWithChildren. If there is no working element,
// or it cannot hold children, the cast yields nullptr and the child is
// discarded instead of dereferencing a null pointer.
void MarkdownParser::addChildToWorkingElement(std::unique_ptr<MarkdownInlineElement> child)
{
    auto* parent = dynamic_cast<MarkdownElementWithChildren*>(mWorkingElement);
    if (!parent)
    {
        return;
    }
    parent->addChild(std::move(child));
}
void MarkdownParser::processLine(const std::string& line)
{
if (isInMultilineBlock())
{
mWorkingElement->addLine(line);
return;
}
if (!mWorkingElement)
{
auto paragraph = std::make_unique<MarkdownParagraph>();
mWorkingElement = paragraph.get();
mMarkdownDocument->addElement(std::move(paragraph));
}
if (mWorkingElement && mWorkingElement->getType() == MarkdownElement::Type::PARAGRAPH)
{
if (auto last_text_span = dynamic_cast<MarkdownParagraph*>(mWorkingElement)->getLastChild())
{
mWorkingTextSpan = last_text_span;
}
}
unsigned line_position = 0;
mWorkingLine.clear();
while(line_position < line.size())
{
const auto remaining = line.substr(line_position, line.size() - line_position);
if(auto length = checkForImage(remaining))
{
line_position += length;
}
else if(auto length = checkForLink(remaining))
{
line_position += length;
}
else if(auto length = checkForInlineQuote(remaining))
{
line_position += length;
}
else if(auto length = checkForCustomInline(remaining))
{
line_position += length;
}
else
{
mWorkingLine += line[line_position];
line_position++;
}
}
onTextSpanFinished();
}
// Handles a blank line: outside a multi-line block it ends the current
// section; inside one it does nothing.
void MarkdownParser::onEmptyLine()
{
    if (isInMultilineBlock())
    {
        return;
    }
    onSectionFinished();
}
// Tests whether the line begins with the multi-line quote fence.
//
// @param line Line to test.
// @return Result of StringUtils::startsWith with its third argument
//         (ignore-whitespace flag) set to true.
bool MarkdownParser::startsWithMultiLineQuote(const std::string& line) const
{
    return StringUtils::startsWith(line, MULTILINE_QUOTE_DELIMITER, /*ignore_whitespace=*/true);
}
// Finds the first registered custom multi-line delimiter the line starts
// with (StringUtils::startsWith called with its third argument set to true).
//
// @param line Line to test.
// @return Zero-based position of the matching delimiter in
//         mCustomMultilineDelimiters, or -1 when none matches.
int MarkdownParser::startsWithCustomMultilineBlock(const std::string& line) const
{
    int slot = 0;
    for (const auto& delimiter : mCustomMultilineDelimiters)
    {
        if (StringUtils::startsWith(line, delimiter, true))
        {
            return slot;
        }
        ++slot;
    }
    return -1;
}
// Tests whether the line begins with a heading marker ("#").
//
// @param line Line to test.
// @return Result of StringUtils::startsWith with its third argument set to
//         true.
bool MarkdownParser::startsWithHeading(const std::string& line) const
{
    const bool is_heading = StringUtils::startsWith(line, "#", true);
    return is_heading;
}
// Tests whether the line begins with a bullet marker ("*").
//
// @param line Line to test.
// @return Result of StringUtils::startsWith with its third argument set to
//         true.
bool MarkdownParser::startsWithBulletItem(const std::string& line) const
{
    const bool is_bullet = StringUtils::startsWith(line, "*", true);
    return is_bullet;
}
// Handles a line that starts with the multi-line quote fence.
//
// - If a multi-line quote is currently open, the fence closes it.
// - If some other multi-line block is open, the line is passed to
//   processLine like any other line of that block.
// - Otherwise a new MarkdownMultilineQuote is opened, added to the document
//   and made the working element. It is constructed from the result of
//   StringUtils::removeUpTo(line, fence).
//
// @param line The line containing the fence.
void MarkdownParser::onFoundMultiLineQuote(const std::string& line)
{
    const bool quote_is_open =
        mWorkingElement && mWorkingElement->getType() == MarkdownElement::Type::MULTILINE_QUOTE;
    if (quote_is_open)
    {
        onSectionFinished();
        return;
    }

    if (isInMultilineBlock())
    {
        processLine(line);
        return;
    }

    const auto tag = StringUtils::removeUpTo(line, MULTILINE_QUOTE_DELIMITER);
    auto block = std::make_unique<MarkdownMultilineQuote>(tag);
    mWorkingElement = block.get();
    mMarkdownDocument->addElement(std::move(block));
}
// Handles a line that starts with one of the custom multi-line delimiters.
//
// - If a custom block is open and blockSlot equals the slot recorded when it
//   was opened, the delimiter closes it.
// - If any other multi-line block is open, the line is passed to processLine
//   like any other line of that block.
// - Otherwise a new MarkdownCustomMultiLine is opened, added to the document
//   and made the working element. It is constructed from the result of
//   StringUtils::removeUpTo(line, delimiter) and the delimiter itself.
//
// @param line      The line containing the delimiter.
// @param blockSlot Position of the matched delimiter in
//                  mCustomMultilineDelimiters.
void MarkdownParser::onFoundCustomMultiLineBlock(const std::string& line, unsigned blockSlot)
{
    const bool custom_block_is_open =
        mWorkingElement && mWorkingElement->getType() == MarkdownElement::Type::CUSTOM_MULTILINE;

    if (custom_block_is_open && blockSlot == mCustomDelimiterIndex)
    {
        onSectionFinished();
    }
    else if (isInMultilineBlock())
    {
        processLine(line);
    }
    else
    {
        if (blockSlot >= mCustomMultilineDelimiters.size())
        {
            // Unknown slot: indexing would be out of bounds, so treat the
            // line as ordinary content rather than opening a block.
            processLine(line);
            return;
        }

        const auto& delimiter = mCustomMultilineDelimiters[blockSlot];
        const auto tag = StringUtils::removeUpTo(line, delimiter);
        auto block = std::make_unique<MarkdownCustomMultiLine>(tag, delimiter);
        mWorkingElement = block.get();
        // Remember which delimiter opened the block; the closing check above
        // compares against this value, so it must track the open block.
        mCustomDelimiterIndex = blockSlot;
        mMarkdownDocument->addElement(std::move(block));
    }
}
void MarkdownParser::onFoundHeading(const std::string& line)
{
if(isInMultilineBlock())
{
processLine(line);
}
else
{
onSectionFinished();
unsigned level = StringUtils::countFirstConsecutiveHits(line, HEADING_DELIMITER);
auto heading = std::make_unique<MarkdownHeading>(level);
std::string prefix;
for(unsigned idx=0; idx<level; idx++)
{
prefix += HEADING_DELIMITER;
}
heading->appendTextContent(StringUtils::stripSurroundingWhitepsace(StringUtils::removeUpTo(line, prefix)));
mMarkdownDocument->addElement(std::move(heading));
}
}
// Handles a line that starts with a bullet marker.
//
// Inside a multi-line block the line is passed to processLine unchanged.
// Otherwise a MarkdownBulletList is started if none is in progress, a new
// MarkdownBulletItem is appended to it and made the working element, and
// the line - after StringUtils::removeUpTo(line, "*") - is processed as the
// item's content.
//
// @param line The bullet line.
void MarkdownParser::onFoundBulletItem(const std::string& line)
{
    if (isInMultilineBlock())
    {
        processLine(line);
        return;
    }

    if (!mWorkingBulletList)
    {
        auto list = std::make_unique<MarkdownBulletList>();
        mWorkingBulletList = list.get();
        mMarkdownDocument->addElement(std::move(list));
    }

    auto item = std::make_unique<MarkdownBulletItem>();
    mWorkingElement = item.get();
    mWorkingBulletList->addChild(std::move(item));

    processLine(StringUtils::removeUpTo(line, "*"));
}
// Ends the current section by clearing every "working" pointer, so the next
// line starts a fresh element.
void MarkdownParser::onSectionFinished()
{
    mWorkingTextSpan = nullptr;   // no text span to continue
    mWorkingBulletList = nullptr; // no list to append items to
    mWorkingElement = nullptr;    // no element being built
}
std::unique_ptr<MarkdownDocument> MarkdownParser::run(const std::string& content)
{
mMarkdownDocument = std::make_unique<MarkdownDocument>();
std::stringstream ss(content);
std::string line;
while (std::getline(ss, line, '\n'))
{
if (StringUtils::isWhitespaceOnly(line))
{
onEmptyLine();
continue;
}
else if (startsWithMultiLineQuote(line))
{
onFoundMultiLineQuote(line);
}
else if (auto result = startsWithCustomMultilineBlock(line); result >= 0)
{
onFoundCustomMultiLineBlock(line, result);
}
else if (startsWithHeading(line))
{
onFoundHeading(line);
}
else if(startsWithBulletItem(line))
{
onFoundBulletItem(line);
}
else
{
processLine(line);
}
}
return std::move(mMarkdownDocument);
}