Improvements for markdown parsing.

This commit is contained in:
James Grogan 2022-12-06 18:02:43 +00:00
parent fc44290e3f
commit 8705859115
40 changed files with 957 additions and 537 deletions

View file

@ -0,0 +1,72 @@
#include "Lexer.h"
// Matches the start of checkString against pattern and collects the text
// captured by each placeholder.
//
// Every occurrence of `delimiter` in `pattern` is a placeholder: it captures
// characters of checkString until the pattern character that follows the
// placeholder is seen. A placeholder in final position captures the rest of
// checkString. All other pattern characters must match literally.
//
// pattern      - pattern to match, e.g. "[@](@)" with delimiter '@'.
// checkString  - text to test; characters after the end of a complete match
//                are ignored (prefix match).
// delimiter    - placeholder character used inside pattern.
// hitSequence  - receives the captured strings, in pattern order; it is only
//                assigned when the function returns true.
//
// Returns true only if the whole pattern was matched. Returns false if either
// string is empty, if a literal character differs, or if checkString ends
// before the pattern has been fully consumed.
bool Lexer::matchPattern(const std::string& pattern, const std::string& checkString, char delimiter, std::vector<std::string>& hitSequence)
{
    if (checkString.empty() || pattern.empty())
    {
        return false;
    }

    std::size_t check_idx = 0;
    std::size_t pattern_idx = 0;
    std::vector<std::string> hits;
    std::string working_hit;

    while (check_idx < checkString.size() && pattern_idx < pattern.size())
    {
        const char check_char = checkString[check_idx];
        const char pattern_char = pattern[pattern_idx];

        if (pattern_char == delimiter)
        {
            const bool has_terminator = pattern_idx + 1 < pattern.size();
            if (has_terminator && check_char == pattern[pattern_idx + 1])
            {
                // The terminator closes the capture. Only the pattern advances
                // so the terminator itself is consumed as a literal next time.
                hits.push_back(working_hit);
                working_hit.clear();
                pattern_idx++;
            }
            else
            {
                working_hit += check_char;
                check_idx++;
            }
        }
        else if (check_char == pattern_char)
        {
            check_idx++;
            pattern_idx++;
        }
        else
        {
            return false;
        }
    }

    // A placeholder in final position has no terminator to close it inside
    // the loop, so its capture is recorded here.
    if (pattern_idx + 1 == pattern.size() && pattern[pattern_idx] == delimiter)
    {
        hits.push_back(working_hit);
        pattern_idx++;
    }

    // Input ran out before the pattern was satisfied: not a match.
    if (pattern_idx != pattern.size())
    {
        return false;
    }

    hitSequence = hits;
    return true;
}

View file

@ -0,0 +1,11 @@
#pragma once
#include <string>
#include <vector>
// Helper class exposing static pattern-matching utilities; it declares no data members.
class Lexer
{
public:
// Matches checkString against pattern, starting at the first character of both.
// Each occurrence of delimiter in pattern is a placeholder whose captured text
// ends at the pattern character that follows the placeholder.
// e.g. Pattern [@](@) returns <source, tag> for input: [source](tag) and delimiter @
// Returns false if either string is empty or a literal pattern character differs
// from the input. hitSequence is only assigned when the function returns true.
static bool matchPattern(const std::string& pattern, const std::string& checkString, char delimiter, std::vector<std::string>& hitSequence);
};

View file

@ -195,7 +195,7 @@ void TemplateFile::onFoundExtends(const std::vector<std::string> args)
void TemplateFile::onFoundExpression(const std::string& expression_string) void TemplateFile::onFoundExpression(const std::string& expression_string)
{ {
auto stripped = StringUtils::strip(expression_string); const auto stripped = StringUtils::stripSurroundingWhitepsace(expression_string);
auto expression = std::make_unique<TemplateExpression>(mWorkingNode, stripped); auto expression = std::make_unique<TemplateExpression>(mWorkingNode, stripped);
mWorkingNode->addChild(std::move(expression)); mWorkingNode->addChild(std::move(expression));
} }

View file

@ -48,3 +48,13 @@ std::string CommandLineArgs::getArg(std::size_t index) const
} }
return ""; return "";
} }
// Returns a copy of every stored argument except the one at index 0.
// The result is empty when zero or one arguments are stored.
std::vector<std::string> CommandLineArgs::getUserArgs() const
{
    if (mArugments.size() <= 1)
    {
        return {};
    }
    return std::vector<std::string>(mArugments.begin() + 1, mArugments.end());
}

View file

@ -24,6 +24,8 @@ public:
void recordLaunchPath(); void recordLaunchPath();
std::vector<std::string> getUserArgs() const;
private: private:
std::vector<std::string> mArugments; std::vector<std::string> mArugments;
std::filesystem::path mLaunchPath; std::filesystem::path mLaunchPath;

View file

@ -1,4 +1,5 @@
#include "StringUtils.h" #include "StringUtils.h"
#include <locale> #include <locale>
#include <algorithm> #include <algorithm>
#include <sstream> #include <sstream>
@ -8,19 +9,17 @@
#include "Windows.h" #include "Windows.h"
#endif #endif
bool StringUtils::IsAlphaNumeric(char c) bool StringUtils::isAlphaNumeric(char c)
{ {
std::locale loc; return std::isalnum(c);
return std::isalnum(c, loc);
} }
bool StringUtils::IsSpace(char c) bool StringUtils::isSpace(char c)
{ {
std::locale loc; return std::isspace(c);
return std::isspace(c, loc);
} }
bool StringUtils::IsAlphabetical(char c) bool StringUtils::isAlphabetical(char c)
{ {
return std::isalpha(c); return std::isalpha(c);
} }
@ -46,7 +45,44 @@ std::vector<std::string> StringUtils::toLines(const std::string& input)
return result; return result;
} }
std::string StringUtils::strip(const std::string& input) bool StringUtils::isWhitespaceOnly(const std::string& input)
{
if (input.empty())
{
return true;
}
else
{
return std::all_of(input.cbegin(), input.cend(), [](char c){ return std::isspace(c); });
}
}
// Returns the length of the first uninterrupted run of character c in input,
// or 0 when c does not occur at all.
unsigned StringUtils::countFirstConsecutiveHits(const std::string& input, char c)
{
    const auto run_start = input.find(c);
    if (run_start == std::string::npos)
    {
        return 0;
    }

    // Walk forward from the first hit until the run is broken or input ends.
    unsigned run_length = 0;
    auto cursor = run_start;
    while (cursor < input.size() && input[cursor] == c)
    {
        ++run_length;
        ++cursor;
    }
    return run_length;
}
std::string StringUtils::stripSurroundingWhitepsace(const std::string& input)
{ {
if (input.empty()) if (input.empty())
{ {
@ -110,12 +146,10 @@ std::vector<std::string> StringUtils::split(const std::string& input)
return substrings; return substrings;
} }
std::string StringUtils::toLower(const std::string& s)
std::string StringUtils::ToLower(const std::string& s)
{ {
std::string ret; std::string ret;
std::transform(s.begin(), s.end(), ret.begin(), std::transform(s.begin(), s.end(), ret.begin(), [](unsigned char c){ return std::tolower(c); });
[](unsigned char c){ return std::tolower(c); });
return ret; return ret;
} }
@ -139,7 +173,7 @@ std::string StringUtils::convert(const std::wstring& input)
#endif #endif
} }
std::string StringUtils::ToPaddedString(unsigned numBytes, unsigned entry) std::string StringUtils::toPaddedString(unsigned numBytes, unsigned entry)
{ {
std::stringstream sstr; std::stringstream sstr;
sstr << std::setfill('0') << std::setw(numBytes) << entry; sstr << std::setfill('0') << std::setw(numBytes) << entry;
@ -165,20 +199,35 @@ std::string StringUtils::stripQuotes(const std::string& input)
return input.substr(start_index, end_index - start_index + 1); return input.substr(start_index, end_index - start_index + 1);
} }
std::string StringUtils::replaceWith(const std::string& inputString, const std::string& searchString, const std::string& replaceString)
{
return inputString;
}
std::string StringUtils::removeUpTo(const std::string& input, const std::string& prefix) std::string StringUtils::removeUpTo(const std::string& input, const std::string& prefix)
{ {
std::size_t found = input.find(prefix); std::size_t found = input.find(prefix);
if (found!=std::string::npos) if (found != std::string::npos)
{ {
return input.substr(found, prefix.size()); return input.substr(found + prefix.size(), input.size()-found);
} }
else else
{ {
return input; return input;
} }
} }
// Reports whether input begins with prefix.
// With ignoreWhitespace set, the text before the first occurrence of prefix
// may consist of whitespace only; otherwise prefix must sit at index 0.
bool StringUtils::startsWith(const std::string& input, const std::string& prefix, bool ignoreWhitespace)
{
    const auto first_match = input.find(prefix);
    if (!ignoreWhitespace)
    {
        return first_match == 0;
    }
    if (first_match == std::string::npos)
    {
        return false;
    }
    return isWhitespaceOnly(input.substr(0, first_match));
}

View file

@ -16,26 +16,35 @@ public:
static constexpr char SINGLE_QUOTE = '\''; static constexpr char SINGLE_QUOTE = '\'';
static constexpr char COLON = ':'; static constexpr char COLON = ':';
static bool IsAlphaNumeric(char c); static unsigned countFirstConsecutiveHits(const std::string& input, char c);
static bool IsAlphabetical(char c);
static bool IsSpace(char c);
static std::string ToLower(const std::string& s);
static std::string convert(const std::wstring& input); static std::string convert(const std::wstring& input);
static std::string ToPaddedString(unsigned numBytes, unsigned entry);
static std::vector<std::string> split(const std::string& input); static bool isAlphaNumeric(char c);
static std::string strip(const std::string& input);
static bool isAlphabetical(char c);
static bool isSpace(char c);
static bool isWhitespaceOnly(const std::string& input);
static std::string removeUpTo(const std::string& input, const std::string& prefix); static std::string removeUpTo(const std::string& input, const std::string& prefix);
static std::vector<std::string> toLines(const std::string& input); static std::vector<std::string> split(const std::string& input);
static bool startsWith(const std::string& input, const std::string& prefix, bool ignoreWhitespace = false);
static std::string stripSurroundingWhitepsace(const std::string& input);
static std::string stripQuotes(const std::string& input); static std::string stripQuotes(const std::string& input);
static std::vector<unsigned char> toBytes(const std::string& input); static std::vector<unsigned char> toBytes(const std::string& input);
static std::string toLower(const std::string& s);
static std::vector<std::string> toLines(const std::string& input);
static std::string toPaddedString(unsigned numBytes, unsigned entry);
static std::string toString(const std::vector<unsigned char>& bytes); static std::string toString(const std::vector<unsigned char>& bytes);
static std::string replaceWith(const std::string& inputString, const std::string& searchString, const std::string& replaceString);
}; };

View file

@ -13,7 +13,7 @@ FileFormat::ExtensionMap FileFormat::mExtensions = []
bool FileFormat::isFormat(const std::string& extension, Format format) bool FileFormat::isFormat(const std::string& extension, Format format)
{ {
return StringUtils::ToLower(extension) == mExtensions[format]; return StringUtils::toLower(extension) == mExtensions[format];
} }
FileFormat::Format FileFormat::inferFormat(const std::string& query) FileFormat::Format FileFormat::inferFormat(const std::string& query)

View file

@ -25,7 +25,6 @@ public:
}; };
using ExtensionMap = std::map<Format, std::string>; using ExtensionMap = std::map<Format, std::string>;
public: public:
static bool isFormat(const std::string& extension, Format format); static bool isFormat(const std::string& extension, Format format);

View file

@ -38,7 +38,7 @@ void HttpRequest::parseFirstLine(const std::string& line)
const auto c = line[idx]; const auto c = line[idx];
if (inPath) if (inPath)
{ {
if (StringUtils::IsSpace(c)) if (StringUtils::isSpace(c))
{ {
inPath = false; inPath = false;
inMethod = true; inMethod = true;
@ -50,7 +50,7 @@ void HttpRequest::parseFirstLine(const std::string& line)
} }
else if (inMethod) else if (inMethod)
{ {
if (StringUtils::IsSpace(c)) if (StringUtils::isSpace(c))
{ {
inMethod = false; inMethod = false;
inProtocol = true; inProtocol = true;

View file

@ -12,7 +12,6 @@ public:
void parseMessage(const std::string& message); void parseMessage(const std::string& message);
private: private:
void parseFirstLine(const std::string& line); void parseFirstLine(const std::string& line);
HttpHeader mHeader; HttpHeader mHeader;

View file

@ -16,8 +16,8 @@ std::string PdfXRefTable::toString()
content += "\n"; content += "\n";
for (const auto& record : section.mRecords) for (const auto& record : section.mRecords)
{ {
auto offsetString = StringUtils::ToPaddedString(10, record.mOffsetBytes); auto offsetString = StringUtils::toPaddedString(10, record.mOffsetBytes);
auto generationString = StringUtils::ToPaddedString(5, record.mGenerationNumber); auto generationString = StringUtils::toPaddedString(5, record.mGenerationNumber);
auto freeString = record.mIsFree ? "f" : "n"; auto freeString = record.mIsFree ? "f" : "n";
content += offsetString + " " + generationString + " " + freeString + "\n"; content += offsetString + " " + generationString + " " + freeString + "\n";

View file

@ -22,14 +22,13 @@ class PdfXRefTable
public: public:
PdfXRefTable(); PdfXRefTable();
std::string toString(); void addRecord(unsigned numBytes, unsigned generation, unsigned isFree);
unsigned getNextOffset(); unsigned getNextOffset();
void addRecord(unsigned numBytes, unsigned generation, unsigned isFree);
unsigned getNumEntries(); unsigned getNumEntries();
std::string toString();
private: private:
unsigned mLastAddedBytes{0}; unsigned mLastAddedBytes{0};
std::vector<TableSubSection> mSections; std::vector<TableSubSection> mSections;

View file

@ -1,3 +1,5 @@
set(MODULE_NAME visual_elements)
list(APPEND visual_elements_LIB_INCLUDES list(APPEND visual_elements_LIB_INCLUDES
GeometryNode.cpp GeometryNode.cpp
basic_shapes/RectangleNode.cpp basic_shapes/RectangleNode.cpp
@ -16,15 +18,14 @@ list(APPEND visual_elements_LIB_INCLUDES
) )
add_library(visual_elements SHARED ${visual_elements_LIB_INCLUDES}) add_library(${MODULE_NAME} SHARED ${visual_elements_LIB_INCLUDES})
target_include_directories(visual_elements PUBLIC target_include_directories(${MODULE_NAME} PUBLIC
${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/basic_shapes ${CMAKE_CURRENT_SOURCE_DIR}/basic_shapes
) )
target_link_libraries(visual_elements PUBLIC core geometry fonts mesh image) target_link_libraries(${MODULE_NAME} PUBLIC core geometry fonts mesh image)
set_property(TARGET visual_elements PROPERTY FOLDER src) set_property(TARGET ${MODULE_NAME} PROPERTY FOLDER src)
set_target_properties( ${MODULE_NAME} PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON )
set_target_properties( visual_elements PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON )

View file

@ -1,3 +1,5 @@
set(MODULE_NAME web)
list(APPEND web_LIB_INCLUDES list(APPEND web_LIB_INCLUDES
xml/XmlParser.h xml/XmlParser.h
xml/XmlParser.cpp xml/XmlParser.cpp
@ -24,19 +26,20 @@ list(APPEND web_LIB_INCLUDES
html/HtmlElement.cpp html/HtmlElement.cpp
html/elements/HtmlHeadElement.cpp html/elements/HtmlHeadElement.cpp
html/elements/HtmlBodyElement.cpp html/elements/HtmlBodyElement.cpp
html/elements/HtmlParagraphElement.cpp
) )
# add the executable # add the executable
add_library(web SHARED ${web_LIB_INCLUDES}) add_library(${MODULE_NAME} SHARED ${web_LIB_INCLUDES})
target_include_directories(web PUBLIC target_include_directories(web PUBLIC
"${CMAKE_CURRENT_SOURCE_DIR}" ${CMAKE_CURRENT_SOURCE_DIR}
"${CMAKE_CURRENT_SOURCE_DIR}/xml" ${CMAKE_CURRENT_SOURCE_DIR}/xml
"${CMAKE_CURRENT_SOURCE_DIR}/xml/xml-elements" ${CMAKE_CURRENT_SOURCE_DIR}/xml/xml-elements
"${CMAKE_CURRENT_SOURCE_DIR}/html" ${CMAKE_CURRENT_SOURCE_DIR}/html
"${CMAKE_CURRENT_SOURCE_DIR}/html/elements" ${CMAKE_CURRENT_SOURCE_DIR}/html/elements
"${CMAKE_CURRENT_SOURCE_DIR}/markdown" ${CMAKE_CURRENT_SOURCE_DIR}/markdown
) )
set_property(TARGET web PROPERTY FOLDER src) set_property(TARGET ${MODULE_NAME} PROPERTY FOLDER src)
target_link_libraries(web PUBLIC core) target_link_libraries(${MODULE_NAME} PUBLIC core compiler)
set_target_properties( web PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON ) set_target_properties( ${MODULE_NAME} PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON )

View file

@ -15,7 +15,7 @@ public:
return Type::TEXT_RUN; return Type::TEXT_RUN;
} }
std::string toString(unsigned depth = 0) const override std::string toString(unsigned depth = 0, bool keepInline = false) const override
{ {
const auto prefix = std::string(2*depth, ' '); const auto prefix = std::string(2*depth, ' ');
return prefix + getText(); return prefix + getText();

View file

@ -0,0 +1,46 @@
#include "HtmlParagraphElement.h"
std::string HtmlParagraphElement::toString(unsigned depth, bool keepInline) const
{
const auto prefix = std::string(2*depth, ' ');
auto content = prefix + "<" + getTagName();
for (std::size_t idx=0; idx< getNumAttributes(); idx++)
{
auto attribute = getAttribute(idx);
content += " " + attribute->getName() + "=\"" + attribute->getValue() + "\"";
}
const auto num_children = getNumChildren();
if (num_children == 0 && getText().empty())
{
content += "/>\n";
return content;
}
else
{
content += ">";
}
if (!getText().empty())
{
content += getText();
}
if (num_children>0)
{
content += "\n";
}
for (std::size_t idx=0; idx< getNumChildren(); idx++)
{
auto child = getChild(idx);
content += child->toString(depth+1, true);
}
if (num_children>0)
{
content += prefix;
}
content += "</" + getTagName() + ">\n";
return content;
}

View file

@ -5,7 +5,6 @@
class HtmlParagraphElement : public HtmlElement class HtmlParagraphElement : public HtmlElement
{ {
public: public:
HtmlParagraphElement() : HtmlElement("p") HtmlParagraphElement() : HtmlElement("p")
{ {
@ -15,4 +14,6 @@ public:
{ {
return Type::PARAGRAPH; return Type::PARAGRAPH;
} }
std::string toString(unsigned depth = 0, bool keepInline = false) const override;
}; };

View file

@ -12,21 +12,46 @@ MarkdownParagraph::Type MarkdownParagraph::getType() const
return Type::PARAGRAPH; return Type::PARAGRAPH;
} }
void MarkdownParagraph::addChild(std::unique_ptr<MarkdownInlineElement> child) void MarkdownElementWithChildren::addChild(std::unique_ptr<MarkdownInlineElement> child)
{ {
mChildren.push_back(std::move(child)); mChildren.push_back(std::move(child));
} }
std::size_t MarkdownParagraph::getNumChildren() const std::size_t MarkdownElementWithChildren::getNumChildren() const
{ {
return mChildren.size(); return mChildren.size();
} }
MarkdownInlineElement* MarkdownParagraph::getChild(std::size_t idx) const MarkdownInlineElement* MarkdownElementWithChildren::getChild(std::size_t idx) const
{ {
return mChildren[idx].get(); return mChildren[idx].get();
} }
// Returns a non-owning pointer to the most recently added child,
// or nullptr when there are no children.
MarkdownInlineElement* MarkdownElementWithChildren::getLastChild() const
{
    return mChildren.empty() ? nullptr : mChildren.back().get();
}
std::vector<MarkdownLink*> MarkdownElementWithChildren::getAllLinks() const
{
std::vector<MarkdownLink*> links;
for(auto& child : mChildren)
{
if (child->getType() == Type::LINK)
{
links.push_back(dynamic_cast<MarkdownLink*>(child.get()));
}
}
return links;
}
MarkdownBulletItem::Type MarkdownBulletItem::getType() const MarkdownBulletItem::Type MarkdownBulletItem::getType() const
{ {
return Type::BULLET_ITEM; return Type::BULLET_ITEM;
@ -73,6 +98,16 @@ MarkdownInlineQuote::Type MarkdownInlineQuote::getType() const
return Type::INLINE_QUOTE; return Type::INLINE_QUOTE;
} }
MarkdownCustomInline::MarkdownCustomInline(const std::string& delimiter)
: mDelimiter(delimiter)
{
}
// Identifies this element as Type::CUSTOM_INLINE.
MarkdownCustomInline::Type MarkdownCustomInline::getType() const
{
    return Type::CUSTOM_INLINE;
}
MarkdownLink::MarkdownLink(const std::string& target) MarkdownLink::MarkdownLink(const std::string& target)
: mTarget(target) : mTarget(target)
{ {
@ -89,14 +124,6 @@ MarkdownLink::Type MarkdownLink::getType() const
return Type::LINK; return Type::LINK;
} }
void MarkdownLink::doFieldSubstitution(Type elementType, const std::string& searchPhrase, const std::string& replacementPhrase)
{
if (elementType == Type::LINK)
{
mTarget = StringUtils::replaceWith(mTarget, searchPhrase, replacementPhrase);
}
}
MarkdownImage::MarkdownImage(const std::string& source, const std::string& alt) MarkdownImage::MarkdownImage(const std::string& source, const std::string& alt)
: mSource(source), : mSource(source),
mAlt(alt) mAlt(alt)
@ -129,3 +156,15 @@ MarkdownMultilineQuote::Type MarkdownMultilineQuote::getType() const
{ {
return Type::MULTILINE_QUOTE; return Type::MULTILINE_QUOTE;
} }
MarkdownCustomMultiLine::MarkdownCustomMultiLine(const std::string& tag, const std::string& delimiter)
: mTag(tag),
mDelimiter(delimiter)
{
}
// Identifies this element as Type::CUSTOM_MULTILINE.
MarkdownCustomMultiLine::Type MarkdownCustomMultiLine::getType() const
{
return Type::CUSTOM_MULTILINE;
}

View file

@ -29,25 +29,34 @@ public:
{ {
mTarget = target; mTarget = target;
} }
void doFieldSubstitution(Type elementType, const std::string& searchPhrase, const std::string& replacementPhrase) override;
private: private:
std::string mTarget; std::string mTarget;
}; };
class MarkdownParagraph : public MarkdownElement class MarkdownElementWithChildren : public MarkdownElement
{ {
public: public:
virtual ~MarkdownParagraph() = default;
Type getType() const override;
void addChild(std::unique_ptr<MarkdownInlineElement> child); void addChild(std::unique_ptr<MarkdownInlineElement> child);
std::size_t getNumChildren() const; std::size_t getNumChildren() const;
MarkdownInlineElement* getChild(std::size_t idx) const; MarkdownInlineElement* getChild(std::size_t idx) const;
MarkdownInlineElement* getLastChild() const;
std::vector<MarkdownLink*> getAllLinks() const;
private:
std::vector<std::unique_ptr<MarkdownInlineElement> > mChildren;
};
class MarkdownParagraph : public MarkdownElementWithChildren
{
public:
virtual ~MarkdownParagraph() = default;
Type getType() const override;
void doFieldSubstitution(Type elementType, const std::string& searchPhrase, const std::string& replacementPhrase) override void doFieldSubstitution(Type elementType, const std::string& searchPhrase, const std::string& replacementPhrase) override
{ {
for(auto& child : mChildren) for(auto& child : mChildren)
@ -55,25 +64,11 @@ public:
child->doFieldSubstitution(elementType, searchPhrase, replacementPhrase); child->doFieldSubstitution(elementType, searchPhrase, replacementPhrase);
} }
} }
std::vector<MarkdownLink*> getAllLinks() const
{
std::vector<MarkdownLink*> links;
for(auto& child : mChildren)
{
if (child->getType() == Type::LINK)
{
links.push_back(dynamic_cast<MarkdownLink*>(child.get()));
}
}
return links;
}
private: private:
std::vector<std::unique_ptr<MarkdownInlineElement> > mChildren; std::vector<std::unique_ptr<MarkdownInlineElement> > mChildren;
}; };
class MarkdownBulletItem : public MarkdownElement class MarkdownBulletItem : public MarkdownElementWithChildren
{ {
public: public:
virtual ~MarkdownBulletItem() = default; virtual ~MarkdownBulletItem() = default;
@ -122,6 +117,17 @@ public:
Type getType() const override; Type getType() const override;
}; };
// Inline markdown element that records the delimiter string it was created with.
class MarkdownCustomInline : public MarkdownInlineElement
{
public:
// delimiter: the delimiter string to remember for this element.
MarkdownCustomInline(const std::string& delimiter);
virtual ~MarkdownCustomInline() = default;
// Overrides the base-class type query.
Type getType() const override;
private:
// Delimiter supplied at construction.
std::string mDelimiter;
};
class MarkdownImage : public MarkdownInlineElement class MarkdownImage : public MarkdownInlineElement
{ {
public: public:
@ -140,7 +146,7 @@ private:
std::string mAlt; std::string mAlt;
}; };
class MarkdownMultilineQuote : public MarkdownElement class MarkdownMultilineQuote : public MarkdownMultilineElement
{ {
public: public:
MarkdownMultilineQuote(const std::string& tag); MarkdownMultilineQuote(const std::string& tag);
@ -151,3 +157,16 @@ public:
private: private:
std::string mTag; std::string mTag;
}; };
// Multi-line markdown element that records a tag and the delimiter string it was created with.
class MarkdownCustomMultiLine : public MarkdownMultilineElement
{
public:
// tag: tag string to remember for this block.
// delimiter: delimiter string to remember for this block.
MarkdownCustomMultiLine(const std::string& tag, const std::string& delimiter);
virtual ~MarkdownCustomMultiLine() = default;
// Overrides the base-class type query.
Type getType() const override;
private:
// Tag supplied at construction.
std::string mTag;
// Delimiter supplied at construction.
std::string mDelimiter;
};

View file

@ -10,6 +10,39 @@
#include "MarkdownDocument.h" #include "MarkdownDocument.h"
// Converts each inline child of mdElement into the matching HTML element and
// appends it to htmlElement, preserving child order. Inline quotes become code
// elements, text spans become text runs, links become hyperlinks and images
// become image elements. Children of any other type are ignored.
void MarkdownConverter::onBlockElement(MarkdownElementWithChildren* mdElement, HtmlElement* htmlElement) const
{
    const auto child_count = mdElement->getNumChildren();
    for (std::size_t pos = 0; pos < child_count; ++pos)
    {
        auto* inline_element = mdElement->getChild(pos);
        switch (inline_element->getType())
        {
        case MarkdownElement::Type::INLINE_QUOTE:
        {
            auto html_code = std::make_unique<HtmlCodeElement>();
            html_code->setText(inline_element->getTextContent());
            htmlElement->addChild(std::move(html_code));
            break;
        }
        case MarkdownElement::Type::TEXT_SPAN:
        {
            auto html_run = std::make_unique<HtmlTextRun>();
            html_run->setText(inline_element->getTextContent());
            htmlElement->addChild(std::move(html_run));
            break;
        }
        case MarkdownElement::Type::LINK:
        {
            auto md_link = dynamic_cast<MarkdownLink*>(inline_element);
            auto html_link = std::make_unique<HtmlHyperlinkElement>(md_link->getTarget());
            html_link->setText(md_link->getTextContent());
            htmlElement->addChild(std::move(html_link));
            break;
        }
        case MarkdownElement::Type::IMAGE:
        {
            auto md_image = dynamic_cast<MarkdownImage*>(inline_element);
            auto html_image = std::make_unique<HtmlImageElement>(md_image->getSource(), md_image->getAlt());
            htmlElement->addChild(std::move(html_image));
            break;
        }
        default:
            // Other inline types have no HTML counterpart here.
            break;
        }
    }
}
void MarkdownConverter::convert(MarkdownDocument* markdownDoc, HtmlElement* parentElement) const void MarkdownConverter::convert(MarkdownDocument* markdownDoc, HtmlElement* parentElement) const
{ {
for(unsigned idx=0; idx<markdownDoc->getNumElements();idx++) for(unsigned idx=0; idx<markdownDoc->getNumElements();idx++)
@ -29,35 +62,8 @@ void MarkdownConverter::convert(MarkdownDocument* markdownDoc, HtmlElement* pare
auto html_p_element = std::make_unique<HtmlParagraphElement>(); auto html_p_element = std::make_unique<HtmlParagraphElement>();
auto para_element = dynamic_cast<MarkdownParagraph*>(md_element); auto para_element = dynamic_cast<MarkdownParagraph*>(md_element);
for(unsigned idx=0; idx< para_element->getNumChildren(); idx++) onBlockElement(para_element, html_p_element.get());
{
auto child = para_element->getChild(idx);
if (child->getType() == MarkdownElement::Type::INLINE_QUOTE)
{
auto html_quote = std::make_unique<HtmlCodeElement>();
html_quote->setText(child->getTextContent());
html_p_element->addChild(std::move(html_quote));
}
else if(child->getType() == MarkdownElement::Type::TEXT_SPAN)
{
auto html_text = std::make_unique<HtmlTextRun>();
html_text->setText(child->getTextContent());
html_p_element->addChild(std::move(html_text));
}
else if(child->getType() == MarkdownElement::Type::LINK)
{
auto link_element = dynamic_cast<MarkdownLink*>(child);
auto html_text = std::make_unique<HtmlHyperlinkElement>(link_element->getTarget());
html_text->setText(link_element->getTextContent());
html_p_element->addChild(std::move(html_text));
}
else if(child->getType() == MarkdownElement::Type::IMAGE)
{
auto link_element = dynamic_cast<MarkdownImage*>(child);
auto html_text = std::make_unique<HtmlImageElement>(link_element->getSource(), link_element->getAlt());
html_p_element->addChild(std::move(html_text));
}
}
parentElement->addChild(std::move(html_p_element)); parentElement->addChild(std::move(html_p_element));
} }
else if(md_element->getType() == MarkdownElement::Type::BULLET_LIST) else if(md_element->getType() == MarkdownElement::Type::BULLET_LIST)
@ -68,7 +74,9 @@ void MarkdownConverter::convert(MarkdownDocument* markdownDoc, HtmlElement* pare
{ {
auto child = list_element->getChild(idx); auto child = list_element->getChild(idx);
auto html_list_item = std::make_unique<HtmlListItem>(); auto html_list_item = std::make_unique<HtmlListItem>();
html_list_item->setText(child->getTextContent());
onBlockElement(child, html_list_item.get());
html_list->addChild(std::move(html_list_item)); html_list->addChild(std::move(html_list_item));
} }
parentElement->addChild(std::move(html_list)); parentElement->addChild(std::move(html_list));

View file

@ -4,6 +4,8 @@
class HtmlDocument; class HtmlDocument;
class HtmlElement; class HtmlElement;
class MarkdownElementWithChildren;
class MarkdownDocument; class MarkdownDocument;
class MarkdownConverter class MarkdownConverter
@ -13,4 +15,6 @@ public:
void convert(MarkdownDocument* markdownDoc, HtmlElement* parentElement) const; void convert(MarkdownDocument* markdownDoc, HtmlElement* parentElement) const;
private:
void onBlockElement(MarkdownElementWithChildren* mdElement, HtmlElement* htmlElement) const;
}; };

View file

@ -9,3 +9,8 @@ const std::string& MarkdownElement::getTextContent() const
{ {
return mTextContent; return mTextContent;
} }
// Appends line to the element's text content, followed by a newline.
void MarkdownElement::addLine(const std::string& line)
{
    mTextContent.append(line);
    mTextContent.append("\n");
}

View file

@ -10,12 +10,10 @@ public:
HEADING, HEADING,
PARAGRAPH, PARAGRAPH,
TEXT_SPAN, TEXT_SPAN,
INLINE_CODE,
MULTILINE_CODE,
INLINE_QUOTE, INLINE_QUOTE,
MULTILINE_QUOTE, MULTILINE_QUOTE,
INLINE_SPECIAL, CUSTOM_INLINE,
MULTILINE_SPECIAL, CUSTOM_MULTILINE,
LINK, LINK,
IMAGE, IMAGE,
BULLET_ITEM, BULLET_ITEM,
@ -26,6 +24,8 @@ public:
void appendTextContent(const std::string& content); void appendTextContent(const std::string& content);
void addLine(const std::string& line);
const std::string& getTextContent() const; const std::string& getTextContent() const;
virtual Type getType() const = 0; virtual Type getType() const = 0;
@ -43,3 +43,9 @@ class MarkdownInlineElement : public MarkdownElement
public: public:
virtual ~MarkdownInlineElement() = default; virtual ~MarkdownInlineElement() = default;
}; };
// Base class for markdown elements that span several lines; it adds no members
// of its own beyond a virtual destructor.
class MarkdownMultilineElement : public MarkdownElement
{
public:
virtual ~MarkdownMultilineElement() = default;
};

View file

@ -1,15 +1,21 @@
#include "MarkdownParser.h" #include "MarkdownParser.h"
#include "MarkdownDocument.h" #include "MarkdownDocument.h"
#include "StringUtils.h"
#include "MarkdownComponents.h" #include "MarkdownComponents.h"
#include "Lexer.h"
#include "StringUtils.h"
#include <sstream> #include <sstream>
#include <iostream> #include <iostream>
static constexpr char MULTILINE_QUOTE_DELIMITER[]{"```"};
static constexpr char HEADING_DELIMITER{'#'};
MarkdownParser::MarkdownParser() MarkdownParser::MarkdownParser()
{ {
mCustomMultilineDelimiters = {{"$$"}};
mCustomInlineDelimiters = {{"$"}};
} }
MarkdownParser::~MarkdownParser() MarkdownParser::~MarkdownParser()
@ -17,362 +23,345 @@ MarkdownParser::~MarkdownParser()
} }
void MarkdownParser::onMultilineQuote() bool MarkdownParser::isInMultilineBlock() const
{ {
auto quote = std::make_unique<MarkdownMultilineQuote>(mWorkingTag); if (!mWorkingElement)
quote->appendTextContent(mDocumentContent); {
return false;
mDocumentContent.clear(); }
mWorkingTag.clear(); auto working_type = mWorkingElement->getType();
return working_type == MarkdownElement::Type::MULTILINE_QUOTE || working_type == MarkdownElement::Type::CUSTOM_MULTILINE ;
mDocumentState = DocumentState::NONE;
mMarkdownDocument->addElement(std::move(quote));
onNewParagraph();
} }
void MarkdownParser::onInlineQuote() unsigned MarkdownParser::checkForLink(const std::string& lineSection)
{ {
auto quote = std::make_unique<MarkdownInlineQuote>(); if (lineSection.empty())
quote->appendTextContent(mLineContent);
mLineContent.clear();
mLineState = LineState::NONE;
if(mWorkingParagraph)
{ {
mWorkingParagraph->addChild(std::move(quote)); return 0;
} }
std::vector<std::string> hits;
unsigned hit_size{0};
if (Lexer::matchPattern("[@](@)", lineSection, '@', hits))
{
if (hits.size() == 2)
{
auto tag = hits[0];
auto target = hits[1];
onTextSpanFinished();
auto element = std::make_unique<MarkdownLink>(target);
element->appendTextContent(tag);
addChildToWorkingElement(std::move(element));
hit_size = 4 + tag.size() + target.size();
}
}
return hit_size;
} }
void MarkdownParser::onHeading(unsigned level) unsigned MarkdownParser::checkForImage(const std::string& lineSection)
{ {
auto heading = std::make_unique<MarkdownHeading>(level); if (lineSection.empty())
heading->appendTextContent(mLineContent); {
mMarkdownDocument->addElement(std::move(heading)); return 0;
}
std::vector<std::string> hits;
unsigned hit_size{0};
if (Lexer::matchPattern("![@](@)", lineSection, '@', hits))
{
if (hits.size() == 2)
{
auto alt = hits[0];
auto source = hits[1];
onTextSpanFinished();
auto element = std::make_unique<MarkdownImage>(source, alt);
addChildToWorkingElement(std::move(element));
hit_size = 5 + alt.size() + source.size();
}
}
return hit_size;
} }
void MarkdownParser::onNewParagraph() unsigned MarkdownParser::checkForInlineQuote(const std::string& lineSection)
{ {
if (mWorkingBulletList) if (lineSection.empty())
{ {
mMarkdownDocument->addElement(std::move(mWorkingBulletList)); return 0;
mWorkingBulletList.reset();
mDocumentState == DocumentState::NONE;
} }
else if (mWorkingParagraph)
{
onTextSpan();
if (!mWorkingParagraph->getNumChildren() == 0) std::vector<std::string> hits;
unsigned hit_size{0};
if (Lexer::matchPattern("`@`", lineSection, '@', hits))
{ {
mMarkdownDocument->addElement(std::move(mWorkingParagraph)); if (hits.size() == 1)
{
auto content = hits[0];
onTextSpanFinished();
auto element = std::make_unique<MarkdownInlineQuote>();
element->appendTextContent(content);
addChildToWorkingElement(std::move(element));
hit_size = 2 + content.size();
} }
} }
mWorkingParagraph = std::make_unique<MarkdownParagraph>(); return hit_size;
}
unsigned MarkdownParser::checkForCustomInline(const std::string& lineSection)
{
if (lineSection.empty())
{
return 0;
}
std::vector<std::string> hits;
unsigned hit_size{0};
for(unsigned idx=0; idx<mCustomInlineDelimiters.size(); idx++)
{
const auto delimiter = mCustomInlineDelimiters[idx];
if (Lexer::matchPattern(delimiter + "@" + delimiter, lineSection, '@', hits))
{
if (hits.size() == 1)
{
auto content = hits[0];
onTextSpanFinished();
auto element = std::make_unique<MarkdownCustomInline>(delimiter);
element->appendTextContent(content);
addChildToWorkingElement(std::move(element));
hit_size = 2*delimiter.size() + content.size();
break;
}
}
}
return hit_size;
} }
void MarkdownParser::onTextSpan() void MarkdownParser::onTextSpanFinished()
{ {
mLineContent.clear(); if (!mWorkingLine.empty())
if(mWorkingParagraph && !mDocumentContent.empty())
{ {
if (mWorkingTextSpan)
{
std::cout << "Adding to existing text span: " << std::endl;
mWorkingTextSpan->appendTextContent(mWorkingLine);
}
else
{
std::cout << "Adding new text span: " << mWorkingLine << std::endl;
auto text_span = std::make_unique<MarkdownTextSpan>(); auto text_span = std::make_unique<MarkdownTextSpan>();
text_span->appendTextContent(mDocumentContent); text_span->addLine(mWorkingLine);
mWorkingParagraph->addChild(std::move(text_span)); mWorkingTextSpan = text_span.get();
mDocumentContent.clear();
addChildToWorkingElement(std::move(text_span));
}
mWorkingLine.clear();
mWorkingTextSpan = nullptr;
} }
} }
std::pair<unsigned, bool> MarkdownParser::onTick(unsigned tickCount) void MarkdownParser::addChildToWorkingElement(std::unique_ptr<MarkdownInlineElement> child)
{ {
unsigned new_tick_count = tickCount; dynamic_cast<MarkdownElementWithChildren*>(mWorkingElement)->addChild(std::move(child));
bool stop_line_processing = false;
if (tickCount == 2)
{
if (mDocumentState == DocumentState::IN_MULTILINEQUOTE)
{
onMultilineQuote();
stop_line_processing = true;
}
else
{
onNewParagraph();
mLineState = LineState::IN_MULTILINE_TAG;
new_tick_count = 0;
mDocumentState = DocumentState::IN_MULTILINEQUOTE;
}
}
else if(mLineState == LineState::IN_INLINEQUOTE)
{
if (mLineContent.empty())
{
mLineState = LineState::NONE;
new_tick_count++;
}
else
{
new_tick_count = 0;
onInlineQuote();
}
}
else if(mDocumentState == DocumentState::IN_MULTILINEQUOTE)
{
new_tick_count++;
mLineContent += '`';
}
else
{
new_tick_count++;
mLineState = LineState::IN_INLINEQUOTE;
}
return {new_tick_count, stop_line_processing};
} }
void MarkdownParser::onLink() void MarkdownParser::processLine(const std::string& line)
{ {
auto element = std::make_unique<MarkdownLink>(mLineContent); if (isInMultilineBlock())
mLineContent.clear();
element->appendTextContent(mWorkingTag);
mWorkingTag.clear();
if (mWorkingParagraph)
{
mWorkingParagraph->addChild(std::move(element));
}
mLineState = LineState::NONE;
}
void MarkdownParser::onImage()
{
auto element = std::make_unique<MarkdownImage>(mLineContent, mWorkingTag);
mLineContent.clear();
element->appendTextContent(mWorkingTag);
mWorkingTag.clear();
if (mWorkingParagraph)
{
mWorkingParagraph->addChild(std::move(element));
}
mLineState = LineState::NONE;
}
void MarkdownParser::onBulletItem()
{
if (!mWorkingBulletList)
{
mWorkingBulletList = std::make_unique<MarkdownBulletList>();
mDocumentState == DocumentState::IN_BULLETS;
}
auto item = std::make_unique<MarkdownBulletItem>();
item->appendTextContent(mLineContent);
mLineContent.clear();
mWorkingBulletList->addChild(std::move(item));
}
void MarkdownParser::processLine()
{
mLineContent.clear();
mLineState = LineState::NONE;
unsigned heading_level{0};
unsigned tick_count{0};
bool flushed_pre_inline = false;
bool first_nonspace = false;
for(auto c : mWorkingLine)
{
if (!StringUtils::IsSpace(c))
{
if (first_nonspace)
{
first_nonspace = false;
}
else
{
first_nonspace = true;
}
}
else
{
first_nonspace = false;
}
if (c == '`')
{
auto [ret_tick_count, stop_line_processing] = onTick(tick_count);
tick_count = ret_tick_count;
if(stop_line_processing)
{ {
mWorkingElement->addLine(line);
return; return;
} }
}
else if (!mWorkingElement)
{ {
if (mLineState == LineState::IN_INLINEQUOTE) std::cout << "Adding new paragraph " << std::endl;
{ auto paragraph = std::make_unique<MarkdownParagraph>();
if (!flushed_pre_inline) mWorkingElement = paragraph.get();
{ mMarkdownDocument->addElement(std::move(paragraph));
mDocumentContent += mLineContent;
onTextSpan();
flushed_pre_inline = true;
} }
mLineContent += c;
} if (mWorkingElement && mWorkingElement->getType() == MarkdownElement::Type::PARAGRAPH)
else if (mDocumentState == DocumentState::IN_MULTILINEQUOTE)
{ {
mLineContent += c; if (auto last_text_span = dynamic_cast<MarkdownParagraph*>(mWorkingElement)->getLastChild())
}
else if(mLineState == LineState::IN_LINK_TAG)
{ {
if (c == ']') mWorkingTextSpan = last_text_span;
{
mLineState = LineState::AWAITING_LINK_BODY;
}
else
{
mWorkingTag += c;
}
}
else if(mLineState == LineState::AWAITING_LINK_BODY)
{
if (c == '(')
{
mLineState = LineState::IN_LINK_BODY;
}
else
{
mLineContent = '[' + mWorkingTag + ']';
mLineState = LineState::NONE;
}
}
else if(mLineState == LineState::IN_LINK_BODY)
{
if(c==')')
{
onLink();
}
else
{
mLineContent += c;
}
}
else if(mLineState == LineState::AWAITING_IMG_TAG)
{
if (c == '[')
{
mLineState = LineState::IN_IMG_TAG;
}
else
{
mLineContent = "![";
mLineState = LineState::NONE;
}
}
else if(mLineState == LineState::IN_IMG_TAG)
{
if (c == ']')
{
mLineState = LineState::AWAITING_IMG_BODY;
}
else
{
mWorkingTag += c;
}
}
else if(mLineState == LineState::AWAITING_IMG_BODY)
{
if (c == '(')
{
mLineState = LineState::IN_IMG_BODY;
}
else
{
mLineContent = "![" + mWorkingTag + "]";
mWorkingTag.clear();
mLineState = LineState::NONE;
}
}
else if(mLineState == LineState::IN_IMG_BODY)
{
if (c == ')')
{
onImage();
}
else
{
mLineContent += c;
}
}
else
{
if (c == '#')
{
onNewParagraph();
mLineState = LineState::IN_HEADING;
heading_level++;
}
else if(c == '[')
{
mDocumentContent += mLineContent;
onTextSpan();
mLineState = LineState::IN_LINK_TAG;
}
else if(c == '!')
{
mDocumentContent += mLineContent;
onTextSpan();
mLineState = LineState::AWAITING_IMG_TAG;
}
else if(first_nonspace && c == '*')
{
if (!mWorkingBulletList)
{
onNewParagraph();
}
mLineState = LineState::IN_BULLETS;
}
else
{
mLineContent += c;
}
}
} }
} }
if (mLineState == LineState::IN_HEADING) unsigned line_position = 0;
mWorkingLine.clear();
while(line_position < line.size())
{ {
onHeading(heading_level); const auto remaining = line.substr(line_position, line.size() - line_position);
if(auto length = checkForImage(remaining))
{
line_position += length;
} }
else if(mLineState == LineState::IN_MULTILINE_TAG) else if(auto length = checkForLink(remaining))
{ {
mWorkingTag = mLineContent; line_position += length;
} }
else if (mLineState == LineState::IN_INLINEQUOTE) else if(auto length = checkForInlineQuote(remaining))
{ {
onTextSpan(); line_position += length;
} }
else if (mLineState == LineState::IN_BULLETS) else if(auto length = checkForCustomInline(remaining))
{ {
onBulletItem(); line_position += length;
} }
else else
{ {
if (mLineContent.size() > 0) mWorkingLine += line[line_position];
{ line_position++;
mDocumentContent.append(mLineContent);
} }
} }
onTextSpanFinished();
} }
void MarkdownParser::onEmptyLine() void MarkdownParser::onEmptyLine()
{ {
onNewParagraph(); if (!isInMultilineBlock())
{
onSectionFinished();
}
}
// Reports whether `line` begins with MULTILINE_QUOTE_DELIMITER.
// The final `true` asks StringUtils::startsWith to ignore whitespace
// ahead of the delimiter, so an indented delimiter still counts.
bool MarkdownParser::startsWithMultiLineQuote(const std::string& line) const
{
    return StringUtils::startsWith(line, MULTILINE_QUOTE_DELIMITER, true);
}
// Finds the first registered custom multiline delimiter that `line` starts
// with (whitespace ahead of the delimiter is ignored).
// Returns the delimiter's position in mCustomMultilineDelimiters, or -1 when
// none of them match.
int MarkdownParser::startsWithCustomMultilineBlock(const std::string& line) const
{
    const bool ignore_whitespace{true};
    unsigned slot = 0;
    for (const auto& delimiter : mCustomMultilineDelimiters)
    {
        if (StringUtils::startsWith(line, delimiter, ignore_whitespace))
        {
            return static_cast<int>(slot);
        }
        ++slot;
    }
    return -1;
}
// Reports whether `line` begins with a '#' heading marker, ignoring any
// whitespace ahead of it.
bool MarkdownParser::startsWithHeading(const std::string& line) const
{
    const bool ignore_whitespace{true};
    const bool is_heading = StringUtils::startsWith(line, "#", ignore_whitespace);
    return is_heading;
}
// Reports whether `line` begins with a '*' bullet marker, ignoring any
// whitespace ahead of it.
bool MarkdownParser::startsWithBulletItem(const std::string& line) const
{
    const bool ignore_whitespace{true};
    const bool is_bullet = StringUtils::startsWith(line, "*", ignore_whitespace);
    return is_bullet;
}
// Handles a line that starts with the multiline quote delimiter.
//  - Working element is already a multiline quote: the delimiter closes it.
//  - Inside some other multiline block: the line is passed to processLine.
//  - Otherwise: a new multiline quote is opened; whatever follows the
//    delimiter on this line is passed to the quote as its tag.
void MarkdownParser::onFoundMultiLineQuote(const std::string& line)
{
    const bool closes_open_quote = mWorkingElement
        && mWorkingElement->getType() == MarkdownElement::Type::MULTILINE_QUOTE;
    if (closes_open_quote)
    {
        onSectionFinished();
        return;
    }

    if (isInMultilineBlock())
    {
        processLine(line);
        return;
    }

    const auto quote_tag = StringUtils::removeUpTo(line, MULTILINE_QUOTE_DELIMITER);
    auto new_quote = std::make_unique<MarkdownMultilineQuote>(quote_tag);
    // Keep a non-owning pointer before ownership moves to the document.
    mWorkingElement = new_quote.get();
    mMarkdownDocument->addElement(std::move(new_quote));
}
// Handles a line that starts with one of the custom multiline delimiters.
//
// line      - the full source line.
// blockSlot - index into mCustomMultilineDelimiters of the matched delimiter.
//
//  - Working element is a custom multiline block opened with this same
//    delimiter: the line closes it.
//  - Inside any other multiline block: the line is passed to processLine.
//  - Otherwise: a new custom multiline block is opened; whatever follows the
//    delimiter on this line is passed to the block as its tag.
void MarkdownParser::onFoundCustomMultiLineBlock(const std::string& line, unsigned blockSlot)
{
    const bool closes_open_block = mWorkingElement
        && mWorkingElement->getType() == MarkdownElement::Type::CUSTOM_MULTILINE
        && blockSlot == mCustomDelimiterIndex;
    if (closes_open_block)
    {
        onSectionFinished();
    }
    else if (isInMultilineBlock())
    {
        processLine(line);
    }
    else
    {
        const auto& delimiter = mCustomMultilineDelimiters[blockSlot];
        const auto tag = StringUtils::removeUpTo(line, delimiter);
        auto block = std::make_unique<MarkdownCustomMultiLine>(tag, delimiter);
        mWorkingElement = block.get();
        // Remember which delimiter opened this block. The close check above
        // compares against this index; without recording it, a block opened
        // with any delimiter other than slot 0 could never be closed.
        mCustomDelimiterIndex = blockSlot;
        mMarkdownDocument->addElement(std::move(block));
    }
}
void MarkdownParser::onFoundHeading(const std::string& line)
{
if(isInMultilineBlock())
{
processLine(line);
}
else
{
onSectionFinished();
unsigned level = StringUtils::countFirstConsecutiveHits(line, HEADING_DELIMITER);
auto heading = std::make_unique<MarkdownHeading>(level);
std::string prefix;
for(unsigned idx=0; idx<level; idx++)
{
prefix += HEADING_DELIMITER;
}
heading->appendTextContent(StringUtils::stripSurroundingWhitepsace(StringUtils::removeUpTo(line, prefix)));
mMarkdownDocument->addElement(std::move(heading));
}
}
void MarkdownParser::onFoundBulletItem(const std::string& line)
{
if(isInMultilineBlock())
{
processLine(line);
}
else
{
if (mWorkingBulletList)
{
auto item = std::make_unique<MarkdownBulletItem>();
mWorkingElement = item.get();
mWorkingBulletList->addChild(std::move(item));
}
else
{
std::cout << "Starting new bullet list" << std::endl;
auto bullet_list = std::make_unique<MarkdownBulletList>();
mWorkingBulletList = bullet_list.get();
mMarkdownDocument->addElement(std::move(bullet_list));
auto bullet_item = std::make_unique<MarkdownBulletItem>();
mWorkingElement = bullet_item.get();
mWorkingBulletList->addChild(std::move(bullet_item));
processLine(StringUtils::removeUpTo(line, "*"));
}
}
}
void MarkdownParser::onSectionFinished()
{
std::cout << "Section is finished" << std::endl;
mWorkingElement = nullptr;
mWorkingBulletList = nullptr;
mWorkingTextSpan = nullptr;
} }
std::unique_ptr<MarkdownDocument> MarkdownParser::run(const std::string& content) std::unique_ptr<MarkdownDocument> MarkdownParser::run(const std::string& content)
@ -384,17 +373,39 @@ std::unique_ptr<MarkdownDocument> MarkdownParser::run(const std::string& content
while (std::getline(ss, line, '\n')) while (std::getline(ss, line, '\n'))
{ {
if (line.empty()) std::cout << "Processing line " << line << std::endl;
if (StringUtils::isWhitespaceOnly(line))
{ {
std::cout << "Is whitespace only " << std::endl;
onEmptyLine(); onEmptyLine();
continue; continue;
} }
mWorkingLine = line; else if (startsWithMultiLineQuote(line))
processLine(); {
std::cout << "Found multiline quote" << std::endl;
onFoundMultiLineQuote(line);
}
else if (auto result = startsWithCustomMultilineBlock(line); result >= 0)
{
std::cout << "Found custom multiline" << std::endl;
onFoundCustomMultiLineBlock(line, result);
}
else if (startsWithHeading(line))
{
std::cout << "Found heading" << std::endl;
onFoundHeading(line);
}
else if(startsWithBulletItem(line))
{
std::cout << "Found bulletitem" << std::endl;
onFoundBulletItem(line);
}
else
{
std::cout << "Found nothing - process line" << std::endl;
processLine(line);
}
} }
onTextSpan();
onNewParagraph();
return std::move(mMarkdownDocument); return std::move(mMarkdownDocument);
} }

View file

@ -2,36 +2,15 @@
#include <memory> #include <memory>
#include <string> #include <string>
#include <vector>
class MarkdownDocument; class MarkdownDocument;
class MarkdownParagraph; class MarkdownElement;
class MarkdownInlineElement;
class MarkdownBulletList; class MarkdownBulletList;
class MarkdownParser class MarkdownParser
{ {
enum class DocumentState
{
NONE,
IN_MULTILINEQUOTE,
IN_BULLETS
};
enum class LineState
{
NONE,
IN_HEADING,
IN_INLINEQUOTE,
IN_MULTILINE_TAG,
IN_LINK_TAG,
AWAITING_LINK_BODY,
IN_LINK_BODY,
AWAITING_IMG_TAG,
IN_IMG_TAG,
AWAITING_IMG_BODY,
IN_IMG_BODY,
IN_BULLETS
};
public: public:
MarkdownParser(); MarkdownParser();
@ -40,34 +19,40 @@ public:
std::unique_ptr<MarkdownDocument> run(const std::string& content); std::unique_ptr<MarkdownDocument> run(const std::string& content);
private: private:
void processLine(); void addChildToWorkingElement(std::unique_ptr<MarkdownInlineElement> child);
void onMultilineQuote(); unsigned checkForImage(const std::string& lineSection);
void onInlineQuote(); unsigned checkForLink(const std::string& lineSection);
void onHeading(unsigned level); unsigned checkForInlineQuote(const std::string& lineSection);
void onLink(); unsigned checkForCustomInline(const std::string& lineSection);
void onImage();
bool isInMultilineBlock() const;
bool startsWithMultiLineQuote(const std::string& line) const;
int startsWithCustomMultilineBlock(const std::string& line) const;
bool startsWithHeading(const std::string& line) const;
bool startsWithBulletItem(const std::string& line) const;
void onFoundMultiLineQuote(const std::string& line);
void onFoundCustomMultiLineBlock(const std::string& line, unsigned blockSlot);
void onFoundHeading(const std::string& line);
void onFoundBulletItem(const std::string& line);
void onEmptyLine(); void onEmptyLine();
void onNewParagraph(); void onSectionFinished();
void onTextSpanFinished();
void onBulletItem(); void processLine(const std::string& line);
void onTextSpan(); unsigned mCustomDelimiterIndex{0};
std::vector<std::string> mCustomMultilineDelimiters;
std::vector<std::string> mCustomInlineDelimiters;
std::pair<unsigned, bool> onTick(unsigned tickCount); MarkdownElement* mWorkingElement{nullptr};
MarkdownBulletList* mWorkingBulletList{nullptr};
MarkdownInlineElement* mWorkingTextSpan{nullptr};
std::string mWorkingLine; std::string mWorkingLine;
std::string mLineContent;
std::string mDocumentContent;
std::string mWorkingTag;
LineState mLineState {LineState::NONE};
DocumentState mDocumentState {DocumentState::NONE};
std::unique_ptr<MarkdownParagraph> mWorkingParagraph;
std::unique_ptr<MarkdownBulletList> mWorkingBulletList;
std::unique_ptr<MarkdownDocument> mMarkdownDocument; std::unique_ptr<MarkdownDocument> mMarkdownDocument;
}; };

View file

@ -53,11 +53,11 @@ void XmlParser::processLine(const std::string& input)
void XmlParser::onChar(char c) void XmlParser::onChar(char c)
{ {
if(StringUtils::IsAlphaNumeric(c)) if(StringUtils::isAlphaNumeric(c))
{ {
onAlphaNumeric(c); onAlphaNumeric(c);
} }
else if(StringUtils::IsSpace(c)) else if(StringUtils::isSpace(c))
{ {
onSpace(c); onSpace(c);
} }

View file

@ -98,10 +98,12 @@ XmlElement* XmlElement::getChild(std::size_t index) const
return mChildren[index].get(); return mChildren[index].get();
} }
std::string XmlElement::toString(unsigned depth) const std::string XmlElement::toString(unsigned depth, bool keepInline) const
{ {
const auto prefix = std::string(2*depth, ' '); const auto prefix = std::string(2*depth, ' ');
std::string line_ending = keepInline ? "" : "\n";
auto content = prefix + "<" + getTagName(); auto content = prefix + "<" + getTagName();
for (std::size_t idx=0; idx< getNumAttributes(); idx++) for (std::size_t idx=0; idx< getNumAttributes(); idx++)
{ {
@ -112,7 +114,7 @@ std::string XmlElement::toString(unsigned depth) const
const auto num_children = getNumChildren(); const auto num_children = getNumChildren();
if (num_children == 0 && getText().empty()) if (num_children == 0 && getText().empty())
{ {
content += "/>\n"; content += "/>" + line_ending;
return content; return content;
} }
else else
@ -127,18 +129,18 @@ std::string XmlElement::toString(unsigned depth) const
if (num_children>0) if (num_children>0)
{ {
content += "\n"; content += line_ending;
} }
for (std::size_t idx=0; idx< getNumChildren(); idx++) for (std::size_t idx=0; idx< getNumChildren(); idx++)
{ {
auto child = getChild(idx); auto child = getChild(idx);
content += child->toString(depth+1); content += child->toString(depth+1, keepInline);
} }
if (num_children>0) if (num_children>0)
{ {
content += prefix; content += prefix;
} }
content += "</" + getTagName() + ">\n"; content += "</" + getTagName() + ">" + line_ending;
return content; return content;
} }

View file

@ -34,7 +34,7 @@ public:
void setText(const std::string& text); void setText(const std::string& text);
void setTagName(const std::string& tagName); void setTagName(const std::string& tagName);
virtual std::string toString(unsigned depth = 0) const; virtual std::string toString(unsigned depth = 0, bool keepInline = false) const;
protected: protected:
std::string mTagName; std::string mTagName;

View file

@ -1,5 +1,6 @@
set(COMPILER_UNIT_TEST_FILES set(COMPILER_UNIT_TEST_FILES
compiler/TestTemplatingEngine.cpp compiler/TestTemplatingEngine.cpp
compiler/TestLexer.cpp
PARENT_SCOPE PARENT_SCOPE
) )

View file

@ -0,0 +1,19 @@
#include "Lexer.h"
#include "TestFramework.h"
#include "TestUtils.h"
#include <iostream>
// Checks that Lexer::matchPattern extracts both '@' placeholders of the
// link-style pattern "[@](@)" from the start of an input string that has
// trailing text after the match.
TEST_CASE(TestLexer_MatchPattern, "[compiler]")
{
    const std::string pattern{"[@](@)"};
    const std::string input{"[I'm inside the tag](I'm inside the brackets), followed by more text."};

    std::vector<std::string> hits;
    const bool matched = Lexer::matchPattern(pattern, input, '@', hits);

    REQUIRE(matched);
    REQUIRE(hits.size() == 2);
    REQUIRE(hits[0] == "I'm inside the tag");
    REQUIRE(hits[1] == "I'm inside the brackets");
}

View file

@ -3,13 +3,28 @@
#include "TestFramework.h" #include "TestFramework.h"
#include "TestUtils.h" #include "TestUtils.h"
#include <iostream> TEST_CASE(TestStringUtils_StripSurroundingWhitepsace, "core")
TEST_CASE(TestStringUtils_Strip, "core")
{ {
std::string input = " super() "; std::string input = " super() ";
std::string stripped = StringUtils::strip(input); std::string stripped = StringUtils::stripSurroundingWhitepsace(input);
REQUIRE(stripped == "super()");
auto predicate = stripped == "super()"; }
REQUIRE(predicate);
// Checks that StringUtils::removeUpTo drops everything up to and including
// the given marker, leaving only the text that follows it.
TEST_CASE(TestStringUtils_RemoveUpTo, "core")
{
    const std::string input{"def{filename}abc/123/456"};
    const auto removed = StringUtils::removeUpTo(input, "{filename}");
    REQUIRE(removed == "abc/123/456");
}
TEST_CASE(TestStringUtils_startsWith, "core")
{
std::string input = " ```some triple ticks ";
bool ignore_whitespace{false};
auto starts_with = StringUtils::startsWith(input, "```", ignore_whitespace);
REQUIRE(!starts_with);
ignore_whitespace = true;
starts_with = StringUtils::startsWith(input, "```", ignore_whitespace);
REQUIRE(starts_with);
} }

View file

@ -0,0 +1,30 @@
# I'm a level one header
I'm some text under level one
## I'm a level two header
I'm some text under level two
```
I'm a code block
```
I'm a line under the code block, with some `inline code`.
### I'm a level three header
I'm a bullet point list:
* First point
* Second point
* Third point
With a [hyperlink](www.imahyperlink.com) embedded.
# I'm another level one header
I'm some inline math $a = b + c$ and I'm some standalone math:
$$
d = e + f
$$
![This is an image](https://myoctocat.com/assets/images/base-octocat.svg)

View file

@ -1,17 +1,22 @@
#include "TestFramework.h" #include "TestFramework.h"
#include "CommandLineArgs.h"
#ifdef _WIN32 #ifdef _WIN32
#include <windows.h> #include <windows.h>
#endif #endif
//int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, PWSTR pCmdLine, int nCmdShow) //int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, PWSTR pCmdLine, int nCmdShow)
int main() int main(int argc, char *argv[])
{ {
#ifdef _WIN32 #ifdef _WIN32
CoInitializeEx(nullptr, COINIT_APARTMENTTHREADED); CoInitializeEx(nullptr, COINIT_APARTMENTTHREADED);
#endif #endif
auto result = TestCaseRunner::getInstance().run(); auto args = CommandLineArgs::Create();
args->process(argc, argv);
auto result = TestCaseRunner::getInstance().run(args->getUserArgs());
#ifdef _WIN32 #ifdef _WIN32
CoUninitialize(); CoUninitialize();

View file

@ -36,17 +36,30 @@ void TestCaseRunner::markTestFailure(const std::string& line)
sFailureLine = line; sFailureLine = line;
} }
bool TestCaseRunner::run() bool TestCaseRunner::run(const std::vector<std::string>& args)
{ {
std::string test_to_run;
if (args.size() > 0 )
{
test_to_run = args[0];
}
FileLogger::GetInstance().disable(); FileLogger::GetInstance().disable();
for (auto test_case : mCases) for (auto test_case : mCases)
{ {
if (!test_to_run.empty())
{
if (test_case->getName() != test_to_run)
{
continue;
}
}
sLastTestFailed = false; sLastTestFailed = false;
std::cout << "TestFramework: Running Test - " << test_case->getName() << std::endl; std::cout << "TestFramework: Running Test - " << test_case->getName() << std::endl;
test_case->run(); test_case->run();
if (sLastTestFailed) if (sLastTestFailed)
{ {
std::cout << "Failed at line: " << sLastTestFailed << std::endl; std::cout << "Failed at line: " << sFailureLine << std::endl;
mFailingTests.push_back(test_case->getName()); mFailingTests.push_back(test_case->getName());
} }
} }

View file

@ -18,7 +18,7 @@ public:
void markTestFailure(const std::string& line); void markTestFailure(const std::string& line);
bool run(); bool run(const std::vector<std::string>& args);
private: private:
std::vector<std::string> mFailingTests; std::vector<std::string> mFailingTests;

View file

@ -17,9 +17,9 @@ struct Holder
#define REQUIRE(predicate) \ #define REQUIRE(predicate) \
if(!predicate) \ if(!bool(predicate)) \
{ \ { \
TestCaseRunner::getInstance().markTestFailure(std::to_string(__LINE__)); \ TestCaseRunner::getInstance().markTestFailure(std::to_string(__LINE__) + " with check: '" + std::string(#predicate) + "'"); \
return; \ return; \
} \ } \

View file

@ -7,10 +7,18 @@ using Path = std::filesystem::path;
class TestUtils class TestUtils
{ {
public: public:
static Path getTestOutputDir() static Path getTestOutputDir(const std::string& testFileName = {})
{
if (!testFileName.empty())
{
const auto name = Path(testFileName).filename().stem();
return std::filesystem::current_path() / "test_output" / name;
}
else
{ {
return std::filesystem::current_path() / "test_output"; return std::filesystem::current_path() / "test_output";
} }
}
static Path getTestDataDir() static Path getTestDataDir()
{ {

View file

@ -11,7 +11,9 @@
#include "TestFramework.h" #include "TestFramework.h"
#include "TestUtils.h" #include "TestUtils.h"
TEST_CASE(TestMarkdownParser, "web") #include <iostream>
TEST_CASE(TestMarkdownParser, "[web]")
{ {
File md_file(TestUtils::getTestDataDir() / "sample_markdown.md"); File md_file(TestUtils::getTestDataDir() / "sample_markdown.md");
const auto md_content = md_file.readText(); const auto md_content = md_file.readText();
@ -19,12 +21,59 @@ TEST_CASE(TestMarkdownParser, "web")
MarkdownParser parser; MarkdownParser parser;
auto md_doc = parser.run(md_content); auto md_doc = parser.run(md_content);
std::vector<MarkdownElement::Type> expected_top_level = {
MarkdownElement::Type::HEADING,
MarkdownElement::Type::PARAGRAPH,
MarkdownElement::Type::HEADING,
MarkdownElement::Type::PARAGRAPH,
MarkdownElement::Type::MULTILINE_QUOTE,
MarkdownElement::Type::PARAGRAPH,
MarkdownElement::Type::HEADING,
MarkdownElement::Type::PARAGRAPH
};
REQUIRE(expected_top_level.size() <= md_doc->getNumElements());
for(unsigned idx=0; idx<expected_top_level.size(); idx++)
{
REQUIRE(md_doc->getElement(idx)->getType() == expected_top_level[idx]);
}
MarkdownConverter converter; MarkdownConverter converter;
auto html = converter.convert(md_doc.get()); auto html = converter.convert(md_doc.get());
HtmlWriter writer; HtmlWriter writer;
const auto html_string = writer.toString(html.get()); const auto html_string = writer.toString(html.get());
File html_file(TestUtils::getTestOutputDir() / "TestMarkdownParserOut.html"); File html_file(TestUtils::getTestOutputDir(__FILE__) / "TestMarkdownParser.html");
html_file.writeText(html_string);
}
TEST_CASE(TestMarkdownParser_Simple, "[web]")
{
File md_file(TestUtils::getTestDataDir() / "simple_markdown.md");
const auto md_content = md_file.readText();
REQUIRE(!md_content.empty());
MarkdownParser parser;
auto md_doc = parser.run(md_content);
std::vector<MarkdownElement::Type> expected_top_level = {
MarkdownElement::Type::PARAGRAPH,
MarkdownElement::Type::BULLET_LIST};
//REQUIRE(expected_top_level.size() <= md_doc->getNumElements());
for(unsigned idx=0; idx<expected_top_level.size(); idx++)
{
//REQUIRE(md_doc->getElement(idx)->getType() == expected_top_level[idx]);
}
MarkdownConverter converter;
auto html = converter.convert(md_doc.get());
HtmlWriter writer;
const auto html_string = writer.toString(html.get());
File html_file(TestUtils::getTestOutputDir(__FILE__) / "TestMarkdownParser_simple.html");
html_file.writeText(html_string); html_file.writeText(html_string);
} }