From 8705859115d39e0d02215bb14160783323302dd6 Mon Sep 17 00:00:00 2001 From: James Grogan Date: Tue, 6 Dec 2022 18:02:43 +0000 Subject: [PATCH] Improvements for markdown parsing. --- src/compiler/CMakeLists.txt | 16 +- src/compiler/Lexer.cpp | 72 ++ src/compiler/Lexer.h | 11 + src/compiler/template_engine/TemplateFile.cpp | 2 +- src/core/CommandLineArgs.cpp | 10 + src/core/CommandLineArgs.h | 2 + src/core/StringUtils.cpp | 89 ++- src/core/StringUtils.h | 33 +- src/core/file_utilities/FileFormats.cpp | 2 +- src/core/file_utilities/FileFormats.h | 1 - src/core/http/HttpRequest.cpp | 4 +- src/core/http/HttpRequest.h | 1 - src/publishing/pdf/PdfXRefTable.cpp | 4 +- src/publishing/pdf/PdfXRefTable.h | 5 +- src/visual_elements/CMakeLists.txt | 13 +- src/web/CMakeLists.txt | 23 +- src/web/html/HtmlTextRun.h | 2 +- .../html/elements/HtmlParagraphElement.cpp | 46 ++ src/web/html/elements/HtmlParagraphElement.h | 3 +- src/web/markdown/MarkdownComponents.cpp | 61 +- src/web/markdown/MarkdownComponents.h | 65 +- src/web/markdown/MarkdownConverter.cpp | 68 +- src/web/markdown/MarkdownConverter.h | 4 + src/web/markdown/MarkdownElement.cpp | 5 + src/web/markdown/MarkdownElement.h | 14 +- src/web/markdown/MarkdownParser.cpp | 663 +++++++++--------- src/web/markdown/MarkdownParser.h | 73 +- src/web/xml/XmlParser.cpp | 4 +- src/web/xml/xml-elements/XmlElement.cpp | 12 +- src/web/xml/xml-elements/XmlElement.h | 2 +- test/compiler/CMakeLists.txt | 1 + test/compiler/TestLexer.cpp | 19 + test/core/TestStringUtils.cpp | 29 +- test/data/simple_markdown.md | 30 + test/test_runner.cpp | 9 +- test/test_utils/TestCaseRunner.cpp | 17 +- test/test_utils/TestCaseRunner.h | 2 +- test/test_utils/TestFramework.h | 12 +- test/test_utils/TestUtils.h | 12 +- test/web/TestMarkdownParser.cpp | 53 +- 40 files changed, 957 insertions(+), 537 deletions(-) create mode 100644 test/compiler/TestLexer.cpp create mode 100644 test/data/simple_markdown.md diff --git a/src/compiler/CMakeLists.txt b/src/compiler/CMakeLists.txt index 291c7c4..bfcca80 100644 --- a/src/compiler/CMakeLists.txt +++ b/src/compiler/CMakeLists.txt @@ -2,18 +2,18 @@ set(MODULE_NAME compiler) list(APPEND TARGET_HEADERS Lexer.h - template_engine/TemplatingEngine.h - template_engine/TemplateFile.h - template_engine/TemplateNode.h - template_engine/TemplateElements.h + template_engine/TemplatingEngine.h + template_engine/TemplateFile.h + template_engine/TemplateNode.h + template_engine/TemplateElements.h ) list(APPEND TARGET_SOURCES Lexer.cpp - template_engine/TemplatingEngine.cpp - template_engine/TemplateFile.cpp - template_engine/TemplateNode.cpp - template_engine/TemplateElements.cpp + template_engine/TemplatingEngine.cpp + template_engine/TemplateFile.cpp + template_engine/TemplateNode.cpp + template_engine/TemplateElements.cpp ) add_library(${MODULE_NAME} SHARED ${TARGET_SOURCES} ${TARGET_HEADERS}) diff --git a/src/compiler/Lexer.cpp b/src/compiler/Lexer.cpp index e69de29..869505f 100644 --- a/src/compiler/Lexer.cpp +++ b/src/compiler/Lexer.cpp @@ -0,0 +1,72 @@ +#include "Lexer.h" + +bool Lexer::matchPattern(const std::string& pattern, const std::string& checkString, char delimiter, std::vector& hitSequence) +{ + if (checkString.empty()) + { + return false; + } + + if (pattern.empty()) + { + return false; + } + + bool found_pattern = true; + unsigned check_idx = 0; + unsigned pattern_idx = 0; + + std::vector hits; + std::string working_hit; + while(check_idx < checkString.size()) + { + if (pattern_idx == pattern.size()) + { + break; + } + + auto check_char = checkString[check_idx]; + auto pattern_char = pattern[pattern_idx]; + if (pattern_char == delimiter) + { + if (pattern_idx + 1 < pattern.size()) + { + if (check_char == pattern[pattern_idx + 1]) + { + hits.push_back(working_hit); + working_hit.clear(); + pattern_idx++; + } + else + { + working_hit+=check_char; + check_idx++; + } + } + else + { + working_hit+=check_char; + check_idx++; + } + } + else + { + if (check_char == pattern_char) + { + check_idx++; + pattern_idx++; + } + else + { + found_pattern = false; + break; + } + } + } + + if (found_pattern) + { + hitSequence = hits; + } + return found_pattern; +} diff --git a/src/compiler/Lexer.h b/src/compiler/Lexer.h index e69de29..7f04304 100644 --- a/src/compiler/Lexer.h +++ b/src/compiler/Lexer.h @@ -0,0 +1,11 @@ +#pragma once + +#include +#include + +class Lexer +{ +public: + // e.g. Pattern [@](@) returns for input: [source](tag) and delimiter @ + static bool matchPattern(const std::string& pattern, const std::string& checkString, char delimiter, std::vector& hitSequence); +}; diff --git a/src/compiler/template_engine/TemplateFile.cpp b/src/compiler/template_engine/TemplateFile.cpp index 5368ee6..adf17d4 100644 --- a/src/compiler/template_engine/TemplateFile.cpp +++ b/src/compiler/template_engine/TemplateFile.cpp @@ -195,7 +195,7 @@ void TemplateFile::onFoundExtends(const std::vector args) void TemplateFile::onFoundExpression(const std::string& expression_string) { - auto stripped = StringUtils::strip(expression_string); + const auto stripped = StringUtils::stripSurroundingWhitepsace(expression_string); auto expression = std::make_unique(mWorkingNode, stripped); mWorkingNode->addChild(std::move(expression)); } diff --git a/src/core/CommandLineArgs.cpp b/src/core/CommandLineArgs.cpp index a742e02..9942b78 100644 --- a/src/core/CommandLineArgs.cpp +++ b/src/core/CommandLineArgs.cpp @@ -48,3 +48,13 @@ std::string CommandLineArgs::getArg(std::size_t index) const } return ""; } + +std::vector CommandLineArgs::getUserArgs() const +{ + std::vector user_args; + for(unsigned idx=1; idx getUserArgs() const; + private: std::vector mArugments; std::filesystem::path mLaunchPath; diff --git a/src/core/StringUtils.cpp b/src/core/StringUtils.cpp index b1fa58c..ff0457a 100644 --- a/src/core/StringUtils.cpp +++ b/src/core/StringUtils.cpp @@ -1,4 +1,5 @@ #include "StringUtils.h" + #include #include #include @@ -8,19 +9,17 @@ #include "Windows.h" #endif -bool StringUtils::IsAlphaNumeric(char c) +bool StringUtils::isAlphaNumeric(char c) { - std::locale loc; - return std::isalnum(c, loc); + return std::isalnum(c); } -bool StringUtils::IsSpace(char c) +bool StringUtils::isSpace(char c) { - std::locale loc; - return std::isspace(c, loc); + return std::isspace(c); } -bool StringUtils::IsAlphabetical(char c) +bool StringUtils::isAlphabetical(char c) { return std::isalpha(c); } @@ -46,7 +45,44 @@ std::vector StringUtils::toLines(const std::string& input) return result; } -std::string StringUtils::strip(const std::string& input) +bool StringUtils::isWhitespaceOnly(const std::string& input) +{ + if (input.empty()) + { + return true; + } + else + { + return std::all_of(input.cbegin(), input.cend(), [](char c){ return std::isspace(c); }); + } +} + +unsigned StringUtils::countFirstConsecutiveHits(const std::string& input, char c) +{ + auto found_id = input.find(c); + if(found_id == std::string::npos) + { + return 0; + } + else + { + unsigned count = 1; + for(unsigned idx=found_id+1; idx StringUtils::split(const std::string& input) return substrings; } - -std::string StringUtils::ToLower(const std::string& s) +std::string StringUtils::toLower(const std::string& s) { std::string ret; - std::transform(s.begin(), s.end(), ret.begin(), - [](unsigned char c){ return std::tolower(c); }); + std::transform(s.begin(), s.end(), ret.begin(), [](unsigned char c){ return std::tolower(c); }); return ret; } @@ -139,7 +173,7 @@ std::string StringUtils::convert(const std::wstring& input) #endif } -std::string StringUtils::ToPaddedString(unsigned numBytes, unsigned entry) +std::string StringUtils::toPaddedString(unsigned numBytes, unsigned entry) { std::stringstream sstr; sstr << std::setfill('0') << std::setw(numBytes) << entry; @@ -165,20 +199,35 @@ std::string StringUtils::stripQuotes(const std::string& input) return input.substr(start_index, end_index - start_index + 1); } -std::string StringUtils::replaceWith(const std::string& inputString, const std::string& searchString, const std::string& replaceString) -{ - return inputString; -} - std::string StringUtils::removeUpTo(const std::string& input, const std::string& prefix) { std::size_t found = input.find(prefix); - if (found!=std::string::npos) + if (found != std::string::npos) { - return input.substr(found, prefix.size()); + return input.substr(found + prefix.size(), input.size()-found); } else { return input; } } + +bool StringUtils::startsWith(const std::string& input, const std::string& prefix, bool ignoreWhitespace) +{ + if(ignoreWhitespace) + { + const auto loc = input.find(prefix); + if (loc == std::string::npos) + { + return false; + } + else + { + return isWhitespaceOnly(input.substr(0, loc)); + } + } + else + { + return input.find(prefix) == 0; + } +} diff --git a/src/core/StringUtils.h b/src/core/StringUtils.h index ab1603d..2c8fca5 100644 --- a/src/core/StringUtils.h +++ b/src/core/StringUtils.h @@ -16,26 +16,35 @@ public: static constexpr char SINGLE_QUOTE = '\''; static constexpr char COLON = ':'; - static bool IsAlphaNumeric(char c); + static unsigned countFirstConsecutiveHits(const std::string& input, char c); - static bool IsAlphabetical(char c); - - static bool IsSpace(char c); - static std::string ToLower(const std::string& s); static std::string convert(const std::wstring& input); - static std::string ToPaddedString(unsigned numBytes, unsigned entry); - static std::vector split(const std::string& input); - static std::string strip(const std::string& input); + + static bool isAlphaNumeric(char c); + + static bool isAlphabetical(char c); + + static bool isSpace(char c); + + static bool isWhitespaceOnly(const std::string& input); static std::string removeUpTo(const std::string& input, const std::string& prefix); - static std::vector toLines(const std::string& input); + static std::vector split(const std::string& input); + + static bool startsWith(const std::string& input, const std::string& prefix, bool ignoreWhitespace = false); + + static std::string stripSurroundingWhitepsace(const std::string& input); static std::string stripQuotes(const std::string& input); static std::vector toBytes(const std::string& input); + + static std::string toLower(const std::string& s); + + static std::vector toLines(const std::string& input); + + static std::string toPaddedString(unsigned numBytes, unsigned entry); + static std::string toString(const std::vector& bytes); - - static std::string replaceWith(const std::string& inputString, const std::string& searchString, const std::string& replaceString); - }; diff --git a/src/core/file_utilities/FileFormats.cpp b/src/core/file_utilities/FileFormats.cpp index da885ef..925b28d 100644 --- a/src/core/file_utilities/FileFormats.cpp +++ b/src/core/file_utilities/FileFormats.cpp @@ -13,7 +13,7 @@ FileFormat::ExtensionMap FileFormat::mExtensions = [] bool FileFormat::isFormat(const std::string& extension, Format format) { - return StringUtils::ToLower(extension) == mExtensions[format]; + return StringUtils::toLower(extension) == mExtensions[format]; } FileFormat::Format FileFormat::inferFormat(const std::string& query) diff --git a/src/core/file_utilities/FileFormats.h b/src/core/file_utilities/FileFormats.h index 2c2f032..0dccf8f 100644 --- a/src/core/file_utilities/FileFormats.h +++ b/src/core/file_utilities/FileFormats.h @@ -25,7 +25,6 @@ public: }; using ExtensionMap = std::map; - public: static bool isFormat(const std::string& extension, Format format); diff --git a/src/core/http/HttpRequest.cpp b/src/core/http/HttpRequest.cpp index 82d5b41..29d863c 100644 --- a/src/core/http/HttpRequest.cpp +++ b/src/core/http/HttpRequest.cpp @@ -38,7 +38,7 @@ void HttpRequest::parseFirstLine(const std::string& line) const auto c = line[idx]; if (inPath) { - if (StringUtils::IsSpace(c)) + if (StringUtils::isSpace(c)) { inPath = false; inMethod = true; @@ -50,7 +50,7 @@ void HttpRequest::parseFirstLine(const std::string& line) } else if (inMethod) { - if (StringUtils::IsSpace(c)) + if (StringUtils::isSpace(c)) { inMethod = false; inProtocol = true; diff --git a/src/core/http/HttpRequest.h b/src/core/http/HttpRequest.h index 32c69dd..4630ddf 100644 --- a/src/core/http/HttpRequest.h +++ b/src/core/http/HttpRequest.h @@ -12,7 +12,6 @@ public: void parseMessage(const std::string& message); private: - void parseFirstLine(const std::string& line); HttpHeader mHeader; diff --git a/src/publishing/pdf/PdfXRefTable.cpp b/src/publishing/pdf/PdfXRefTable.cpp index 1dbec09..a340f1a 100644 --- a/src/publishing/pdf/PdfXRefTable.cpp +++ b/src/publishing/pdf/PdfXRefTable.cpp @@ -16,8 +16,8 @@ std::string PdfXRefTable::toString() content += "\n"; for (const auto& record : section.mRecords) { - auto offsetString = StringUtils::ToPaddedString(10, record.mOffsetBytes); - auto generationString = StringUtils::ToPaddedString(5, record.mGenerationNumber); + auto offsetString = StringUtils::toPaddedString(10, record.mOffsetBytes); + auto generationString = StringUtils::toPaddedString(5, record.mGenerationNumber); auto freeString = record.mIsFree ? "f" : "n"; content += offsetString + " " + generationString + " " + freeString + "\n"; diff --git a/src/publishing/pdf/PdfXRefTable.h b/src/publishing/pdf/PdfXRefTable.h index f27ec1f..961209a 100644 --- a/src/publishing/pdf/PdfXRefTable.h +++ b/src/publishing/pdf/PdfXRefTable.h @@ -22,14 +22,13 @@ class PdfXRefTable public: PdfXRefTable(); - std::string toString(); + void addRecord(unsigned numBytes, unsigned generation, unsigned isFree); unsigned getNextOffset(); - void addRecord(unsigned numBytes, unsigned generation, unsigned isFree); - unsigned getNumEntries(); + std::string toString(); private: unsigned mLastAddedBytes{0}; std::vector mSections; diff --git a/src/visual_elements/CMakeLists.txt b/src/visual_elements/CMakeLists.txt index 974b7e4..49733ec 100644 --- a/src/visual_elements/CMakeLists.txt +++ b/src/visual_elements/CMakeLists.txt @@ -1,3 +1,5 @@ +set(MODULE_NAME visual_elements) + list(APPEND visual_elements_LIB_INCLUDES GeometryNode.cpp basic_shapes/RectangleNode.cpp @@ -16,15 +18,14 @@ list(APPEND visual_elements_LIB_INCLUDES ) -add_library(visual_elements SHARED ${visual_elements_LIB_INCLUDES}) +add_library(${MODULE_NAME} SHARED ${visual_elements_LIB_INCLUDES}) -target_include_directories(visual_elements PUBLIC +target_include_directories(${MODULE_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/basic_shapes ) -target_link_libraries(visual_elements PUBLIC core geometry fonts mesh image) +target_link_libraries(${MODULE_NAME} PUBLIC core geometry fonts mesh image) -set_property(TARGET visual_elements PROPERTY FOLDER src) - -set_target_properties( visual_elements PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON ) +set_property(TARGET ${MODULE_NAME} PROPERTY FOLDER src) +set_target_properties( ${MODULE_NAME} PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON ) diff --git a/src/web/CMakeLists.txt b/src/web/CMakeLists.txt index 14db406..a95294b 100644 --- a/src/web/CMakeLists.txt +++ b/src/web/CMakeLists.txt @@ -1,3 +1,5 @@ +set(MODULE_NAME web) + list(APPEND web_LIB_INCLUDES xml/XmlParser.h xml/XmlParser.cpp @@ -24,19 +26,20 @@ list(APPEND web_LIB_INCLUDES html/HtmlElement.cpp html/elements/HtmlHeadElement.cpp html/elements/HtmlBodyElement.cpp + html/elements/HtmlParagraphElement.cpp ) # add the executable -add_library(web SHARED ${web_LIB_INCLUDES}) +add_library(${MODULE_NAME} SHARED ${web_LIB_INCLUDES}) target_include_directories(web PUBLIC - "${CMAKE_CURRENT_SOURCE_DIR}" - "${CMAKE_CURRENT_SOURCE_DIR}/xml" - "${CMAKE_CURRENT_SOURCE_DIR}/xml/xml-elements" - "${CMAKE_CURRENT_SOURCE_DIR}/html" - "${CMAKE_CURRENT_SOURCE_DIR}/html/elements" - "${CMAKE_CURRENT_SOURCE_DIR}/markdown" + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/xml + ${CMAKE_CURRENT_SOURCE_DIR}/xml/xml-elements + ${CMAKE_CURRENT_SOURCE_DIR}/html + ${CMAKE_CURRENT_SOURCE_DIR}/html/elements + ${CMAKE_CURRENT_SOURCE_DIR}/markdown ) -set_property(TARGET web PROPERTY FOLDER src) -target_link_libraries(web PUBLIC core) -set_target_properties( web PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON ) \ No newline at end of file +set_property(TARGET ${MODULE_NAME} PROPERTY FOLDER src) +target_link_libraries(${MODULE_NAME} PUBLIC core compiler) +set_target_properties( ${MODULE_NAME} PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON ) \ No newline at end of file diff --git a/src/web/html/HtmlTextRun.h b/src/web/html/HtmlTextRun.h index cf39f6c..6832554 100644 --- a/src/web/html/HtmlTextRun.h +++ b/src/web/html/HtmlTextRun.h @@ -15,7 +15,7 @@ public: return Type::TEXT_RUN; } - std::string toString(unsigned depth = 0) const override + std::string toString(unsigned depth = 0, bool keepInline = false) const override { const auto prefix = std::string(2*depth, ' '); return prefix + getText(); diff --git a/src/web/html/elements/HtmlParagraphElement.cpp b/src/web/html/elements/HtmlParagraphElement.cpp index e69de29..439f72a 100644 --- a/src/web/html/elements/HtmlParagraphElement.cpp +++ b/src/web/html/elements/HtmlParagraphElement.cpp @@ -0,0 +1,46 @@ +#include "HtmlParagraphElement.h" + +std::string HtmlParagraphElement::toString(unsigned depth, bool keepInline) const +{ + const auto prefix = std::string(2*depth, ' '); + + auto content = prefix + "<" + getTagName(); + for (std::size_t idx=0; idx< getNumAttributes(); idx++) + { + auto attribute = getAttribute(idx); + content += " " + attribute->getName() + "=\"" + attribute->getValue() + "\""; + } + + const auto num_children = getNumChildren(); + if (num_children == 0 && getText().empty()) + { + content += "/>\n"; + return content; + } + else + { + content += ">"; + } + + if (!getText().empty()) + { + content += getText(); + } + + if (num_children>0) + { + content += "\n"; + } + for (std::size_t idx=0; idx< getNumChildren(); idx++) + { + auto child = getChild(idx); + content += child->toString(depth+1, true); + } + if (num_children>0) + { + content += prefix; + } + + content += "\n"; + return content; +} diff --git a/src/web/html/elements/HtmlParagraphElement.h b/src/web/html/elements/HtmlParagraphElement.h index 9fe618b..e534524 100644 --- a/src/web/html/elements/HtmlParagraphElement.h +++ b/src/web/html/elements/HtmlParagraphElement.h @@ -5,7 +5,6 @@ class HtmlParagraphElement : public HtmlElement { public: - HtmlParagraphElement() : HtmlElement("p") { @@ -15,4 +14,6 @@ public: { return Type::PARAGRAPH; } + + std::string toString(unsigned depth = 0, bool keepInline = false) const override; }; diff --git a/src/web/markdown/MarkdownComponents.cpp b/src/web/markdown/MarkdownComponents.cpp index 45c8792..646be2d 100644 --- a/src/web/markdown/MarkdownComponents.cpp +++ b/src/web/markdown/MarkdownComponents.cpp @@ -12,21 +12,46 @@ MarkdownParagraph::Type MarkdownParagraph::getType() const return Type::PARAGRAPH; } -void MarkdownParagraph::addChild(std::unique_ptr child) +void MarkdownElementWithChildren::addChild(std::unique_ptr child) { mChildren.push_back(std::move(child)); } -std::size_t MarkdownParagraph::getNumChildren() const +std::size_t MarkdownElementWithChildren::getNumChildren() const { return mChildren.size(); } -MarkdownInlineElement* MarkdownParagraph::getChild(std::size_t idx) const +MarkdownInlineElement* MarkdownElementWithChildren::getChild(std::size_t idx) const { return mChildren[idx].get(); } +MarkdownInlineElement* MarkdownElementWithChildren::getLastChild() const +{ + if (mChildren.empty()) + { + return nullptr; + } + else + { + return mChildren[mChildren.size()-1].get(); + } +} + +std::vector MarkdownElementWithChildren::getAllLinks() const +{ + std::vector links; + for(auto& child : mChildren) + { + if (child->getType() == Type::LINK) + { + links.push_back(dynamic_cast(child.get())); + } + } + return links; +} + MarkdownBulletItem::Type MarkdownBulletItem::getType() const { return Type::BULLET_ITEM; @@ -73,6 +98,16 @@ MarkdownInlineQuote::Type MarkdownInlineQuote::getType() const return Type::INLINE_QUOTE; } +MarkdownCustomInline::MarkdownCustomInline(const std::string& delimiter) + : mDelimiter(delimiter) +{ +} + +MarkdownCustomInline::Type MarkdownCustomInline::getType() const +{ + return Type::CUSTOM_INLINE; +}; + MarkdownLink::MarkdownLink(const std::string& target) : mTarget(target) { @@ -89,14 +124,6 @@ MarkdownLink::Type MarkdownLink::getType() const return Type::LINK; } -void MarkdownLink::doFieldSubstitution(Type elementType, const std::string& searchPhrase, const std::string& replacementPhrase) -{ - if (elementType == Type::LINK) - { - mTarget = StringUtils::replaceWith(mTarget, searchPhrase, replacementPhrase); - } -} - MarkdownImage::MarkdownImage(const std::string& source, const std::string& alt) : mSource(source), mAlt(alt) @@ -129,3 +156,15 @@ MarkdownMultilineQuote::Type MarkdownMultilineQuote::getType() const { return Type::MULTILINE_QUOTE; } + + +MarkdownCustomMultiLine::MarkdownCustomMultiLine(const std::string& tag, const std::string& delimiter) + : mTag(tag), + mDelimiter(delimiter) +{ +} + +MarkdownCustomMultiLine::Type MarkdownCustomMultiLine::getType() const +{ + return Type::CUSTOM_MULTILINE; +} diff --git a/src/web/markdown/MarkdownComponents.h b/src/web/markdown/MarkdownComponents.h index bea3d0d..9ce191f 100644 --- a/src/web/markdown/MarkdownComponents.h +++ b/src/web/markdown/MarkdownComponents.h @@ -29,25 +29,34 @@ public: { mTarget = target; } - - void doFieldSubstitution(Type elementType, const std::string& searchPhrase, const std::string& replacementPhrase) override; private: std::string mTarget; }; -class MarkdownParagraph : public MarkdownElement +class MarkdownElementWithChildren : public MarkdownElement { public: - virtual ~MarkdownParagraph() = default; - - Type getType() const override; - void addChild(std::unique_ptr child); std::size_t getNumChildren() const; MarkdownInlineElement* getChild(std::size_t idx) const; + MarkdownInlineElement* getLastChild() const; + + std::vector getAllLinks() const; + +private: + std::vector > mChildren; +}; + +class MarkdownParagraph : public MarkdownElementWithChildren +{ +public: + virtual ~MarkdownParagraph() = default; + + Type getType() const override; + void doFieldSubstitution(Type elementType, const std::string& searchPhrase, const std::string& replacementPhrase) override { for(auto& child : mChildren) @@ -55,25 +64,11 @@ public: child->doFieldSubstitution(elementType, searchPhrase, replacementPhrase); } } - - std::vector getAllLinks() const - { - std::vector links; - for(auto& child : mChildren) - { - if (child->getType() == Type::LINK) - { - links.push_back(dynamic_cast(child.get())); - } - } - return links; - } - private: std::vector > mChildren; }; -class MarkdownBulletItem : public MarkdownElement +class MarkdownBulletItem : public MarkdownElementWithChildren { public: virtual ~MarkdownBulletItem() = default; @@ -122,6 +117,17 @@ public: Type getType() const override; }; +class MarkdownCustomInline : public MarkdownInlineElement +{ +public: + MarkdownCustomInline(const std::string& delimiter); + virtual ~MarkdownCustomInline() = default; + + Type getType() const override; +private: + std::string mDelimiter; +}; + class MarkdownImage : public MarkdownInlineElement { public: @@ -140,7 +146,7 @@ private: std::string mAlt; }; -class MarkdownMultilineQuote : public MarkdownElement +class MarkdownMultilineQuote : public MarkdownMultilineElement { public: MarkdownMultilineQuote(const std::string& tag); @@ -151,3 +157,16 @@ public: private: std::string mTag; }; + +class MarkdownCustomMultiLine : public MarkdownMultilineElement +{ +public: + MarkdownCustomMultiLine(const std::string& tag, const std::string& delimiter); + + virtual ~MarkdownCustomMultiLine() = default; + + Type getType() const override; +private: + std::string mTag; + std::string mDelimiter; +}; diff --git a/src/web/markdown/MarkdownConverter.cpp b/src/web/markdown/MarkdownConverter.cpp index 7dbeb2e..db3cc33 100644 --- a/src/web/markdown/MarkdownConverter.cpp +++ b/src/web/markdown/MarkdownConverter.cpp @@ -10,6 +10,39 @@ #include "MarkdownDocument.h" +void MarkdownConverter::onBlockElement(MarkdownElementWithChildren* mdElement, HtmlElement* htmlElement) const +{ + for(unsigned idx=0; idx< mdElement->getNumChildren(); idx++) + { + auto child = mdElement->getChild(idx); + if (child->getType() == MarkdownElement::Type::INLINE_QUOTE) + { + auto html_quote = std::make_unique(); + html_quote->setText(child->getTextContent()); + htmlElement->addChild(std::move(html_quote)); + } + else if(child->getType() == MarkdownElement::Type::TEXT_SPAN) + { + auto html_text = std::make_unique(); + html_text->setText(child->getTextContent()); + htmlElement->addChild(std::move(html_text)); + } + else if(child->getType() == MarkdownElement::Type::LINK) + { + auto link_element = dynamic_cast(child); + auto html_text = std::make_unique(link_element->getTarget()); + html_text->setText(link_element->getTextContent()); + htmlElement->addChild(std::move(html_text)); + } + else if(child->getType() == MarkdownElement::Type::IMAGE) + { + auto link_element = dynamic_cast(child); + auto html_text = std::make_unique(link_element->getSource(), link_element->getAlt()); + htmlElement->addChild(std::move(html_text)); + } + } +} + void MarkdownConverter::convert(MarkdownDocument* markdownDoc, HtmlElement* parentElement) const { for(unsigned idx=0; idxgetNumElements();idx++) @@ -29,35 +62,8 @@ void MarkdownConverter::convert(MarkdownDocument* markdownDoc, HtmlElement* pare auto html_p_element = std::make_unique(); auto para_element = dynamic_cast(md_element); - for(unsigned idx=0; idx< para_element->getNumChildren(); idx++) - { - auto child = para_element->getChild(idx); - if (child->getType() == MarkdownElement::Type::INLINE_QUOTE) - { - auto html_quote = std::make_unique(); - html_quote->setText(child->getTextContent()); - html_p_element->addChild(std::move(html_quote)); - } - else if(child->getType() == MarkdownElement::Type::TEXT_SPAN) - { - auto html_text = std::make_unique(); - html_text->setText(child->getTextContent()); - html_p_element->addChild(std::move(html_text)); - } - else if(child->getType() == MarkdownElement::Type::LINK) - { - auto link_element = dynamic_cast(child); - auto html_text = std::make_unique(link_element->getTarget()); - html_text->setText(link_element->getTextContent()); - html_p_element->addChild(std::move(html_text)); - } - else if(child->getType() == MarkdownElement::Type::IMAGE) - { - auto link_element = dynamic_cast(child); - auto html_text = std::make_unique(link_element->getSource(), link_element->getAlt()); - html_p_element->addChild(std::move(html_text)); - } - } + onBlockElement(para_element, html_p_element.get()); + parentElement->addChild(std::move(html_p_element)); } else if(md_element->getType() == MarkdownElement::Type::BULLET_LIST) @@ -68,7 +74,9 @@ void MarkdownConverter::convert(MarkdownDocument* markdownDoc, HtmlElement* pare { auto child = list_element->getChild(idx); auto html_list_item = std::make_unique(); - html_list_item->setText(child->getTextContent()); + + onBlockElement(child, html_list_item.get()); + html_list->addChild(std::move(html_list_item)); } parentElement->addChild(std::move(html_list)); diff --git a/src/web/markdown/MarkdownConverter.h b/src/web/markdown/MarkdownConverter.h index 0626027..601e611 100644 --- a/src/web/markdown/MarkdownConverter.h +++ b/src/web/markdown/MarkdownConverter.h @@ -4,6 +4,8 @@ class HtmlDocument; class HtmlElement; +class MarkdownElementWithChildren; + class MarkdownDocument; class MarkdownConverter @@ -13,4 +15,6 @@ public: void convert(MarkdownDocument* markdownDoc, HtmlElement* parentElement) const; +private: + void onBlockElement(MarkdownElementWithChildren* mdElement, HtmlElement* htmlElement) const; }; diff --git a/src/web/markdown/MarkdownElement.cpp b/src/web/markdown/MarkdownElement.cpp index ec0d974..ff2da7a 100644 --- a/src/web/markdown/MarkdownElement.cpp +++ b/src/web/markdown/MarkdownElement.cpp @@ -9,3 +9,8 @@ const std::string& MarkdownElement::getTextContent() const { return mTextContent; } + +void MarkdownElement::addLine(const std::string& line) +{ + mTextContent += line + "\n"; +} diff --git a/src/web/markdown/MarkdownElement.h b/src/web/markdown/MarkdownElement.h index 7af2780..762e57c 100644 --- a/src/web/markdown/MarkdownElement.h +++ b/src/web/markdown/MarkdownElement.h @@ -10,12 +10,10 @@ public: HEADING, PARAGRAPH, TEXT_SPAN, - INLINE_CODE, - MULTILINE_CODE, INLINE_QUOTE, MULTILINE_QUOTE, - INLINE_SPECIAL, - MULTILINE_SPECIAL, + CUSTOM_INLINE, + CUSTOM_MULTILINE, LINK, IMAGE, BULLET_ITEM, @@ -26,6 +24,8 @@ public: void appendTextContent(const std::string& content); + void addLine(const std::string& line); + const std::string& getTextContent() const; virtual Type getType() const = 0; @@ -43,3 +43,9 @@ class MarkdownInlineElement : public MarkdownElement public: virtual ~MarkdownInlineElement() = default; }; + +class MarkdownMultilineElement : public MarkdownElement +{ +public: + virtual ~MarkdownMultilineElement() = default; +}; diff --git a/src/web/markdown/MarkdownParser.cpp b/src/web/markdown/MarkdownParser.cpp index f76306f..3fb2aef 100644 --- a/src/web/markdown/MarkdownParser.cpp +++ b/src/web/markdown/MarkdownParser.cpp @@ -1,15 +1,21 @@ #include "MarkdownParser.h" #include "MarkdownDocument.h" -#include "StringUtils.h" #include "MarkdownComponents.h" +#include "Lexer.h" +#include "StringUtils.h" + #include #include +static constexpr char MULTILINE_QUOTE_DELIMITER[]{"```"}; +static constexpr char HEADING_DELIMITER{'#'}; + MarkdownParser::MarkdownParser() { - + mCustomMultilineDelimiters = {{"$$"}}; + mCustomInlineDelimiters = {{"$"}}; } MarkdownParser::~MarkdownParser() @@ -17,362 +23,345 @@ MarkdownParser::~MarkdownParser() } -void MarkdownParser::onMultilineQuote() +bool MarkdownParser::isInMultilineBlock() const { - auto quote = std::make_unique(mWorkingTag); - quote->appendTextContent(mDocumentContent); - - mDocumentContent.clear(); - mWorkingTag.clear(); - - mDocumentState = DocumentState::NONE; - mMarkdownDocument->addElement(std::move(quote)); - - onNewParagraph(); -} - -void MarkdownParser::onInlineQuote() -{ - auto quote = std::make_unique(); - quote->appendTextContent(mLineContent); - mLineContent.clear(); - - mLineState = LineState::NONE; - if(mWorkingParagraph) + if (!mWorkingElement) { - mWorkingParagraph->addChild(std::move(quote)); + return false; } + auto working_type = mWorkingElement->getType(); + return working_type == MarkdownElement::Type::MULTILINE_QUOTE || working_type == MarkdownElement::Type::CUSTOM_MULTILINE ; } -void MarkdownParser::onHeading(unsigned level) +unsigned MarkdownParser::checkForLink(const std::string& lineSection) { - auto heading = std::make_unique(level); - heading->appendTextContent(mLineContent); - mMarkdownDocument->addElement(std::move(heading)); -} - -void MarkdownParser::onNewParagraph() -{ - if (mWorkingBulletList) + if (lineSection.empty()) { - mMarkdownDocument->addElement(std::move(mWorkingBulletList)); - mWorkingBulletList.reset(); - mDocumentState == DocumentState::NONE; + return 0; } - else if (mWorkingParagraph) - { - onTextSpan(); - if (!mWorkingParagraph->getNumChildren() == 0) + std::vector hits; + unsigned hit_size{0}; + if (Lexer::matchPattern("[@](@)", lineSection, '@', hits)) + { + if (hits.size() == 2) { - mMarkdownDocument->addElement(std::move(mWorkingParagraph)); + auto tag = hits[0]; + auto target = hits[1]; + + onTextSpanFinished(); + + auto element = std::make_unique(target); + element->appendTextContent(tag); + addChildToWorkingElement(std::move(element)); + hit_size = 4 + tag.size() + target.size(); } } - mWorkingParagraph = std::make_unique(); + return hit_size; } -void MarkdownParser::onTextSpan() +unsigned MarkdownParser::checkForImage(const std::string& lineSection) { - mLineContent.clear(); - - if(mWorkingParagraph && !mDocumentContent.empty()) + if (lineSection.empty()) { - auto text_span = std::make_unique(); - text_span->appendTextContent(mDocumentContent); - mWorkingParagraph->addChild(std::move(text_span)); - mDocumentContent.clear(); + return 0; } + + std::vector hits; + unsigned hit_size{0}; + if (Lexer::matchPattern("![@](@)", lineSection, '@', hits)) + { + if (hits.size() == 2) + { + auto alt = hits[0]; + auto source = hits[1]; + + onTextSpanFinished(); + + auto element = std::make_unique(source, alt); + addChildToWorkingElement(std::move(element)); + hit_size = 5 + alt.size() + source.size(); + } + } + return hit_size; } -std::pair MarkdownParser::onTick(unsigned tickCount) +unsigned MarkdownParser::checkForInlineQuote(const std::string& lineSection) { - unsigned new_tick_count = tickCount; - bool stop_line_processing = false; - - if (tickCount == 2) + if (lineSection.empty()) { - if (mDocumentState == DocumentState::IN_MULTILINEQUOTE) + return 0; + } + + std::vector hits; + unsigned hit_size{0}; + if (Lexer::matchPattern("`@`", lineSection, '@', hits)) + { + if (hits.size() == 1) { - onMultilineQuote(); - stop_line_processing = true; + auto content = hits[0]; + + onTextSpanFinished(); + + auto element = std::make_unique(); + element->appendTextContent(content); + + addChildToWorkingElement(std::move(element)); + hit_size = 2 + content.size(); + } + } + return hit_size; +} +unsigned MarkdownParser::checkForCustomInline(const std::string& lineSection) +{ + if (lineSection.empty()) + { + return 0; + } + + std::vector hits; + unsigned hit_size{0}; + + for(unsigned idx=0; idx(delimiter); + element->appendTextContent(content); + + addChildToWorkingElement(std::move(element)); + hit_size = 2*delimiter.size() + content.size(); + break; + } + } + } + return hit_size; +} + +void MarkdownParser::onTextSpanFinished() +{ + if (!mWorkingLine.empty()) + { + if (mWorkingTextSpan) + { + std::cout << "Adding to existing text span: " << std::endl; + mWorkingTextSpan->appendTextContent(mWorkingLine); } else { - onNewParagraph(); - mLineState = LineState::IN_MULTILINE_TAG; - new_tick_count = 0; - mDocumentState = DocumentState::IN_MULTILINEQUOTE; + std::cout << "Adding new text span: " << mWorkingLine << std::endl; + auto text_span = std::make_unique(); + text_span->addLine(mWorkingLine); + mWorkingTextSpan = text_span.get(); + + addChildToWorkingElement(std::move(text_span)); + } + mWorkingLine.clear(); + mWorkingTextSpan = nullptr; + } +} + +void MarkdownParser::addChildToWorkingElement(std::unique_ptr child) +{ + dynamic_cast(mWorkingElement)->addChild(std::move(child)); +} + +void MarkdownParser::processLine(const std::string& line) +{ + if (isInMultilineBlock()) + { + mWorkingElement->addLine(line); + return; + } + + if (!mWorkingElement) + { + std::cout << "Adding new paragraph " << std::endl; + auto paragraph = std::make_unique(); + mWorkingElement = paragraph.get(); + mMarkdownDocument->addElement(std::move(paragraph)); + } + + if (mWorkingElement && mWorkingElement->getType() == MarkdownElement::Type::PARAGRAPH) + { + if (auto last_text_span = dynamic_cast(mWorkingElement)->getLastChild()) + { + mWorkingTextSpan = last_text_span; } } - else if(mLineState == LineState::IN_INLINEQUOTE) + + unsigned line_position = 0; + mWorkingLine.clear(); + while(line_position < line.size()) { - if (mLineContent.empty()) + const auto remaining = line.substr(line_position, line.size() - line_position); + if(auto length = checkForImage(remaining)) { - mLineState = LineState::NONE; - new_tick_count++; + line_position += length; + } + else if(auto length = checkForLink(remaining)) + { + line_position += length; + } + else if(auto length = checkForInlineQuote(remaining)) + { + line_position += length; + } + else if(auto length = checkForCustomInline(remaining)) + { + line_position += length; } else { - new_tick_count = 0; - onInlineQuote(); - } - } - else if(mDocumentState == DocumentState::IN_MULTILINEQUOTE) - { - new_tick_count++; - mLineContent += '`'; - } - else - { - new_tick_count++; - mLineState = LineState::IN_INLINEQUOTE; - } - return {new_tick_count, stop_line_processing}; -} - -void MarkdownParser::onLink() -{ - auto element = std::make_unique(mLineContent); - mLineContent.clear(); - - element->appendTextContent(mWorkingTag); - mWorkingTag.clear(); - - if (mWorkingParagraph) - { - mWorkingParagraph->addChild(std::move(element)); - } - mLineState = LineState::NONE; -} - -void MarkdownParser::onImage() -{ - auto element = std::make_unique(mLineContent, mWorkingTag); - mLineContent.clear(); - - element->appendTextContent(mWorkingTag); - mWorkingTag.clear(); - - if (mWorkingParagraph) - { - mWorkingParagraph->addChild(std::move(element)); - } - mLineState = LineState::NONE; -} - -void MarkdownParser::onBulletItem() -{ - if (!mWorkingBulletList) - { - mWorkingBulletList = std::make_unique(); - mDocumentState == DocumentState::IN_BULLETS; - } - - auto item = std::make_unique(); - item->appendTextContent(mLineContent); - mLineContent.clear(); - - mWorkingBulletList->addChild(std::move(item)); -} - -void MarkdownParser::processLine() -{ - mLineContent.clear(); - mLineState = LineState::NONE; - - unsigned heading_level{0}; - unsigned tick_count{0}; - bool flushed_pre_inline = false; - - bool first_nonspace = false; - for(auto c : mWorkingLine) - { - if (!StringUtils::IsSpace(c)) - { - if (first_nonspace) - { - first_nonspace = false; - } - else - { - first_nonspace = true; - } - } - else - { - first_nonspace = false; - } - - if (c == '`') - { - auto [ret_tick_count, stop_line_processing] = onTick(tick_count); - tick_count = ret_tick_count; - if(stop_line_processing) - { - return; - } - } - else - { - if (mLineState == LineState::IN_INLINEQUOTE) - { - if (!flushed_pre_inline) - { - mDocumentContent += mLineContent; - onTextSpan(); - flushed_pre_inline = true; - } - mLineContent += c; - } - else if (mDocumentState == DocumentState::IN_MULTILINEQUOTE) - { - mLineContent += c; - } - else if(mLineState == LineState::IN_LINK_TAG) - { - if (c == ']') - { - mLineState = LineState::AWAITING_LINK_BODY; - } - else - { - mWorkingTag += c; - } - } - else if(mLineState == LineState::AWAITING_LINK_BODY) - { - if (c == '(') - { - mLineState = LineState::IN_LINK_BODY; - } - else - { - mLineContent = '[' + mWorkingTag + ']'; - mLineState = LineState::NONE; - } - } - else if(mLineState == LineState::IN_LINK_BODY) - { - if(c==')') - { - onLink(); - } - else - { - mLineContent += c; - } - } - else if(mLineState == LineState::AWAITING_IMG_TAG) - { - if (c == '[') - { - mLineState = LineState::IN_IMG_TAG; - } - else - { - mLineContent = "!["; - mLineState = LineState::NONE; - } - } - else if(mLineState == LineState::IN_IMG_TAG) - { - if (c == ']') - { - mLineState = LineState::AWAITING_IMG_BODY; - } - else - { - mWorkingTag += c; - } - } - else if(mLineState == LineState::AWAITING_IMG_BODY) - { - if (c == '(') - { - mLineState = LineState::IN_IMG_BODY; - } - else - { - mLineContent = "![" + mWorkingTag + "]"; - mWorkingTag.clear(); - mLineState = LineState::NONE; - } - } - else if(mLineState == LineState::IN_IMG_BODY) - { - if (c == ')') - { - onImage(); - } - else - { - mLineContent += c; - } - } - else - { - if (c == '#') - { - onNewParagraph(); - mLineState = LineState::IN_HEADING; - heading_level++; - } - else if(c == '[') - { - mDocumentContent += mLineContent; - onTextSpan(); - mLineState = LineState::IN_LINK_TAG; - } - else if(c == '!') - { - mDocumentContent += mLineContent; - onTextSpan(); - mLineState = LineState::AWAITING_IMG_TAG; - } - else if(first_nonspace && c == '*') - { - if (!mWorkingBulletList) - { - onNewParagraph(); - } - mLineState = LineState::IN_BULLETS; - } - else - { - mLineContent += c; - } - } - } - } - - if (mLineState == LineState::IN_HEADING) - { - onHeading(heading_level); - } - else if(mLineState == LineState::IN_MULTILINE_TAG) - { - mWorkingTag = mLineContent; - } - else if (mLineState == LineState::IN_INLINEQUOTE) - { - onTextSpan(); - } - else if (mLineState == LineState::IN_BULLETS) - { - onBulletItem(); - } - else - { - if (mLineContent.size() > 0) - { - mDocumentContent.append(mLineContent); + mWorkingLine += line[line_position]; + line_position++; } } + onTextSpanFinished(); } void MarkdownParser::onEmptyLine() { - onNewParagraph(); + if (!isInMultilineBlock()) + { + onSectionFinished(); + } +} + +bool MarkdownParser::startsWithMultiLineQuote(const std::string& line) const +{ + const bool ignore_whitespace{true}; + return StringUtils::startsWith(line, MULTILINE_QUOTE_DELIMITER, ignore_whitespace); +} + +int MarkdownParser::startsWithCustomMultilineBlock(const std::string& line) const +{ + for(unsigned idx=0; idxgetType() == MarkdownElement::Type::MULTILINE_QUOTE) + { + onSectionFinished(); + } + else if(isInMultilineBlock()) + { + processLine(line); + } + else + { + const auto tag = StringUtils::removeUpTo(line, MULTILINE_QUOTE_DELIMITER); + auto quote = std::make_unique(tag); + mWorkingElement = quote.get(); + mMarkdownDocument->addElement(std::move(quote)); + } +} + +void MarkdownParser::onFoundCustomMultiLineBlock(const std::string& line, unsigned blockSlot) +{ + if (mWorkingElement && mWorkingElement->getType() == MarkdownElement::Type::CUSTOM_MULTILINE && blockSlot == mCustomDelimiterIndex) + { + onSectionFinished(); + } + else if(isInMultilineBlock()) + { + processLine(line); + } + else + { + const auto delimiter = mCustomMultilineDelimiters[blockSlot]; + const auto tag = StringUtils::removeUpTo(line, delimiter); + auto quote = std::make_unique(tag, delimiter); + mWorkingElement = quote.get(); + mMarkdownDocument->addElement(std::move(quote)); + } +} + +void MarkdownParser::onFoundHeading(const std::string& line) +{ + if(isInMultilineBlock()) + { + processLine(line); + } + else + { + onSectionFinished(); + + unsigned level = StringUtils::countFirstConsecutiveHits(line, HEADING_DELIMITER); + auto heading = std::make_unique(level); + + std::string prefix; + for(unsigned idx=0; idxappendTextContent(StringUtils::stripSurroundingWhitepsace(StringUtils::removeUpTo(line, prefix))); + mMarkdownDocument->addElement(std::move(heading)); + } +} + +void MarkdownParser::onFoundBulletItem(const std::string& line) +{ + if(isInMultilineBlock()) + { + processLine(line); + } + else + { + if (mWorkingBulletList) + { + auto item = std::make_unique(); + mWorkingElement = item.get(); + mWorkingBulletList->addChild(std::move(item)); + } + else + { + std::cout << "Starting new bullet list" << std::endl; + auto bullet_list = std::make_unique(); + mWorkingBulletList = bullet_list.get(); + + mMarkdownDocument->addElement(std::move(bullet_list)); + + auto bullet_item = std::make_unique(); + mWorkingElement = bullet_item.get(); + mWorkingBulletList->addChild(std::move(bullet_item)); + + processLine(StringUtils::removeUpTo(line, "*")); + } + } +} + +void MarkdownParser::onSectionFinished() +{ + std::cout << "Section is finished" << std::endl; + mWorkingElement = nullptr; + mWorkingBulletList = nullptr; + mWorkingTextSpan = nullptr; } std::unique_ptr MarkdownParser::run(const std::string& content) @@ -384,17 +373,39 @@ std::unique_ptr MarkdownParser::run(const std::string& content while (std::getline(ss, line, '\n')) { - if (line.empty()) + std::cout << "Processing line " << line << std::endl; + if (StringUtils::isWhitespaceOnly(line)) { + std::cout << "Is whitespace only " << std::endl; onEmptyLine(); continue; } - mWorkingLine = line; - processLine(); + else if (startsWithMultiLineQuote(line)) + { + std::cout << "Found multiline quote" << std::endl; + onFoundMultiLineQuote(line); + } + else if (auto result = startsWithCustomMultilineBlock(line); result >= 0) + { + std::cout << "Found custom multiline" << std::endl; + onFoundCustomMultiLineBlock(line, result); + } + else if (startsWithHeading(line)) + { + std::cout << "Found heading" << std::endl; + onFoundHeading(line); + } + else if(startsWithBulletItem(line)) + { + std::cout << "Found bulletitem" << std::endl; + onFoundBulletItem(line); + } + else + { + std::cout << "Found nothing - process line" << std::endl; + processLine(line); + } } - onTextSpan(); - onNewParagraph(); - return std::move(mMarkdownDocument); } diff --git a/src/web/markdown/MarkdownParser.h b/src/web/markdown/MarkdownParser.h index b65c333..65a2dbd 100644 --- a/src/web/markdown/MarkdownParser.h +++ b/src/web/markdown/MarkdownParser.h @@ -2,36 +2,15 @@ #include #include +#include class MarkdownDocument; -class MarkdownParagraph; +class MarkdownElement; +class MarkdownInlineElement; class MarkdownBulletList; class MarkdownParser { - enum class DocumentState - { - NONE, - IN_MULTILINEQUOTE, - IN_BULLETS - }; - - enum class LineState - { - NONE, - IN_HEADING, - IN_INLINEQUOTE, - IN_MULTILINE_TAG, - IN_LINK_TAG, - AWAITING_LINK_BODY, - IN_LINK_BODY, - AWAITING_IMG_TAG, - IN_IMG_TAG, - AWAITING_IMG_BODY, - IN_IMG_BODY, - IN_BULLETS - }; - public: MarkdownParser(); @@ -40,34 +19,40 @@ public: std::unique_ptr run(const std::string& content); private: - void processLine(); + void addChildToWorkingElement(std::unique_ptr child); - void onMultilineQuote(); - void onInlineQuote(); - void onHeading(unsigned level); - void onLink(); - void onImage(); + unsigned checkForImage(const std::string& lineSection); + unsigned checkForLink(const std::string& lineSection); + unsigned checkForInlineQuote(const std::string& lineSection); + unsigned checkForCustomInline(const std::string& lineSection); + + bool isInMultilineBlock() const; + + bool startsWithMultiLineQuote(const std::string& line) const; + int startsWithCustomMultilineBlock(const std::string& line) const; + bool startsWithHeading(const std::string& line) const; + bool startsWithBulletItem(const std::string& line) const; + + void onFoundMultiLineQuote(const std::string& line); + void onFoundCustomMultiLineBlock(const std::string& line, unsigned blockSlot); + void onFoundHeading(const std::string& line); + void onFoundBulletItem(const std::string& line); void onEmptyLine(); - void onNewParagraph(); + void onSectionFinished(); + void onTextSpanFinished(); - void onBulletItem(); + void processLine(const std::string& line); - void onTextSpan(); + unsigned mCustomDelimiterIndex{0}; + std::vector mCustomMultilineDelimiters; + std::vector mCustomInlineDelimiters; - std::pair onTick(unsigned tickCount); + MarkdownElement* mWorkingElement{nullptr}; + MarkdownBulletList* mWorkingBulletList{nullptr}; + MarkdownInlineElement* mWorkingTextSpan{nullptr}; std::string mWorkingLine; - std::string mLineContent; - std::string mDocumentContent; - - std::string mWorkingTag; - - LineState mLineState {LineState::NONE}; - DocumentState mDocumentState {DocumentState::NONE}; - - std::unique_ptr mWorkingParagraph; - std::unique_ptr mWorkingBulletList; std::unique_ptr mMarkdownDocument; }; diff --git a/src/web/xml/XmlParser.cpp b/src/web/xml/XmlParser.cpp index 32fad1c..1f5d1dc 100644 --- a/src/web/xml/XmlParser.cpp +++ b/src/web/xml/XmlParser.cpp @@ -53,11 +53,11 @@ void XmlParser::processLine(const std::string& input) void XmlParser::onChar(char c) { - if(StringUtils::IsAlphaNumeric(c)) + if(StringUtils::isAlphaNumeric(c)) { onAlphaNumeric(c); } - else if(StringUtils::IsSpace(c)) + else if(StringUtils::isSpace(c)) { onSpace(c); } diff --git a/src/web/xml/xml-elements/XmlElement.cpp b/src/web/xml/xml-elements/XmlElement.cpp index 5bef9b4..d2d7b20 100644 --- a/src/web/xml/xml-elements/XmlElement.cpp +++ b/src/web/xml/xml-elements/XmlElement.cpp @@ -98,10 +98,12 @@ XmlElement* XmlElement::getChild(std::size_t index) const return mChildren[index].get(); } -std::string XmlElement::toString(unsigned depth) const +std::string XmlElement::toString(unsigned depth, bool keepInline) const { const auto prefix = std::string(2*depth, ' '); + std::string line_ending = keepInline ? "" : "\n"; + auto content = prefix + "<" + getTagName(); for (std::size_t idx=0; idx< getNumAttributes(); idx++) { @@ -112,7 +114,7 @@ std::string XmlElement::toString(unsigned depth) const const auto num_children = getNumChildren(); if (num_children == 0 && getText().empty()) { - content += "/>\n"; + content += "/>" + line_ending; return content; } else @@ -127,18 +129,18 @@ std::string XmlElement::toString(unsigned depth) const if (num_children>0) { - content += "\n"; + content += line_ending; } for (std::size_t idx=0; idx< getNumChildren(); idx++) { auto child = getChild(idx); - content += child->toString(depth+1); + content += child->toString(depth+1, keepInline); } if (num_children>0) { content += prefix; } - content += "\n"; + content += "" + line_ending; return content; } diff --git a/src/web/xml/xml-elements/XmlElement.h b/src/web/xml/xml-elements/XmlElement.h index 39b2988..fb1b1f1 100644 --- a/src/web/xml/xml-elements/XmlElement.h +++ b/src/web/xml/xml-elements/XmlElement.h @@ -34,7 +34,7 @@ public: void setText(const std::string& text); void setTagName(const std::string& tagName); - virtual std::string toString(unsigned depth = 0) const; + virtual std::string toString(unsigned depth = 0, bool keepInline = false) const; protected: std::string mTagName; diff --git a/test/compiler/CMakeLists.txt b/test/compiler/CMakeLists.txt index 2d53f0e..1cd4913 100644 --- a/test/compiler/CMakeLists.txt +++ b/test/compiler/CMakeLists.txt @@ -1,5 +1,6 @@ set(COMPILER_UNIT_TEST_FILES compiler/TestTemplatingEngine.cpp + compiler/TestLexer.cpp PARENT_SCOPE ) diff --git a/test/compiler/TestLexer.cpp b/test/compiler/TestLexer.cpp new file mode 100644 index 0000000..965586f --- /dev/null +++ b/test/compiler/TestLexer.cpp @@ -0,0 +1,19 @@ +#include "Lexer.h" + +#include "TestFramework.h" +#include "TestUtils.h" + +#include + +TEST_CASE(TestLexer_MatchPattern, "[compiler]") +{ + std::string input = "[I'm inside the tag](I'm inside the brackets), followed by more text."; + std::string pattern = "[@](@)"; + + std::vector hits; + const auto matched = Lexer::matchPattern(pattern, input, '@', hits); + REQUIRE(matched); + REQUIRE(hits.size() == 2); + REQUIRE(hits[0] == "I'm inside the tag"); + REQUIRE(hits[1] == "I'm inside the brackets"); +} diff --git a/test/core/TestStringUtils.cpp b/test/core/TestStringUtils.cpp index 5edb5c0..7c7e005 100644 --- a/test/core/TestStringUtils.cpp +++ b/test/core/TestStringUtils.cpp @@ -3,13 +3,28 @@ #include "TestFramework.h" #include "TestUtils.h" -#include - -TEST_CASE(TestStringUtils_Strip, "core") +TEST_CASE(TestStringUtils_StripSurroundingWhitepsace, "core") { std::string input = " super() "; - std::string stripped = StringUtils::strip(input); - - auto predicate = stripped == "super()"; - REQUIRE(predicate); + std::string stripped = StringUtils::stripSurroundingWhitepsace(input); + REQUIRE(stripped == "super()"); +} + +TEST_CASE(TestStringUtils_RemoveUpTo, "core") +{ + std::string input = "def{filename}abc/123/456"; + std::string removed = StringUtils::removeUpTo(input, "{filename}"); + REQUIRE(removed == "abc/123/456"); +} + +TEST_CASE(TestStringUtils_startsWith, "core") +{ + std::string input = " ```some triple ticks "; + bool ignore_whitespace{false}; + auto starts_with = StringUtils::startsWith(input, "```", ignore_whitespace); + REQUIRE(!starts_with); + + ignore_whitespace = true; + starts_with = StringUtils::startsWith(input, "```", ignore_whitespace); + REQUIRE(starts_with); } diff --git a/test/data/simple_markdown.md b/test/data/simple_markdown.md new file mode 100644 index 0000000..2decf41 --- /dev/null +++ b/test/data/simple_markdown.md @@ -0,0 +1,30 @@ +# I'm a level one header +I'm some text under level one + +## I'm a level two header +I'm some text under level two + +``` +I'm a code block +``` + +I'm a line under the code block, with some `inline code`. + +### I'm a level three header +I'm a bullet point list: + +* First point +* Second point +* Third point + +With a [hyperlink](www.imahyperlink.com) embedded. + +# I'm another level one header + +I'm some inline math $a = b + c$ and I'm some standalone math: + +$$ +d = e + f +$$ + +![This is an image](https://myoctocat.com/assets/images/base-octocat.svg) diff --git a/test/test_runner.cpp b/test/test_runner.cpp index 54e5d0d..a039331 100644 --- a/test/test_runner.cpp +++ b/test/test_runner.cpp @@ -1,17 +1,22 @@ #include "TestFramework.h" +#include "CommandLineArgs.h" + #ifdef _WIN32 #include #endif //int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, PWSTR pCmdLine, int nCmdShow) -int main() +int main(int argc, char *argv[]) { #ifdef _WIN32 CoInitializeEx(nullptr, COINIT_APARTMENTTHREADED); #endif - auto result = TestCaseRunner::getInstance().run(); + auto args = CommandLineArgs::Create(); + args->process(argc, argv); + + auto result = TestCaseRunner::getInstance().run(args->getUserArgs()); #ifdef _WIN32 CoUninitialize(); diff --git a/test/test_utils/TestCaseRunner.cpp b/test/test_utils/TestCaseRunner.cpp index ab74951..8042865 100644 --- a/test/test_utils/TestCaseRunner.cpp +++ b/test/test_utils/TestCaseRunner.cpp @@ -36,17 +36,30 @@ void TestCaseRunner::markTestFailure(const std::string& line) sFailureLine = line; } -bool TestCaseRunner::run() +bool TestCaseRunner::run(const std::vector& args) { + std::string test_to_run; + if (args.size() > 0 ) + { + test_to_run = args[0]; + } FileLogger::GetInstance().disable(); for (auto test_case : mCases) { + if (!test_to_run.empty()) + { + if (test_case->getName() != test_to_run) + { + continue; + } + } + sLastTestFailed = false; std::cout << "TestFramework: Running Test - " << test_case->getName() << std::endl; test_case->run(); if (sLastTestFailed) { - std::cout << "Failed at line: " << sLastTestFailed << std::endl; + std::cout << "Failed at line: " << sFailureLine << std::endl; mFailingTests.push_back(test_case->getName()); } } diff --git a/test/test_utils/TestCaseRunner.h b/test/test_utils/TestCaseRunner.h index 7d1b45d..029aa51 100644 --- a/test/test_utils/TestCaseRunner.h +++ b/test/test_utils/TestCaseRunner.h @@ -18,7 +18,7 @@ public: void markTestFailure(const std::string& line); - bool run(); + bool run(const std::vector& args); private: std::vector mFailingTests; diff --git a/test/test_utils/TestFramework.h b/test/test_utils/TestFramework.h index 1ede878..7e86d06 100644 --- a/test/test_utils/TestFramework.h +++ b/test/test_utils/TestFramework.h @@ -16,12 +16,12 @@ struct Holder static void Test##NAME() \ -#define REQUIRE(predicate) \ - if(!predicate) \ - { \ - TestCaseRunner::getInstance().markTestFailure(std::to_string(__LINE__)); \ - return; \ - } \ +#define REQUIRE(predicate) \ + if(!bool(predicate)) \ + { \ + TestCaseRunner::getInstance().markTestFailure(std::to_string(__LINE__) + " with check: '" + std::string(#predicate) + "'"); \ + return; \ + } \ diff --git a/test/test_utils/TestUtils.h b/test/test_utils/TestUtils.h index 1524595..fc3d946 100644 --- a/test/test_utils/TestUtils.h +++ b/test/test_utils/TestUtils.h @@ -7,9 +7,17 @@ using Path = std::filesystem::path; class TestUtils { public: - static Path getTestOutputDir() + static Path getTestOutputDir(const std::string& testFileName = {}) { - return std::filesystem::current_path() / "test_output"; + if (!testFileName.empty()) + { + const auto name = Path(testFileName).filename().stem(); + return std::filesystem::current_path() / "test_output" / name; + } + else + { + return std::filesystem::current_path() / "test_output"; + } } static Path getTestDataDir() diff --git a/test/web/TestMarkdownParser.cpp b/test/web/TestMarkdownParser.cpp index 00fdadf..6a3adbe 100644 --- a/test/web/TestMarkdownParser.cpp +++ b/test/web/TestMarkdownParser.cpp @@ -11,7 +11,9 @@ #include "TestFramework.h" #include "TestUtils.h" -TEST_CASE(TestMarkdownParser, "web") +#include + +TEST_CASE(TestMarkdownParser, "[web]") { File md_file(TestUtils::getTestDataDir() / "sample_markdown.md"); const auto md_content = md_file.readText(); @@ -19,12 +21,59 @@ TEST_CASE(TestMarkdownParser, "web") MarkdownParser parser; auto md_doc = parser.run(md_content); + std::vector expected_top_level = { + MarkdownElement::Type::HEADING, + MarkdownElement::Type::PARAGRAPH, + MarkdownElement::Type::HEADING, + MarkdownElement::Type::PARAGRAPH, + MarkdownElement::Type::MULTILINE_QUOTE, + MarkdownElement::Type::PARAGRAPH, + MarkdownElement::Type::HEADING, + MarkdownElement::Type::PARAGRAPH + }; + + REQUIRE(expected_top_level.size() <= md_doc->getNumElements()); + for(unsigned idx=0; idxgetElement(idx)->getType() == expected_top_level[idx]); + } + MarkdownConverter converter; auto html = converter.convert(md_doc.get()); HtmlWriter writer; const auto html_string = writer.toString(html.get()); - File html_file(TestUtils::getTestOutputDir() / "TestMarkdownParserOut.html"); + File html_file(TestUtils::getTestOutputDir(__FILE__) / "TestMarkdownParser.html"); + html_file.writeText(html_string); +} + +TEST_CASE(TestMarkdownParser_Simple, "[web]") +{ + File md_file(TestUtils::getTestDataDir() / "simple_markdown.md"); + const auto md_content = md_file.readText(); + + REQUIRE(!md_content.empty()); + + MarkdownParser parser; + auto md_doc = parser.run(md_content); + + std::vector expected_top_level = { + MarkdownElement::Type::PARAGRAPH, + MarkdownElement::Type::BULLET_LIST}; + + //REQUIRE(expected_top_level.size() <= md_doc->getNumElements()); + for(unsigned idx=0; idxgetElement(idx)->getType() == expected_top_level[idx]); + } + + MarkdownConverter converter; + auto html = converter.convert(md_doc.get()); + + HtmlWriter writer; + const auto html_string = writer.toString(html.get()); + + File html_file(TestUtils::getTestOutputDir(__FILE__) / "TestMarkdownParser_simple.html"); html_file.writeText(html_string); }