From 650cfa5c6fa79e8f55f60efac85c3edee6181aff Mon Sep 17 00:00:00 2001 From: James Grogan Date: Thu, 1 Dec 2022 18:38:46 +0000 Subject: [PATCH] Simple markdown converter. --- src/web/html/HtmlElement.h | 78 +++++++++- src/web/markdown/MarkdownConverter.cpp | 26 ++++ src/web/markdown/MarkdownDocument.h | 102 +++++++++++++- src/web/markdown/MarkdownParser.cpp | 188 ++++++++++++++++++++++++- src/web/markdown/MarkdownParser.h | 25 +++- 5 files changed, 410 insertions(+), 9 deletions(-) diff --git a/src/web/html/HtmlElement.h b/src/web/html/HtmlElement.h index aada2f1..472fcaa 100644 --- a/src/web/html/HtmlElement.h +++ b/src/web/html/HtmlElement.h @@ -2,6 +2,7 @@ #include #include "XmlElement.h" +#include "XmlAttribute.h" class HtmlElement : public XmlElement { @@ -14,7 +15,11 @@ public: PARAGRAPH, TEXT_RUN, CODE, - HEADING + HEADING, + HYPERLINK, + IMAGE, + UNORDERED_LIST, + LIST_ITEM }; HtmlElement(const std::string& tagName); @@ -50,4 +55,75 @@ public: } }; +class HtmlHyperlinkElement : public HtmlElement +{ +public: + HtmlHyperlinkElement(const std::string& href) : HtmlElement("a") + { + if(!href.empty()) + { + auto href_attr = std::make_unique("href"); + href_attr->setValue(href); + addAttribute(std::move(href_attr)); + } + } + + Type getType() const override + { + return Type::HYPERLINK; + } +}; + +class HtmlImageElement : public HtmlElement +{ +public: + HtmlImageElement(const std::string& src, const std::string& alt) : HtmlElement("img") + { + if(!src.empty()) + { + auto href_attr = std::make_unique("src"); + href_attr->setValue(src); + addAttribute(std::move(href_attr)); + } + + if(!alt.empty()) + { + auto href_attr = std::make_unique("alt"); + href_attr->setValue(alt); + addAttribute(std::move(href_attr)); + } + } + + Type getType() const override + { + return Type::IMAGE; + } +}; + +class HtmlUnorderedList : public HtmlElement +{ +public: + HtmlUnorderedList() : HtmlElement("ul") + { + } + + Type getType() const override + { + return Type::UNORDERED_LIST; + 
} +}; + +class HtmlListItem : public HtmlElement +{ +public: + HtmlListItem() : HtmlElement("li") + { + } + + Type getType() const override + { + return Type::LIST_ITEM; + } +}; + using HtmlElementUPtr = std::unique_ptr; diff --git a/src/web/markdown/MarkdownConverter.cpp b/src/web/markdown/MarkdownConverter.cpp index 9bce3e9..b561fee 100644 --- a/src/web/markdown/MarkdownConverter.cpp +++ b/src/web/markdown/MarkdownConverter.cpp @@ -43,9 +43,35 @@ std::unique_ptr MarkdownConverter::convert(MarkdownDocument* markd html_text->setText(child->getTextContent()); html_p_element->addChild(std::move(html_text)); } + else if(child->getType() == MarkdownElement::Type::LINK) + { + auto link_element = dynamic_cast(child); + auto html_text = std::make_unique(link_element->getTarget()); + html_text->setText(link_element->getTextContent()); + html_p_element->addChild(std::move(html_text)); + } + else if(child->getType() == MarkdownElement::Type::IMAGE) + { + auto link_element = dynamic_cast(child); + auto html_text = std::make_unique(link_element->getSource(), link_element->getAlt()); + html_p_element->addChild(std::move(html_text)); + } } html_doc->addElementToBody(std::move(html_p_element)); } + else if(md_element->getType() == MarkdownElement::Type::BULLET_LIST) + { + auto list_element = dynamic_cast(md_element); + auto html_list = std::make_unique(); + for(unsigned idx=0; idx< list_element->getNumChildren(); idx++) + { + auto child = list_element->getChild(idx); + auto html_list_item = std::make_unique(); + html_list_item->setText(child->getTextContent()); + html_list->addChild(std::move(html_list_item)); + } + html_doc->addElementToBody(std::move(html_list)); + } else if(md_element->getType() == MarkdownElement::Type::MULTILINE_QUOTE) { auto html_quote = std::make_unique(); diff --git a/src/web/markdown/MarkdownDocument.h b/src/web/markdown/MarkdownDocument.h index d2da606..549e637 100644 --- a/src/web/markdown/MarkdownDocument.h +++ b/src/web/markdown/MarkdownDocument.h @@ 
-18,7 +18,9 @@ public: INLINE_SPECIAL, MULTILINE_SPECIAL, LINK, - IMAGE + IMAGE, + BULLET_ITEM, + BULLET_LIST }; virtual ~MarkdownElement() = default; @@ -83,6 +85,45 @@ public: std::vector > mChildren; }; +class MarkdownBulletItem : public MarkdownElement +{ +public: + virtual ~MarkdownBulletItem() = default; + + Type getType() const override + { + return Type::BULLET_ITEM; + } +}; + +class MarkdownBulletList : public MarkdownElement +{ +public: + virtual ~MarkdownBulletList() = default; + + Type getType() const override + { + return Type::BULLET_LIST; + } + + void addChild(std::unique_ptr child) + { + mChildren.push_back(std::move(child)); + } + + std::size_t getNumChildren() const + { + return mChildren.size(); + } + + MarkdownBulletItem* getChild(std::size_t idx) const + { + return mChildren[idx].get(); + } + + std::vector > mChildren; +}; + class MarkdownHeading : public MarkdownElement { public: @@ -121,6 +162,63 @@ public: } }; +class MarkdownLink : public MarkdownInlineElement +{ +public: + + MarkdownLink(const std::string& target) + : mTarget(target) + { + + } + + virtual ~MarkdownLink() = default; + + const std::string& getTarget() const + { + return mTarget; + } + + Type getType() const override + { + return Type::LINK; + } +private: + std::string mTarget; +}; + +class MarkdownImage : public MarkdownInlineElement +{ +public: + MarkdownImage(const std::string& source, const std::string& alt) + : mSource(source), + mAlt(alt) + { + + } + + virtual ~MarkdownImage() = default; + + Type getType() const override + { + return Type::IMAGE; + } + + const std::string& getSource() const + { + return mSource; + } + + const std::string& getAlt() const + { + return mAlt; + } + +private: + std::string mSource; + std::string mAlt; +}; + class MarkdownMultilineQuote : public MarkdownElement { public: @@ -142,6 +240,8 @@ private: }; + + class MarkdownDocument { public: diff --git a/src/web/markdown/MarkdownParser.cpp b/src/web/markdown/MarkdownParser.cpp index 
2ab2d99..f05dd77 100644 --- a/src/web/markdown/MarkdownParser.cpp +++ b/src/web/markdown/MarkdownParser.cpp @@ -1,6 +1,7 @@ #include "MarkdownParser.h" #include "MarkdownDocument.h" +#include "StringUtils.h" #include #include @@ -18,9 +19,12 @@ MarkdownParser::~MarkdownParser() void MarkdownParser::onMultilineQuote() { std::cout << "Adding multiline quote " << mDocumentContent << std::endl; - auto quote = std::make_unique(mMultilineTag); + auto quote = std::make_unique(mWorkingTag); quote->appendTextContent(mDocumentContent); + mDocumentContent.clear(); + mWorkingTag.clear(); + mDocumentState = DocumentState::NONE; mMarkdownDocument->addElement(std::move(quote)); @@ -53,7 +57,14 @@ void MarkdownParser::onHeading(unsigned level) void MarkdownParser::onNewParagraph() { - if (mWorkingParagraph) + if (mWorkingBulletList) + { + std::cout << "Adding bullets to document" << std::endl; + mMarkdownDocument->addElement(std::move(mWorkingBulletList)); + mWorkingBulletList.reset(); + mDocumentState = DocumentState::NONE; /* reset document state now that the list has been moved into the document */ + } + else if (mWorkingParagraph) { onTextSpan(); @@ -127,6 +138,55 @@ std::pair MarkdownParser::onTick(unsigned tickCount) return {new_tick_count, stop_line_processing}; } +void MarkdownParser::onLink() +{ + std::cout << "Adding hyperlink to " << mLineContent << " with tag " << mWorkingTag << std::endl; + auto element = std::make_unique(mLineContent); + mLineContent.clear(); + + element->appendTextContent(mWorkingTag); + mWorkingTag.clear(); + + if (mWorkingParagraph) + { + mWorkingParagraph->addChild(std::move(element)); + } + mLineState = 
LineState::NONE; +} + +void MarkdownParser::onImage() +{ + std::cout << "Adding image with path " << mLineContent << " and alt" << mWorkingTag << std::endl; + auto element = std::make_unique(mLineContent, mWorkingTag); + mLineContent.clear(); + + element->appendTextContent(mWorkingTag); + mWorkingTag.clear(); + + if (mWorkingParagraph) + { + mWorkingParagraph->addChild(std::move(element)); + } + mLineState = 
LineState::NONE; +} + +void MarkdownParser::onBulletItem() +{ + std::cout << "Adding bullet item " << mLineContent << std::endl; + + if (!mWorkingBulletList) + { + mWorkingBulletList = std::make_unique(); + mDocumentState = DocumentState::IN_BULLETS; /* record that a bullet list is now open */ + } + + auto item = std::make_unique(); + item->appendTextContent(mLineContent); + mLineContent.clear(); + + mWorkingBulletList->addChild(std::move(item)); +} + void MarkdownParser::processLine() { mLineContent.clear(); @@ -136,8 +196,25 @@ void MarkdownParser::processLine() unsigned tick_count{0}; bool flushed_pre_inline = false; + bool first_nonspace = false; for(auto c : mWorkingLine) { + if (!StringUtils::IsSpace(c)) + { + if (first_nonspace) + { + first_nonspace = false; + } + else + { + first_nonspace = true; + } + } + else + { + first_nonspace = false; + } + if (c == '`') { auto [ret_tick_count, stop_line_processing] = onTick(tick_count); @@ -164,6 +241,87 @@ void MarkdownParser::processLine() { mLineContent += c; } + else if(mLineState == LineState::IN_LINK_TAG) + { + if (c == ']') + { + mLineState = LineState::AWAITING_LINK_BODY; + } + else + { + mWorkingTag += c; + } + } + else if(mLineState == LineState::AWAITING_LINK_BODY) + { + if (c == '(') + { + mLineState = LineState::IN_LINK_BODY; + } + else + { + mLineContent = '[' + mWorkingTag + ']'; + mLineState = LineState::NONE; + } + } + else if(mLineState == LineState::IN_LINK_BODY) + { + if(c==')') + { + onLink(); + } + else + { + mLineContent += c; + } + } + else if(mLineState == LineState::AWAITING_IMG_TAG) + { + if (c == '[') + { + mLineState = LineState::IN_IMG_TAG; + } + else + { + mLineContent = "!["; + mLineState = LineState::NONE; + } + } + else if(mLineState == LineState::IN_IMG_TAG) + { + if (c == ']') + { + mLineState = LineState::AWAITING_IMG_BODY; + } + else + { + mWorkingTag += c; + } + } + else if(mLineState == LineState::AWAITING_IMG_BODY) + { + if (c == '(') + { + mLineState = LineState::IN_IMG_BODY; + } + else + { + mLineContent = "![" + 
mWorkingTag + "]"; + mWorkingTag.clear(); + mLineState = LineState::NONE; + } + } + else if(mLineState == LineState::IN_IMG_BODY) + { + if (c == ')') + { + onImage(); + } + else + { + mLineContent += c; + } + } else { if (c == '#') @@ -172,6 +330,26 @@ void MarkdownParser::processLine() mLineState = LineState::IN_HEADING; heading_level++; } + else if(c == '[') + { + mDocumentContent += mLineContent; + onTextSpan(); + mLineState = LineState::IN_LINK_TAG; + } + else if(c == '!') + { + mDocumentContent += mLineContent; + onTextSpan(); + mLineState = LineState::AWAITING_IMG_TAG; + } + else if(first_nonspace && c == '*') + { + if (!mWorkingBulletList) + { + onNewParagraph(); + } + mLineState = LineState::IN_BULLETS; + } else { mLineContent += c; @@ -186,12 +364,16 @@ void MarkdownParser::processLine() } else if(mLineState == LineState::IN_MULTILINE_TAG) { - mMultilineTag = mLineContent; + mWorkingTag = mLineContent; } else if (mLineState == LineState::IN_INLINEQUOTE) { onTextSpan(); } + else if (mLineState == LineState::IN_BULLETS) + { + onBulletItem(); + } else { if (mLineContent.size() > 0) diff --git a/src/web/markdown/MarkdownParser.h b/src/web/markdown/MarkdownParser.h index 0c25e9f..b65c333 100644 --- a/src/web/markdown/MarkdownParser.h +++ b/src/web/markdown/MarkdownParser.h @@ -5,13 +5,15 @@ class MarkdownDocument; class MarkdownParagraph; +class MarkdownBulletList; class MarkdownParser { enum class DocumentState { NONE, - IN_MULTILINEQUOTE + IN_MULTILINEQUOTE, + IN_BULLETS }; enum class LineState @@ -19,7 +21,15 @@ class MarkdownParser NONE, IN_HEADING, IN_INLINEQUOTE, - IN_MULTILINE_TAG + IN_MULTILINE_TAG, + IN_LINK_TAG, + AWAITING_LINK_BODY, + IN_LINK_BODY, + AWAITING_IMG_TAG, + IN_IMG_TAG, + AWAITING_IMG_BODY, + IN_IMG_BODY, + IN_BULLETS }; public: @@ -35,10 +45,14 @@ private: void onMultilineQuote(); void onInlineQuote(); void onHeading(unsigned level); + void onLink(); + void onImage(); void onEmptyLine(); void onNewParagraph(); + void onBulletItem(); + 
void onTextSpan(); std::pair onTick(unsigned tickCount); @@ -46,11 +60,14 @@ private: std::string mWorkingLine; std::string mLineContent; std::string mDocumentContent; - std::string mMultilineTag; + + std::string mWorkingTag; LineState mLineState {LineState::NONE}; DocumentState mDocumentState {DocumentState::NONE}; - std::unique_ptr mWorkingParagraph{nullptr}; + std::unique_ptr mWorkingParagraph; + std::unique_ptr mWorkingBulletList; + std::unique_ptr mMarkdownDocument; };