Simple markdown converter.

This commit is contained in:
James Grogan 2022-12-01 18:38:46 +00:00
parent ec11529b9a
commit 650cfa5c6f
5 changed files with 410 additions and 9 deletions

View file

@ -2,6 +2,7 @@
#include <memory> #include <memory>
#include "XmlElement.h" #include "XmlElement.h"
#include "XmlAttribute.h"
class HtmlElement : public XmlElement class HtmlElement : public XmlElement
{ {
@ -14,7 +15,11 @@ public:
PARAGRAPH, PARAGRAPH,
TEXT_RUN, TEXT_RUN,
CODE, CODE,
HEADING HEADING,
HYPERLINK,
IMAGE,
UNORDERED_LIST,
LIST_ITEM
}; };
HtmlElement(const std::string& tagName); HtmlElement(const std::string& tagName);
@ -50,4 +55,75 @@ public:
} }
}; };
class HtmlHyperlinkElement : public HtmlElement
{
public:
HtmlHyperlinkElement(const std::string& href) : HtmlElement("a")
{
if(!href.empty())
{
auto href_attr = std::make_unique<XmlAttribute>("href");
href_attr->setValue(href);
addAttribute(std::move(href_attr));
}
}
Type getType() const override
{
return Type::HYPERLINK;
}
};
class HtmlImageElement : public HtmlElement
{
public:
HtmlImageElement(const std::string& src, const std::string& alt) : HtmlElement("img")
{
if(!src.empty())
{
auto href_attr = std::make_unique<XmlAttribute>("src");
href_attr->setValue(src);
addAttribute(std::move(href_attr));
}
if(!alt.empty())
{
auto href_attr = std::make_unique<XmlAttribute>("alt");
href_attr->setValue(alt);
addAttribute(std::move(href_attr));
}
}
Type getType() const override
{
return Type::IMAGE;
}
};
/// HTML unordered list ("ul") element.
class HtmlUnorderedList : public HtmlElement
{
public:
    HtmlUnorderedList() : HtmlElement("ul") {}

    /// @return Type::UNORDERED_LIST.
    Type getType() const override { return Type::UNORDERED_LIST; }
};
/// HTML list item ("li") element.
class HtmlListItem : public HtmlElement
{
public:
    HtmlListItem() : HtmlElement("li") {}

    /// @return Type::LIST_ITEM.
    Type getType() const override { return Type::LIST_ITEM; }
};
using HtmlElementUPtr = std::unique_ptr<HtmlElement>; using HtmlElementUPtr = std::unique_ptr<HtmlElement>;

View file

@ -43,9 +43,35 @@ std::unique_ptr<HtmlDocument> MarkdownConverter::convert(MarkdownDocument* markd
html_text->setText(child->getTextContent()); html_text->setText(child->getTextContent());
html_p_element->addChild(std::move(html_text)); html_p_element->addChild(std::move(html_text));
} }
else if(child->getType() == MarkdownElement::Type::LINK)
{
    // Inline link: emit an anchor whose href is the link target and whose
    // text is the link's text content. The cast result is checked so that a
    // child whose dynamic type does not match its reported Type is skipped
    // instead of dereferencing a null pointer.
    if (auto link_element = dynamic_cast<MarkdownLink*>(child))
    {
        auto html_link = std::make_unique<HtmlHyperlinkElement>(link_element->getTarget());
        html_link->setText(link_element->getTextContent());
        html_p_element->addChild(std::move(html_link));
    }
}
else if(child->getType() == MarkdownElement::Type::IMAGE)
{
    // Inline image: emit an img element carrying the source and alt text.
    // Same null-checked downcast as for links.
    if (auto image_element = dynamic_cast<MarkdownImage*>(child))
    {
        auto html_image = std::make_unique<HtmlImageElement>(image_element->getSource(), image_element->getAlt());
        html_p_element->addChild(std::move(html_image));
    }
}
} }
html_doc->addElementToBody(std::move(html_p_element)); html_doc->addElementToBody(std::move(html_p_element));
} }
else if(md_element->getType() == MarkdownElement::Type::BULLET_LIST)
{
    // Bullet list: emit one unordered list containing a list item per
    // markdown bullet, each carrying the bullet's text content. The downcast
    // is checked so a mismatched element is skipped rather than dereferenced
    // as null.
    if (auto list_element = dynamic_cast<MarkdownBulletList*>(md_element))
    {
        auto html_list = std::make_unique<HtmlUnorderedList>();

        // getNumChildren() returns std::size_t; index with the same type.
        const std::size_t num_items = list_element->getNumChildren();
        for (std::size_t idx = 0; idx < num_items; ++idx)
        {
            auto html_list_item = std::make_unique<HtmlListItem>();
            html_list_item->setText(list_element->getChild(idx)->getTextContent());
            html_list->addChild(std::move(html_list_item));
        }
        html_doc->addElementToBody(std::move(html_list));
    }
}
else if(md_element->getType() == MarkdownElement::Type::MULTILINE_QUOTE) else if(md_element->getType() == MarkdownElement::Type::MULTILINE_QUOTE)
{ {
auto html_quote = std::make_unique<HtmlCodeElement>(); auto html_quote = std::make_unique<HtmlCodeElement>();

View file

@ -18,7 +18,9 @@ public:
INLINE_SPECIAL, INLINE_SPECIAL,
MULTILINE_SPECIAL, MULTILINE_SPECIAL,
LINK, LINK,
IMAGE IMAGE,
BULLET_ITEM,
BULLET_LIST
}; };
virtual ~MarkdownElement() = default; virtual ~MarkdownElement() = default;
@ -83,6 +85,45 @@ public:
std::vector<std::unique_ptr<MarkdownInlineElement> > mChildren; std::vector<std::unique_ptr<MarkdownInlineElement> > mChildren;
}; };
/// A single entry of a markdown bullet list; reports Type::BULLET_ITEM.
class MarkdownBulletItem : public MarkdownElement
{
public:
    virtual ~MarkdownBulletItem() = default;

    /// @return Type::BULLET_ITEM.
    Type getType() const override { return Type::BULLET_ITEM; }
};
/**
 * A markdown bullet list: an ordered collection of owned MarkdownBulletItem
 * children, reported as Type::BULLET_LIST.
 */
class MarkdownBulletList : public MarkdownElement
{
public:
    virtual ~MarkdownBulletList() = default;

    /// @return Type::BULLET_LIST.
    Type getType() const override { return Type::BULLET_LIST; }

    /// Takes ownership of @p child and appends it to the end of the list.
    void addChild(std::unique_ptr<MarkdownBulletItem> child)
    {
        mChildren.push_back(std::move(child));
    }

    /// @return Number of items currently held.
    std::size_t getNumChildren() const { return mChildren.size(); }

    /// @param idx Position of the item; not range-checked, so it must be
    ///            less than getNumChildren().
    /// @return Non-owning pointer to the item stored at @p idx.
    MarkdownBulletItem* getChild(std::size_t idx) const
    {
        const auto& item = mChildren[idx];
        return item.get();
    }

    std::vector<std::unique_ptr<MarkdownBulletItem> > mChildren;
};
class MarkdownHeading : public MarkdownElement class MarkdownHeading : public MarkdownElement
{ {
public: public:
@ -121,6 +162,63 @@ public:
} }
}; };
/// Inline markdown link; stores the link target and reports Type::LINK.
class MarkdownLink : public MarkdownInlineElement
{
public:
    /// @param target Link destination, copied into the element.
    MarkdownLink(const std::string& target)
        : mTarget(target)
    {
    }

    virtual ~MarkdownLink() = default;

    /// @return Type::LINK.
    Type getType() const override { return Type::LINK; }

    /// @return The target supplied at construction.
    const std::string& getTarget() const { return mTarget; }

private:
    std::string mTarget;
};
/// Inline markdown image; stores a source and alt text, reports Type::IMAGE.
class MarkdownImage : public MarkdownInlineElement
{
public:
    /// @param source Image source, copied into the element.
    /// @param alt    Alternative text, copied into the element.
    MarkdownImage(const std::string& source, const std::string& alt)
        : mSource(source),
          mAlt(alt)
    {
    }

    virtual ~MarkdownImage() = default;

    /// @return Type::IMAGE.
    Type getType() const override { return Type::IMAGE; }

    /// @return The source supplied at construction.
    const std::string& getSource() const { return mSource; }

    /// @return The alt text supplied at construction.
    const std::string& getAlt() const { return mAlt; }

private:
    std::string mSource;
    std::string mAlt;
};
class MarkdownMultilineQuote : public MarkdownElement class MarkdownMultilineQuote : public MarkdownElement
{ {
public: public:
@ -142,6 +240,8 @@ private:
}; };
class MarkdownDocument class MarkdownDocument
{ {
public: public:

View file

@ -1,6 +1,7 @@
#include "MarkdownParser.h" #include "MarkdownParser.h"
#include "MarkdownDocument.h" #include "MarkdownDocument.h"
#include "StringUtils.h"
#include <sstream> #include <sstream>
#include <iostream> #include <iostream>
@ -18,9 +19,12 @@ MarkdownParser::~MarkdownParser()
void MarkdownParser::onMultilineQuote() void MarkdownParser::onMultilineQuote()
{ {
std::cout << "Adding multiline quote " << mDocumentContent << std::endl; std::cout << "Adding multiline quote " << mDocumentContent << std::endl;
auto quote = std::make_unique<MarkdownMultilineQuote>(mMultilineTag); auto quote = std::make_unique<MarkdownMultilineQuote>(mWorkingTag);
quote->appendTextContent(mDocumentContent); quote->appendTextContent(mDocumentContent);
mDocumentContent.clear(); mDocumentContent.clear();
mWorkingTag.clear();
mDocumentState = DocumentState::NONE; mDocumentState = DocumentState::NONE;
mMarkdownDocument->addElement(std::move(quote)); mMarkdownDocument->addElement(std::move(quote));
@ -53,7 +57,14 @@ void MarkdownParser::onHeading(unsigned level)
void MarkdownParser::onNewParagraph() void MarkdownParser::onNewParagraph()
{ {
if (mWorkingParagraph) if (mWorkingBulletList)
{
    // A bullet list is in progress: hand it over to the document and leave
    // the bullet state.
    std::cout << "Adding bullets to document" << std::endl;
    mMarkdownDocument->addElement(std::move(mWorkingBulletList));
    mWorkingBulletList.reset();
    // Was `mDocumentState == DocumentState::NONE;`, a comparison whose
    // result was discarded, so the state was never actually reset.
    mDocumentState = DocumentState::NONE;
}
else if (mWorkingParagraph)
{ {
onTextSpan(); onTextSpan();
@ -127,6 +138,55 @@ std::pair<unsigned, bool> MarkdownParser::onTick(unsigned tickCount)
return {new_tick_count, stop_line_processing}; return {new_tick_count, stop_line_processing};
} }
// Builds a MarkdownLink from the parser's working buffers: mLineContent is
// used as the link target and mWorkingTag as its text content. Both buffers
// are cleared and the line state returns to NONE. The link is attached to
// the working paragraph when one exists.
// NOTE(review): when there is no working paragraph the link is discarded -
// confirm that callers always have a paragraph open at this point.
void MarkdownParser::onLink()
{
    std::cout << "Adding hyperlink to " << mLineContent << " with tag " << mWorkingTag << std::endl;

    auto link = std::make_unique<MarkdownLink>(mLineContent);
    link->appendTextContent(mWorkingTag);

    mLineContent.clear();
    mWorkingTag.clear();

    if (mWorkingParagraph)
    {
        mWorkingParagraph->addChild(std::move(link));
    }
    mLineState = LineState::NONE;
}
// Builds a MarkdownImage from the parser's working buffers: mLineContent is
// used as the image source and mWorkingTag as the alt text (also appended as
// the element's text content). Both buffers are cleared and the line state
// returns to NONE. The image is attached to the working paragraph when one
// exists.
// NOTE(review): when there is no working paragraph the image is discarded -
// confirm that callers always have a paragraph open at this point.
void MarkdownParser::onImage()
{
    // Separator added after "alt" so the alt text is not glued to the label
    // (matches the " with tag " form used when logging links).
    std::cout << "Adding image with path " << mLineContent << " and alt " << mWorkingTag << std::endl;

    auto element = std::make_unique<MarkdownImage>(mLineContent, mWorkingTag);
    mLineContent.clear();

    element->appendTextContent(mWorkingTag);
    mWorkingTag.clear();

    if (mWorkingParagraph)
    {
        mWorkingParagraph->addChild(std::move(element));
    }
    mLineState = LineState::NONE;
}
// Appends the current line content as a new item of the working bullet list,
// creating the list first if none is in progress, then clears mLineContent.
void MarkdownParser::onBulletItem()
{
    std::cout << "Adding bullet item " << mLineContent << std::endl;

    if (!mWorkingBulletList)
    {
        mWorkingBulletList = std::make_unique<MarkdownBulletList>();
        // Was `mDocumentState == DocumentState::IN_BULLETS;`, a comparison
        // whose result was discarded, so the state was never entered.
        mDocumentState = DocumentState::IN_BULLETS;
    }

    auto item = std::make_unique<MarkdownBulletItem>();
    item->appendTextContent(mLineContent);
    mLineContent.clear();

    mWorkingBulletList->addChild(std::move(item));
}
void MarkdownParser::processLine() void MarkdownParser::processLine()
{ {
mLineContent.clear(); mLineContent.clear();
@ -136,8 +196,25 @@ void MarkdownParser::processLine()
unsigned tick_count{0}; unsigned tick_count{0};
bool flushed_pre_inline = false; bool flushed_pre_inline = false;
// True only while `c` is the first character of the line for which
// StringUtils::IsSpace returns false.
bool first_nonspace = false;
// Latches once such a character has been consumed on this line.
bool seen_nonspace = false;
for(auto c : mWorkingLine)
{
    // The previous logic toggled the flag on every non-space character and
    // re-armed it after every space, so it was also true for the third,
    // fifth, ... non-space character of a run and for the first character
    // after any space. A separate latch keeps it true for exactly one
    // character per line.
    first_nonspace = false;
    if (!StringUtils::IsSpace(c))
    {
        first_nonspace = !seen_nonspace;
        seen_nonspace = true;
    }
if (c == '`') if (c == '`')
{ {
auto [ret_tick_count, stop_line_processing] = onTick(tick_count); auto [ret_tick_count, stop_line_processing] = onTick(tick_count);
@ -164,6 +241,87 @@ void MarkdownParser::processLine()
{ {
mLineContent += c; mLineContent += c;
} }
else if(mLineState == LineState::IN_LINK_TAG)
{
    // Inside "[...": collect the link text until the closing bracket.
    if (c == ']')
    {
        mLineState = LineState::AWAITING_LINK_BODY;
    }
    else
    {
        mWorkingTag += c;
    }
}
else if(mLineState == LineState::AWAITING_LINK_BODY)
{
    // After "[text]": only an immediately following '(' makes it a link.
    if (c == '(')
    {
        mLineState = LineState::IN_LINK_BODY;
    }
    else
    {
        // Not a link after all: restore the bracketed text literally.
        // The current character is kept (it used to be dropped) and the
        // tag buffer is cleared so it cannot leak into a later link or
        // image, matching the failed-image path.
        mLineContent = '[' + mWorkingTag + ']';
        mLineContent += c;
        mWorkingTag.clear();
        mLineState = LineState::NONE;
    }
}
else if(mLineState == LineState::IN_LINK_BODY)
{
    // Inside "(...": collect the target until ')' completes the link.
    if(c==')')
    {
        onLink();
    }
    else
    {
        mLineContent += c;
    }
}
else if(mLineState == LineState::AWAITING_IMG_TAG)
{
    // After '!': only an immediately following '[' starts an image.
    if (c == '[')
    {
        mLineState = LineState::IN_IMG_TAG;
    }
    else
    {
        // Plain exclamation mark: restore it followed by the current
        // character. Previously the text became "![" - a bracket that was
        // never in the input - and the current character was lost.
        mLineContent = "!";
        mLineContent += c;
        mLineState = LineState::NONE;
    }
}
else if(mLineState == LineState::IN_IMG_TAG)
{
    // Inside "![...": collect the alt text until the closing bracket.
    if (c == ']')
    {
        mLineState = LineState::AWAITING_IMG_BODY;
    }
    else
    {
        mWorkingTag += c;
    }
}
else if(mLineState == LineState::AWAITING_IMG_BODY)
{
    // After "![alt]": only an immediately following '(' makes it an image.
    if (c == '(')
    {
        mLineState = LineState::IN_IMG_BODY;
    }
    else
    {
        // Not an image after all: restore the literal text, keeping the
        // current character (it used to be dropped).
        mLineContent = "![" + mWorkingTag + "]";
        mLineContent += c;
        mWorkingTag.clear();
        mLineState = LineState::NONE;
    }
}
else if(mLineState == LineState::IN_IMG_BODY)
{
    // Inside "(...": collect the source until ')' completes the image.
    if (c == ')')
    {
        onImage();
    }
    else
    {
        mLineContent += c;
    }
}
else else
{ {
if (c == '#') if (c == '#')
@ -172,6 +330,26 @@ void MarkdownParser::processLine()
mLineState = LineState::IN_HEADING; mLineState = LineState::IN_HEADING;
heading_level++; heading_level++;
} }
// '[' may open a link: move the text gathered so far into the document
// content, emit it as a text span, then start collecting the link tag.
else if(c == '[')
{
mDocumentContent += mLineContent;
onTextSpan();
mLineState = LineState::IN_LINK_TAG;
}
// '!' may open an image: same flush as for links, then wait for '['.
// NOTE(review): this fires for every '!', including ordinary punctuation.
else if(c == '!')
{
mDocumentContent += mLineContent;
onTextSpan();
mLineState = LineState::AWAITING_IMG_TAG;
}
// '*' flagged as first_nonspace starts a bullet item.
else if(first_nonspace && c == '*')
{
// No bullet list in progress yet: run the new-paragraph handling first.
if (!mWorkingBulletList)
{
onNewParagraph();
}
mLineState = LineState::IN_BULLETS;
}
else else
{ {
mLineContent += c; mLineContent += c;
@ -186,12 +364,16 @@ void MarkdownParser::processLine()
} }
else if(mLineState == LineState::IN_MULTILINE_TAG) else if(mLineState == LineState::IN_MULTILINE_TAG)
{ {
mMultilineTag = mLineContent; mWorkingTag = mLineContent;
} }
else if (mLineState == LineState::IN_INLINEQUOTE) else if (mLineState == LineState::IN_INLINEQUOTE)
{ {
onTextSpan(); onTextSpan();
} }
// Line state is IN_BULLETS: hand the collected line content to
// onBulletItem(), which appends it to the working bullet list.
else if (mLineState == LineState::IN_BULLETS)
{
onBulletItem();
}
else else
{ {
if (mLineContent.size() > 0) if (mLineContent.size() > 0)

View file

@ -5,13 +5,15 @@
class MarkdownDocument; class MarkdownDocument;
class MarkdownParagraph; class MarkdownParagraph;
class MarkdownBulletList;
class MarkdownParser class MarkdownParser
{ {
enum class DocumentState enum class DocumentState
{ {
NONE, NONE,
IN_MULTILINEQUOTE IN_MULTILINEQUOTE,
IN_BULLETS
}; };
enum class LineState enum class LineState
@ -19,7 +21,15 @@ class MarkdownParser
NONE, NONE,
IN_HEADING, IN_HEADING,
IN_INLINEQUOTE, IN_INLINEQUOTE,
IN_MULTILINE_TAG IN_MULTILINE_TAG,
IN_LINK_TAG,
AWAITING_LINK_BODY,
IN_LINK_BODY,
AWAITING_IMG_TAG,
IN_IMG_TAG,
AWAITING_IMG_BODY,
IN_IMG_BODY,
IN_BULLETS
}; };
public: public:
@ -35,10 +45,14 @@ private:
void onMultilineQuote(); void onMultilineQuote();
void onInlineQuote(); void onInlineQuote();
void onHeading(unsigned level); void onHeading(unsigned level);
void onLink();
void onImage();
void onEmptyLine(); void onEmptyLine();
void onNewParagraph(); void onNewParagraph();
void onBulletItem();
void onTextSpan(); void onTextSpan();
std::pair<unsigned, bool> onTick(unsigned tickCount); std::pair<unsigned, bool> onTick(unsigned tickCount);
@ -46,11 +60,14 @@ private:
std::string mWorkingLine; std::string mWorkingLine;
std::string mLineContent; std::string mLineContent;
std::string mDocumentContent; std::string mDocumentContent;
std::string mMultilineTag;
std::string mWorkingTag;
LineState mLineState {LineState::NONE}; LineState mLineState {LineState::NONE};
DocumentState mDocumentState {DocumentState::NONE}; DocumentState mDocumentState {DocumentState::NONE};
std::unique_ptr<MarkdownParagraph> mWorkingParagraph{nullptr}; std::unique_ptr<MarkdownParagraph> mWorkingParagraph;
std::unique_ptr<MarkdownBulletList> mWorkingBulletList;
std::unique_ptr<MarkdownDocument> mMarkdownDocument; std::unique_ptr<MarkdownDocument> mMarkdownDocument;
}; };