Simple markdown converter.

This commit is contained in:
James Grogan 2022-12-01 18:38:46 +00:00
parent ec11529b9a
commit 650cfa5c6f
5 changed files with 410 additions and 9 deletions

View file

@ -2,6 +2,7 @@
#include <memory>
#include "XmlElement.h"
#include "XmlAttribute.h"
class HtmlElement : public XmlElement
{
@ -14,7 +15,11 @@ public:
PARAGRAPH,
TEXT_RUN,
CODE,
HEADING
HEADING,
HYPERLINK,
IMAGE,
UNORDERED_LIST,
LIST_ITEM
};
HtmlElement(const std::string& tagName);
@ -50,4 +55,75 @@ public:
}
};
// Anchor element: an HtmlElement constructed with the tag name "a".
class HtmlHyperlinkElement : public HtmlElement
{
public:
    // href: link target. When it is empty no "href" attribute is added at all.
    HtmlHyperlinkElement(const std::string& href) : HtmlElement("a")
    {
        if (href.empty())
        {
            return;
        }

        auto target = std::make_unique<XmlAttribute>("href");
        target->setValue(href);
        addAttribute(std::move(target));
    }

    // Reports this element as a hyperlink.
    Type getType() const override { return Type::HYPERLINK; }
};
class HtmlImageElement : public HtmlElement
{
public:
HtmlImageElement(const std::string& src, const std::string& alt) : HtmlElement("img")
{
if(!src.empty())
{
auto href_attr = std::make_unique<XmlAttribute>("src");
href_attr->setValue(src);
addAttribute(std::move(href_attr));
}
if(!alt.empty())
{
auto href_attr = std::make_unique<XmlAttribute>("alt");
href_attr->setValue(alt);
addAttribute(std::move(href_attr));
}
}
Type getType() const override
{
return Type::IMAGE;
}
};
// Unordered list container: an HtmlElement constructed with the tag name "ul".
class HtmlUnorderedList : public HtmlElement
{
public:
    HtmlUnorderedList() : HtmlElement("ul") {}

    // Reports this element as an unordered list.
    Type getType() const override { return Type::UNORDERED_LIST; }
};
// Single list entry: an HtmlElement constructed with the tag name "li".
class HtmlListItem : public HtmlElement
{
public:
    HtmlListItem() : HtmlElement("li") {}

    // Reports this element as a list item.
    Type getType() const override { return Type::LIST_ITEM; }
};
using HtmlElementUPtr = std::unique_ptr<HtmlElement>;

View file

@ -43,9 +43,35 @@ std::unique_ptr<HtmlDocument> MarkdownConverter::convert(MarkdownDocument* markd
html_text->setText(child->getTextContent());
html_p_element->addChild(std::move(html_text));
}
else if(child->getType() == MarkdownElement::Type::LINK)
{
auto link_element = dynamic_cast<MarkdownLink*>(child);
auto html_text = std::make_unique<HtmlHyperlinkElement>(link_element->getTarget());
html_text->setText(link_element->getTextContent());
html_p_element->addChild(std::move(html_text));
}
else if(child->getType() == MarkdownElement::Type::IMAGE)
{
auto link_element = dynamic_cast<MarkdownImage*>(child);
auto html_text = std::make_unique<HtmlImageElement>(link_element->getSource(), link_element->getAlt());
html_p_element->addChild(std::move(html_text));
}
}
html_doc->addElementToBody(std::move(html_p_element));
}
else if(md_element->getType() == MarkdownElement::Type::BULLET_LIST)
{
auto list_element = dynamic_cast<MarkdownBulletList*>(md_element);
auto html_list = std::make_unique<HtmlUnorderedList>();
for(unsigned idx=0; idx< list_element->getNumChildren(); idx++)
{
auto child = list_element->getChild(idx);
auto html_list_item = std::make_unique<HtmlListItem>();
html_list_item->setText(child->getTextContent());
html_list->addChild(std::move(html_list_item));
}
html_doc->addElementToBody(std::move(html_list));
}
else if(md_element->getType() == MarkdownElement::Type::MULTILINE_QUOTE)
{
auto html_quote = std::make_unique<HtmlCodeElement>();

View file

@ -18,7 +18,9 @@ public:
INLINE_SPECIAL,
MULTILINE_SPECIAL,
LINK,
IMAGE
IMAGE,
BULLET_ITEM,
BULLET_LIST
};
virtual ~MarkdownElement() = default;
@ -83,6 +85,45 @@ public:
std::vector<std::unique_ptr<MarkdownInlineElement> > mChildren;
};
// One entry of a markdown bullet list. Adds nothing to MarkdownElement beyond
// identifying itself as a BULLET_ITEM.
class MarkdownBulletItem : public MarkdownElement
{
public:
    virtual ~MarkdownBulletItem() = default;

    // Reports this element as a bullet item.
    Type getType() const override { return Type::BULLET_ITEM; }
};
// A markdown bullet list: an ordered collection of owned MarkdownBulletItem
// children.
class MarkdownBulletList : public MarkdownElement
{
public:
    virtual ~MarkdownBulletList() = default;

    // Reports this element as a bullet list.
    Type getType() const override
    {
        return Type::BULLET_LIST;
    }

    // Takes ownership of child and appends it after the existing items.
    void addChild(std::unique_ptr<MarkdownBulletItem> child)
    {
        mChildren.push_back(std::move(child));
    }

    // Number of items currently held.
    std::size_t getNumChildren() const
    {
        return mChildren.size();
    }

    // Returns a non-owning pointer to the item at position idx, or nullptr when
    // idx is past the end. Indexing the vector out of range would otherwise be
    // undefined behaviour.
    MarkdownBulletItem* getChild(std::size_t idx) const
    {
        if (idx >= mChildren.size())
        {
            return nullptr;
        }
        return mChildren[idx].get();
    }

    std::vector<std::unique_ptr<MarkdownBulletItem> > mChildren;
};
class MarkdownHeading : public MarkdownElement
{
public:
@ -121,6 +162,63 @@ public:
}
};
// Inline markdown link. Stores the link target; the visible link text is kept
// in the element's text content by the code that builds it.
class MarkdownLink : public MarkdownInlineElement
{
public:
    // target: destination of the link, copied into the element.
    MarkdownLink(const std::string& target) : mTarget(target) {}

    virtual ~MarkdownLink() = default;

    // Reports this element as a link.
    Type getType() const override { return Type::LINK; }

    // Destination the link points at, as given at construction.
    const std::string& getTarget() const { return mTarget; }

private:
    std::string mTarget;
};
// Inline markdown image. Stores the image source and its alternative text.
class MarkdownImage : public MarkdownInlineElement
{
public:
    // source: location of the image; alt: alternative text. Both are copied.
    MarkdownImage(const std::string& source, const std::string& alt)
        : mSource(source), mAlt(alt)
    {
    }

    virtual ~MarkdownImage() = default;

    // Reports this element as an image.
    Type getType() const override { return Type::IMAGE; }

    // Image location as given at construction.
    const std::string& getSource() const { return mSource; }

    // Alternative text as given at construction.
    const std::string& getAlt() const { return mAlt; }

private:
    std::string mSource;
    std::string mAlt;
};
class MarkdownMultilineQuote : public MarkdownElement
{
public:
@ -142,6 +240,8 @@ private:
};
class MarkdownDocument
{
public:

View file

@ -1,6 +1,7 @@
#include "MarkdownParser.h"
#include "MarkdownDocument.h"
#include "StringUtils.h"
#include <sstream>
#include <iostream>
@ -18,9 +19,12 @@ MarkdownParser::~MarkdownParser()
void MarkdownParser::onMultilineQuote()
{
std::cout << "Adding multiline quote " << mDocumentContent << std::endl;
auto quote = std::make_unique<MarkdownMultilineQuote>(mMultilineTag);
auto quote = std::make_unique<MarkdownMultilineQuote>(mWorkingTag);
quote->appendTextContent(mDocumentContent);
mDocumentContent.clear();
mWorkingTag.clear();
mDocumentState = DocumentState::NONE;
mMarkdownDocument->addElement(std::move(quote));
@ -53,7 +57,14 @@ void MarkdownParser::onHeading(unsigned level)
void MarkdownParser::onNewParagraph()
{
if (mWorkingParagraph)
if (mWorkingBulletList)
{
std::cout << "Adding bullets to document" << std::endl;
mMarkdownDocument->addElement(std::move(mWorkingBulletList));
mWorkingBulletList.reset();
mDocumentState == DocumentState::NONE;
}
else if (mWorkingParagraph)
{
onTextSpan();
@ -127,6 +138,55 @@ std::pair<unsigned, bool> MarkdownParser::onTick(unsigned tickCount)
return {new_tick_count, stop_line_processing};
}
// Finishes a link: mLineContent holds the target and mWorkingTag the link text.
// Builds a MarkdownLink from them, empties both buffers and returns the line
// state to NONE. The link is attached to the working paragraph if there is one;
// otherwise it is discarded.
void MarkdownParser::onLink()
{
    std::cout << "Adding hyperlink to " << mLineContent << " with tag " << mWorkingTag << std::endl;

    auto link = std::make_unique<MarkdownLink>(mLineContent);
    link->appendTextContent(mWorkingTag);

    // Both buffers have been copied into the element and can be reused.
    mLineContent.clear();
    mWorkingTag.clear();

    if (mWorkingParagraph)
    {
        mWorkingParagraph->addChild(std::move(link));
    }

    mLineState = LineState::NONE;
}
// Finishes an image: mLineContent holds the source path and mWorkingTag the alt
// text. Builds a MarkdownImage from them (the alt text is also stored as the
// element's text content), empties both buffers and returns the line state to
// NONE. The image is attached to the working paragraph if there is one;
// otherwise it is discarded.
void MarkdownParser::onImage()
{
    // A space after "alt" keeps the alt text from running into the label.
    std::cout << "Adding image with path " << mLineContent << " and alt " << mWorkingTag << std::endl;

    auto element = std::make_unique<MarkdownImage>(mLineContent, mWorkingTag);
    mLineContent.clear();

    element->appendTextContent(mWorkingTag);
    mWorkingTag.clear();

    if (mWorkingParagraph)
    {
        mWorkingParagraph->addChild(std::move(element));
    }
    mLineState = LineState::NONE;
}
// Appends the current line content as a new item of the bullet list under
// construction. Opens a new working list, and marks the document as being in a
// bullet list, when none is open yet. Empties mLineContent afterwards.
void MarkdownParser::onBulletItem()
{
    std::cout << "Adding bullet item " << mLineContent << std::endl;

    if (!mWorkingBulletList)
    {
        mWorkingBulletList = std::make_unique<MarkdownBulletList>();
        // Assignment, not comparison: `==` here only evaluated and discarded a
        // boolean, so the document state was never changed.
        // NOTE(review): onNewParagraph() uses `==` in the same way where it
        // appears to intend resetting the state to NONE after flushing the
        // list; that should be corrected alongside this.
        mDocumentState = DocumentState::IN_BULLETS;
    }

    auto item = std::make_unique<MarkdownBulletItem>();
    item->appendTextContent(mLineContent);
    mLineContent.clear();

    mWorkingBulletList->addChild(std::move(item));
}
void MarkdownParser::processLine()
{
mLineContent.clear();
@ -136,8 +196,25 @@ void MarkdownParser::processLine()
unsigned tick_count{0};
bool flushed_pre_inline = false;
bool first_nonspace = false;
for(auto c : mWorkingLine)
{
if (!StringUtils::IsSpace(c))
{
if (first_nonspace)
{
first_nonspace = false;
}
else
{
first_nonspace = true;
}
}
else
{
first_nonspace = false;
}
if (c == '`')
{
auto [ret_tick_count, stop_line_processing] = onTick(tick_count);
@ -164,6 +241,87 @@ void MarkdownParser::processLine()
{
mLineContent += c;
}
else if(mLineState == LineState::IN_LINK_TAG)
{
if (c == ']')
{
mLineState = LineState::AWAITING_LINK_BODY;
}
else
{
mWorkingTag += c;
}
}
else if(mLineState == LineState::AWAITING_LINK_BODY)
{
if (c == '(')
{
mLineState = LineState::IN_LINK_BODY;
}
else
{
mLineContent = '[' + mWorkingTag + ']';
mLineState = LineState::NONE;
}
}
else if(mLineState == LineState::IN_LINK_BODY)
{
if(c==')')
{
onLink();
}
else
{
mLineContent += c;
}
}
else if(mLineState == LineState::AWAITING_IMG_TAG)
{
if (c == '[')
{
mLineState = LineState::IN_IMG_TAG;
}
else
{
mLineContent = "![";
mLineState = LineState::NONE;
}
}
else if(mLineState == LineState::IN_IMG_TAG)
{
if (c == ']')
{
mLineState = LineState::AWAITING_IMG_BODY;
}
else
{
mWorkingTag += c;
}
}
else if(mLineState == LineState::AWAITING_IMG_BODY)
{
if (c == '(')
{
mLineState = LineState::IN_IMG_BODY;
}
else
{
mLineContent = "![" + mWorkingTag + "]";
mWorkingTag.clear();
mLineState = LineState::NONE;
}
}
else if(mLineState == LineState::IN_IMG_BODY)
{
if (c == ')')
{
onImage();
}
else
{
mLineContent += c;
}
}
else
{
if (c == '#')
@ -172,6 +330,26 @@ void MarkdownParser::processLine()
mLineState = LineState::IN_HEADING;
heading_level++;
}
else if(c == '[')
{
mDocumentContent += mLineContent;
onTextSpan();
mLineState = LineState::IN_LINK_TAG;
}
else if(c == '!')
{
mDocumentContent += mLineContent;
onTextSpan();
mLineState = LineState::AWAITING_IMG_TAG;
}
else if(first_nonspace && c == '*')
{
if (!mWorkingBulletList)
{
onNewParagraph();
}
mLineState = LineState::IN_BULLETS;
}
else
{
mLineContent += c;
@ -186,12 +364,16 @@ void MarkdownParser::processLine()
}
else if(mLineState == LineState::IN_MULTILINE_TAG)
{
mMultilineTag = mLineContent;
mWorkingTag = mLineContent;
}
else if (mLineState == LineState::IN_INLINEQUOTE)
{
onTextSpan();
}
else if (mLineState == LineState::IN_BULLETS)
{
onBulletItem();
}
else
{
if (mLineContent.size() > 0)

View file

@ -5,13 +5,15 @@
class MarkdownDocument;
class MarkdownParagraph;
class MarkdownBulletList;
class MarkdownParser
{
enum class DocumentState
{
NONE,
IN_MULTILINEQUOTE
IN_MULTILINEQUOTE,
IN_BULLETS
};
enum class LineState
@ -19,7 +21,15 @@ class MarkdownParser
NONE,
IN_HEADING,
IN_INLINEQUOTE,
IN_MULTILINE_TAG
IN_MULTILINE_TAG,
IN_LINK_TAG,
AWAITING_LINK_BODY,
IN_LINK_BODY,
AWAITING_IMG_TAG,
IN_IMG_TAG,
AWAITING_IMG_BODY,
IN_IMG_BODY,
IN_BULLETS
};
public:
@ -35,10 +45,14 @@ private:
void onMultilineQuote();
void onInlineQuote();
void onHeading(unsigned level);
void onLink();
void onImage();
void onEmptyLine();
void onNewParagraph();
void onBulletItem();
void onTextSpan();
std::pair<unsigned, bool> onTick(unsigned tickCount);
@ -46,11 +60,14 @@ private:
std::string mWorkingLine;
std::string mLineContent;
std::string mDocumentContent;
std::string mMultilineTag;
std::string mWorkingTag;
LineState mLineState {LineState::NONE};
DocumentState mDocumentState {DocumentState::NONE};
std::unique_ptr<MarkdownParagraph> mWorkingParagraph{nullptr};
std::unique_ptr<MarkdownParagraph> mWorkingParagraph;
std::unique_ptr<MarkdownBulletList> mWorkingBulletList;
std::unique_ptr<MarkdownDocument> mMarkdownDocument;
};