No paragraphs for html (#27)

* htmlparser added
* Added ParserConfig
    * added option to disable the emphasized parser
    * added option to not wrap HTML in markdown within a paragraph in output
* Updated docs
* Version update 1.1.1
This commit is contained in:
Petra Baranski 2019-12-27 19:48:29 +01:00 committed by GitHub
parent 3b3e16a6bc
commit 2fe7a71bf3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 296 additions and 8 deletions

View File

@ -1,7 +1,7 @@
# maddy
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![Version: 1.1.0](https://img.shields.io/badge/Version-1.1.0-brightgreen.svg)](https://semver.org/)
[![Version: 1.1.1](https://img.shields.io/badge/Version-1.1.1-brightgreen.svg)](https://semver.org/)
[![Travis Build Status](https://travis-ci.org/progsource/maddy.svg?branch=master)](https://travis-ci.org/progsource/maddy)
[![Appveyor Build Status](https://ci.appveyor.com/api/projects/status/04m0lg27kigv1pg8/branch/master?svg=true)](https://ci.appveyor.com/project/progsource/maddy/branch/master)
@ -42,7 +42,13 @@ and in the code, you can then do the following:
#include "maddy/parser.h"
std::stringstream markdownInput("");
std::shared_ptr<maddy::Parser> parser = std::make_shared<maddy::Parser>();
// config is optional
std::shared_ptr<maddy::ParserConfig> config = std::make_shared<maddy::ParserConfig>();
config->isEmphasizedParserEnabled = true; // default
config->isHTMLWrappedInParagraph = true; // default
std::shared_ptr<maddy::Parser> parser = std::make_shared<maddy::Parser>(config);
std::string htmlOutput = parser->Parse(markdownInput);
```

View File

@ -7,6 +7,10 @@ destroy the output, if there was HTML in your markdown.
The Parser expects you to use spaces and not tabs for indentation in the
markdown.
If a line starts with `<` and `config->isHTMLWrappedInParagraph` is false, it
expects that the upcoming line is HTML and therefor will not be surrounded by a
paragraph.
## Headlines
```
@ -241,6 +245,8 @@ results in
## emphasized
This can be disabled by setting `config->isEmphasizedParserEnabled = false`.
```
_emphasized text_
```

127
include/maddy/htmlparser.h Normal file
View File

@ -0,0 +1,127 @@
/*
* This project is licensed under the MIT license. For more information see the
* LICENSE file.
*/
#pragma once
// -----------------------------------------------------------------------------
#include <functional>
#include <string>
#include "maddy/blockparser.h"
// -----------------------------------------------------------------------------
namespace maddy {
// -----------------------------------------------------------------------------
/**
* HtmlParser
*
* @class
*/
class HtmlParser : public BlockParser
{
public:
/**
* ctor
*
* @method
* @param {std::function<void(std::string&)>} parseLineCallback
* @param {std::function<std::shared_ptr<BlockParser>(const std::string& line)>} getBlockParserForLineCallback
*/
HtmlParser(
std::function<void(std::string&)> parseLineCallback,
std::function<std::shared_ptr<BlockParser>(const std::string& line)> getBlockParserForLineCallback
)
: BlockParser(parseLineCallback, getBlockParserForLineCallback)
, isStarted(false)
, isFinished(false)
, isGreaterThanFound(false)
{}
/**
* IsStartingLine
*
* If the line is starting with `<`, HTML is expected to follow.
* Nothing after that will be parsed, it only is copied.
*
* @method
* @param {const std::string&} line
* @return {bool}
*/
static bool
IsStartingLine(const std::string& line)
{
return line[0] == '<';
}
/**
* IsFinished
*
* `>` followed by an empty line will end the HTML block.
*
* @method
* @return {bool}
*/
bool
IsFinished() const override
{
return this->isFinished;
}
protected:
bool
isInlineBlockAllowed() const override
{
return false;
}
bool
isLineParserAllowed() const override
{
return false;
}
void
parseBlock(std::string& line) override
{
if (!this->isStarted)
{
this->isStarted = true;
}
if (!line.empty() && line[line.size() - 1] == '>')
{
this->isGreaterThanFound = true;
return;
}
if (line.empty() && this->isGreaterThanFound)
{
this->isFinished = true;
return;
}
if (!line.empty() && this->isGreaterThanFound)
{
this->isGreaterThanFound = false;
}
if (!line.empty())
{
line += " ";
}
}
private:
bool isStarted;
bool isFinished;
bool isGreaterThanFound;
}; // class HtmlParser
// -----------------------------------------------------------------------------
} // namespace maddy

View File

@ -44,8 +44,9 @@ public:
/**
* IsStartingLine
*
* If the line is not empty, it will be a paragraph. So this block parser has
* to always run as the last one!
* If the line is not empty, it will be a paragraph.
*
* This block parser has to always run as the last one!
*
* @method
* @param {const std::string&} line

View File

@ -10,11 +10,14 @@
#include <functional>
#include <string>
#include "maddy/parserconfig.h"
// BlockParser
#include "maddy/checklistparser.h"
#include "maddy/codeblockparser.h"
#include "maddy/headlineparser.h"
#include "maddy/horizontallineparser.h"
#include "maddy/htmlparser.h"
#include "maddy/orderedlistparser.h"
#include "maddy/paragraphparser.h"
#include "maddy/quoteparser.h"
@ -54,8 +57,9 @@ public:
*
* @method
*/
Parser()
: breakLineParser(std::make_shared<BreakLineParser>())
Parser(std::shared_ptr<ParserConfig> config = nullptr)
: config(config)
, breakLineParser(std::make_shared<BreakLineParser>())
, emphasizedParser(std::make_shared<EmphasizedParser>())
, imageParser(std::make_shared<ImageParser>())
, inlineCodeParser(std::make_shared<InlineCodeParser>())
@ -113,6 +117,7 @@ public:
}
private:
std::shared_ptr<ParserConfig> config;
std::shared_ptr<BreakLineParser> breakLineParser;
std::shared_ptr<EmphasizedParser> emphasizedParser;
std::shared_ptr<ImageParser> imageParser;
@ -132,7 +137,11 @@ private:
// Attention! StrongParser has to be before EmphasizedParser
this->strongParser->Parse(line);
this->emphasizedParser->Parse(line);
if (!this->config || this->config->isEmphasizedParserEnabled)
{
this->emphasizedParser->Parse(line);
}
this->strikeThroughParser->Parse(line);
@ -195,6 +204,14 @@ private:
{
parser = this->createUnorderedListParser();
}
else if (
this->config &&
!this->config->isHTMLWrappedInParagraph &&
maddy::HtmlParser::IsStartingLine(line)
)
{
parser = std::make_shared<maddy::HtmlParser>(nullptr, nullptr);
}
else if (maddy::ParagraphParser::IsStartingLine(line))
{
parser = std::make_shared<maddy::ParagraphParser>(

View File

@ -0,0 +1,31 @@
/*
* This project is licensed under the MIT license. For more information see the
* LICENSE file.
*/
#pragma once
// -----------------------------------------------------------------------------
namespace maddy {
// -----------------------------------------------------------------------------
/**
* ParserConfig
*
* @class
*/
struct ParserConfig
{
bool isEmphasizedParserEnabled;
bool isHTMLWrappedInParagraph;
ParserConfig()
: isEmphasizedParserEnabled(true)
, isHTMLWrappedInParagraph(true)
{}
}; // class ParserConfig
// -----------------------------------------------------------------------------
} // namespace maddy

View File

@ -0,0 +1,82 @@
/*
* This project is licensed under the MIT license. For more information see the
* LICENSE file.
*/
#include <memory>
#include "gmock/gmock.h"
#include "maddy/htmlparser.h"
// -----------------------------------------------------------------------------
class MADDY_HTMLPARSER : public ::testing::Test
{
protected:
std::shared_ptr<maddy::HtmlParser> pParser;
void
SetUp() override
{
this->pParser = std::make_shared<maddy::HtmlParser>(
nullptr,
nullptr
);
}
};
// -----------------------------------------------------------------------------
TEST_F(MADDY_HTMLPARSER, IsFinishedReturnsFalseInTheBeginning)
{
ASSERT_FALSE(pParser->IsFinished());
}
TEST_F(MADDY_HTMLPARSER, IsStartingLineReturnsFalseWhenFacedWithNoSmallerThan)
{
const std::vector<std::string> markdown = {
"> quote"
, "some text"
, "* list"
, "1. numbered list"
, "|table>"
};
for (size_t i = 0; i < markdown.size(); ++i)
{
ASSERT_FALSE(maddy::HtmlParser::IsStartingLine(markdown[i]));
}
}
TEST_F(MADDY_HTMLPARSER, IsStartingLineReturnsTrueWhenFacedWithSmallerThan)
{
const std::string markdown = "<div id=\"test\">test element</div>";
ASSERT_TRUE(maddy::HtmlParser::IsStartingLine(markdown));
}
TEST_F(MADDY_HTMLPARSER, ItReplacesNoHtml)
{
const std::vector<std::string> markdown {
"some text in a paragraph"
, ""
, "<div> some HTML</div>"
, ""
, "<div>more"
, "HTML"
, "</div>"
, ""
};
const std::string expected = "some text in a paragraph <div> some HTML</div><div>more HTML </div>";
for (std::string md : markdown)
{
pParser->AddLine(md);
}
ASSERT_TRUE(pParser->IsFinished());
std::stringstream& output(pParser->GetResult());
const std::string& outputString = output.str();
ASSERT_EQ(expected, outputString);
}

View File

@ -19,3 +19,18 @@ TEST(MADDY_PARSER, ItShouldParse)
ASSERT_EQ(testHtml, output);
}
TEST(MADDY_PARSER, ItShouldParseWithConfig)
{
auto config = std::make_shared<maddy::ParserConfig>();
config->isEmphasizedParserEnabled = false;
config->isHTMLWrappedInParagraph = false;
auto parser = std::make_shared<maddy::Parser>(config);
std::stringstream markdown(testMarkdown);
const std::string output = parser->Parse(markdown);
ASSERT_EQ(testHtml2, output);
}

View File

@ -42,6 +42,8 @@ And well - let's see how an image would be shown:\n\
\n\
---\n\
\n\
<a name=\"to top\"></a>\n\
\n\
### and more headlines\n\
\n\
- [ ] how\n\
@ -67,4 +69,5 @@ foot a|foot b|foot c\n\
\n\
";
const std::string testHtml = "<h1>This is a test</h1><p>This should result in a praragraph it's that simple. </p><ul><li>an <i>unordered</i> list<ul><li>with some <strong>hierarchy</strong><ol><li>and an <em>ordered</em></li><li>list</li><li>directly</li></ol></li><li>inside</li></ul></li></ul><pre><code>\nvar c = 'blub';\n</code></pre><blockquote><p>A Quote </p><p>With some <s>text</s> blocks inside </p><ul><li>even a list </li><li>should be </li><li>possible </li></ul></blockquote><p>And well <code>inline code</code> should also work. </p><h2>Another Headline</h2><p>And not to forget <a href=\"http://progsource.de\">link to progsource</a> should work. And well - let's see how an image would be shown: </p><p><img src=\"http://progsource.de/img/progsource.png\" alt=\"an image\"/> </p><hr/><h3>and more headlines</h3><ul class=\"checklist\"><li><label><input type=\"checkbox\"/> how</label></li><li><label><input type=\"checkbox\"/> about<ul class=\"checklist\"><li><label><input type=\"checkbox\"/> a</label></li><li><label><input type=\"checkbox\" checked=\"checked\"/> nice</label></li></ul></label></li><li><label><input type=\"checkbox\" checked=\"checked\"/> check</label></li><li><label><input type=\"checkbox\"/> list</label></li></ul><h4>even a table</h4><table><thead><tr><th>Left header</th><th>middle header</th><th>last header</th></tr></thead><tbody><tr><td>cell 1</td><td>cell <strong>2</strong></td><td>cell 3</td></tr><tr><td>cell 4</td><td>cell 5</td><td>cell 6</td></tr></tbody><tfoot><tr><td>foot a</td><td>foot b</td><td>foot c</td></tr></tfoot></table><h5>h5</h5><h6>h6</h6>";
const std::string testHtml = "<h1>This is a test</h1><p>This should result in a praragraph it's that simple. </p><ul><li>an <i>unordered</i> list<ul><li>with some <strong>hierarchy</strong><ol><li>and an <em>ordered</em></li><li>list</li><li>directly</li></ol></li><li>inside</li></ul></li></ul><pre><code>\nvar c = 'blub';\n</code></pre><blockquote><p>A Quote </p><p>With some <s>text</s> blocks inside </p><ul><li>even a list </li><li>should be </li><li>possible </li></ul></blockquote><p>And well <code>inline code</code> should also work. </p><h2>Another Headline</h2><p>And not to forget <a href=\"http://progsource.de\">link to progsource</a> should work. And well - let's see how an image would be shown: </p><p><img src=\"http://progsource.de/img/progsource.png\" alt=\"an image\"/> </p><hr/><p><a name=\"to top\"></a> </p><h3>and more headlines</h3><ul class=\"checklist\"><li><label><input type=\"checkbox\"/> how</label></li><li><label><input type=\"checkbox\"/> about<ul class=\"checklist\"><li><label><input type=\"checkbox\"/> a</label></li><li><label><input type=\"checkbox\" checked=\"checked\"/> nice</label></li></ul></label></li><li><label><input type=\"checkbox\" checked=\"checked\"/> check</label></li><li><label><input type=\"checkbox\"/> list</label></li></ul><h4>even a table</h4><table><thead><tr><th>Left header</th><th>middle header</th><th>last header</th></tr></thead><tbody><tr><td>cell 1</td><td>cell <strong>2</strong></td><td>cell 3</td></tr><tr><td>cell 4</td><td>cell 5</td><td>cell 6</td></tr></tbody><tfoot><tr><td>foot a</td><td>foot b</td><td>foot c</td></tr></tfoot></table><h5>h5</h5><h6>h6</h6>";
const std::string testHtml2 = "<h1>This is a test</h1><p>This should result in a praragraph it's that simple. </p><ul><li>an <i>unordered</i> list<ul><li>with some <strong>hierarchy</strong><ol><li>and an _ordered_</li><li>list</li><li>directly</li></ol></li><li>inside</li></ul></li></ul><pre><code>\nvar c = 'blub';\n</code></pre><blockquote><p>A Quote </p><p>With some <s>text</s> blocks inside </p><ul><li>even a list </li><li>should be </li><li>possible </li></ul></blockquote><p>And well <code>inline code</code> should also work. </p><h2>Another Headline</h2><p>And not to forget <a href=\"http://progsource.de\">link to progsource</a> should work. And well - let's see how an image would be shown: </p><p><img src=\"http://progsource.de/img/progsource.png\" alt=\"an image\"/> </p><hr/><a name=\"to top\"></a><h3>and more headlines</h3><ul class=\"checklist\"><li><label><input type=\"checkbox\"/> how</label></li><li><label><input type=\"checkbox\"/> about<ul class=\"checklist\"><li><label><input type=\"checkbox\"/> a</label></li><li><label><input type=\"checkbox\" checked=\"checked\"/> nice</label></li></ul></label></li><li><label><input type=\"checkbox\" checked=\"checked\"/> check</label></li><li><label><input type=\"checkbox\"/> list</label></li></ul><h4>even a table</h4><table><thead><tr><th>Left header</th><th>middle header</th><th>last header</th></tr></thead><tbody><tr><td>cell 1</td><td>cell <strong>2</strong></td><td>cell 3</td></tr><tr><td>cell 4</td><td>cell 5</td><td>cell 6</td></tr></tbody><tfoot><tr><td>foot a</td><td>foot b</td><td>foot c</td></tr></tfoot></table><h5>h5</h5><h6>h6</h6>";