Parser: Separate parseDynamics(), fix headline extraction with tags inside them
This commit is contained in:
orang tua
15e4f081cc
melakukan
fbca85e5ed
@ -23,6 +23,10 @@ class IParser
|
||||
}
|
||||
virtual std::string parse(const PageDao &pagedao, UrlProvider &provider, const std::string &content,
|
||||
const std::function<std::string(std::string_view, std::string_view)> &callback) const = 0;
|
||||
|
||||
virtual std::string parseDynamics(
|
||||
const std::string &content,
|
||||
const std::function<std::string(std::string_view, std::string_view)> &callback) const = 0;
|
||||
virtual std::vector<std::string> extractCategories(const std::string &content) const = 0;
|
||||
|
||||
virtual ~IParser(){};
|
||||
|
15
parser.cpp
15
parser.cpp
@ -30,7 +30,8 @@ SOFTWARE.
|
||||
std::vector<Headline> Parser::extractHeadlines(const std::string &content) const
|
||||
{
|
||||
std::vector<Headline> result;
|
||||
std::string reg = R"(\[h(1|2|3)\](.*?)\[/h\1\])";
|
||||
|
||||
std::string reg = R"(\[h(1|2|3)\](\[.*?\])*(.*?)\[.*?\]*\[\/h\1\])";
|
||||
std::regex headerfinder(reg);
|
||||
auto begin = std::sregex_iterator(content.begin(), content.end(), headerfinder);
|
||||
auto end = std::sregex_iterator();
|
||||
@ -40,7 +41,7 @@ std::vector<Headline> Parser::extractHeadlines(const std::string &content) const
|
||||
auto smatch = *it;
|
||||
Headline h;
|
||||
h.level = utils::toUInt(smatch.str(1));
|
||||
h.title = smatch.str(2);
|
||||
h.title = smatch.str(3);
|
||||
result.push_back(h);
|
||||
}
|
||||
return result;
|
||||
@ -122,7 +123,7 @@ std::string Parser::parse(const PageDao &pagedao, UrlProvider &provider, const s
|
||||
std::string result;
|
||||
// we don't care about commands, but we nevertheless replace them with empty strings
|
||||
std::regex tagfinder(
|
||||
R"(\[(b|i|u|li||ul|ol|link|wikilink|h\d|cmd:rename|cmd:redirect|cmd:pagetitle|category|dynamic:postlist|dynamic:includepage)*?\]((\s|\S)*?)\[/\1])");
|
||||
R"(\[(b|i|u|li||ul|ol|link|wikilink|h\d|cmd:rename|cmd:redirect|cmd:pagetitle|category|dynamic:postlist|dynamic:includepage|dynamic:getvar|dynamic:setvar)*?\]((\s|\S)*?)\[/\1])");
|
||||
result = utils::regex_callback_replacer(
|
||||
tagfinder, content,
|
||||
[&](std::smatch &match)
|
||||
@ -150,3 +151,11 @@ std::string Parser::parse(const PageDao &pagedao, UrlProvider &provider, const s
|
||||
result = utils::strreplace(result, "\r\n", "<br>");
|
||||
return result;
|
||||
}
|
||||
|
||||
// Processes only the "[dynamic:<name>]...[/dynamic:<name>]" tags found in `content`.
// For each regex match, `callback` is invoked with the tag name (capture group 1)
// and the text between the opening and closing tag (capture group 2).
// The actual substitution is delegated to utils::regex_callback_replacer;
// presumably each match is replaced by the string the callback returns — confirm
// against that helper.
std::string Parser::parseDynamics(const std::string &content,
|
||||
const std::function<std::string(std::string_view, std::string_view)> &callback) const
|
||||
{
|
||||
// `(\s|\S)*?` lazily matches any characters, including newlines, up to the first
// closing tag whose name equals group 1 (back-reference \1).
// NOTE(review): the `*?` after group 1 makes the tag name optional, so a bare
// "[]...[/]" pair would apparently match as well, with an empty name — confirm
// whether that is intended.
std::regex tagfinder(R"(\[(dynamic:\w+)*?\]((\s|\S)*?)\[/\1])");
|
||||
return utils::regex_callback_replacer(tagfinder, content,
|
||||
[&](std::smatch &match) { return callback(match.str(1), match.str(2)); });
|
||||
}
|
||||
|
3
parser.h
3
parser.h
@ -15,6 +15,9 @@ class Parser : public IParser
|
||||
virtual std::string parse(
|
||||
const PageDao &pagedao, UrlProvider &provider, const std::string &content,
|
||||
const std::function<std::string(std::string_view, std::string_view)> &callback) const override;
|
||||
std::string parseDynamics(
|
||||
const std::string &content,
|
||||
const std::function<std::string(std::string_view, std::string_view)> &callback) const override;
|
||||
|
||||
using IParser::IParser;
|
||||
};
|
||||
|
Memuat…
Reference in New Issue
Block a user