parser: Add [content] tag, add extractFirstTag() method

This commit is contained in:
Albert S. 2023-11-27 21:37:54 +01:00
父節點 ff01a00217
當前提交 579fadfb10
共有 3 個檔案被更改,包括 17 行新增5 行删除

查看文件

@ -15,6 +15,7 @@ class IParser
}
public:
virtual std::string extractFirstTag(std::string tagname, const std::string &content) const = 0;
virtual std::string extractCommand(std::string cmdname, const std::string &content) const = 0;
virtual std::vector<std::string> extractCommands(std::string cmdname, const std::string &content) const = 0;

查看文件

@ -63,11 +63,10 @@ std::vector<std::string> Parser::extractCategories(const std::string &content) c
return result;
}
std::string Parser::extractCommand(std::string cmdname, const std::string &content) const
std::string Parser::extractFirstTag(std::string tagname, const std::string &content) const
{
std::string cmd = "[cmd:" + cmdname + "]";
std::string cmdend = "[/cmd:" + cmdname + "]";
std::string cmd = "[" + tagname + "]";
std::string cmdend = "[/" + tagname + "]";
std::string_view view = content;
size_t pos = 0;
if((pos = view.find(cmd)) != std::string::npos)
@ -83,6 +82,12 @@ std::string Parser::extractCommand(std::string cmdname, const std::string &conte
return "";
}
std::string Parser::extractCommand(std::string cmdname, const std::string &content) const
{
return extractFirstTag("cmd:" + cmdname, content);
}
std::vector<std::string> Parser::extractCommands(std::string cmdname, const std::string &content) const
{
std::vector<std::string> result;
@ -170,7 +175,7 @@ std::string Parser::parse(const PageDao &pagedao, UrlProvider &provider, const s
std::string result;
// we don't care about commands, but we nevertheless replace them with empty strings
std::regex tagfinder(
R"(\[(b|i|u|s|li||ul|ol|code|blockquote|img|link|wikilink|h\d|cmd:visible|cmd:rename|cmd:redirect|cmd:pagetitle|cmd:allowinclude|cmd:permissions|category|dynamic:postlist|dynamic:includepage|dynamic:getvar|dynamic:setvar)*?\]((\s|\S)*?)\[/\1])");
R"(\[(b|i|u|s|li||ul|ol|code|blockquote|img|link|wikilink|h\d|cmd:visible|cmd:rename|cmd:redirect|cmd:pagetitle|cmd:allowinclude|cmd:permissions|cmd:parentpage|content|category|dynamic:postlist|dynamic:includepage|dynamic:getvar|dynamic:setvar)*?\]((\s|\S)*?)\[/\1])");
result = utils::regex_callback_replacer(
tagfinder, content,
[&](std::smatch &match)
@ -182,6 +187,11 @@ std::string Parser::parse(const PageDao &pagedao, UrlProvider &provider, const s
{
content = parse(pagedao, provider, content, callback);
}
/* [content] just helps extracting the actual content of a page, pretty much noop otherwise */
if(tag == "content")
{
return parse(pagedao, provider, content, callback);
}
if(std::find(std::begin(justreplace), std::end(justreplace), tag) != std::end(justreplace))
{
return "<" + tag + ">" + content + "</" + tag + ">";

查看文件

@ -9,6 +9,7 @@ class Parser : public IParser
std::string processImage(std::smatch &match) const;
public:
std::string extractFirstTag(std::string tagname, const std::string &content) const override;
std::string extractCommand(std::string cmdname, const std::string &content) const override;
std::vector<std::string> extractCommands(std::string cmdname, const std::string &content) const override;
std::vector<Headline> extractHeadlines(const std::string &content) const override;