parser: Add [content] tag, add extractFirstTag() method

This commit is contained in:
Albert S. 2023-11-27 21:37:54 +01:00
والد ff01a00217
کامیت 579fadfb10
3 فایلهای تغییر یافته به همراه 17 افزوده شده و 5 حذف شده

مشاهده پرونده

@ -15,6 +15,7 @@ class IParser
} }
public: public:
virtual std::string extractFirstTag(std::string tagname, const std::string &content) const = 0;
virtual std::string extractCommand(std::string cmdname, const std::string &content) const = 0; virtual std::string extractCommand(std::string cmdname, const std::string &content) const = 0;
virtual std::vector<std::string> extractCommands(std::string cmdname, const std::string &content) const = 0; virtual std::vector<std::string> extractCommands(std::string cmdname, const std::string &content) const = 0;

مشاهده پرونده

@ -63,11 +63,10 @@ std::vector<std::string> Parser::extractCategories(const std::string &content) c
return result; return result;
} }
std::string Parser::extractCommand(std::string cmdname, const std::string &content) const std::string Parser::extractFirstTag(std::string tagname, const std::string &content) const
{ {
std::string cmd = "[cmd:" + cmdname + "]"; std::string cmd = "[" + tagname + "]";
std::string cmdend = "[/cmd:" + cmdname + "]"; std::string cmdend = "[/" + tagname + "]";
std::string_view view = content; std::string_view view = content;
size_t pos = 0; size_t pos = 0;
if((pos = view.find(cmd)) != std::string::npos) if((pos = view.find(cmd)) != std::string::npos)
@ -83,6 +82,12 @@ std::string Parser::extractCommand(std::string cmdname, const std::string &conte
return ""; return "";
} }
/* Looks up a command tag by name. The name is prefixed with "cmd:" and the
 * search itself is delegated to extractFirstTag(), whose result is returned
 * unchanged. */
std::string Parser::extractCommand(std::string cmdname, const std::string &content) const
{
	const std::string tagname = "cmd:" + cmdname;
	return extractFirstTag(tagname, content);
}
std::vector<std::string> Parser::extractCommands(std::string cmdname, const std::string &content) const std::vector<std::string> Parser::extractCommands(std::string cmdname, const std::string &content) const
{ {
std::vector<std::string> result; std::vector<std::string> result;
@ -170,7 +175,7 @@ std::string Parser::parse(const PageDao &pagedao, UrlProvider &provider, const s
std::string result; std::string result;
// we don't care about commands, but we nevertheless replace them with empty strings // we don't care about commands, but we nevertheless replace them with empty strings
std::regex tagfinder( std::regex tagfinder(
R"(\[(b|i|u|s|li||ul|ol|code|blockquote|img|link|wikilink|h\d|cmd:visible|cmd:rename|cmd:redirect|cmd:pagetitle|cmd:allowinclude|cmd:permissions|category|dynamic:postlist|dynamic:includepage|dynamic:getvar|dynamic:setvar)*?\]((\s|\S)*?)\[/\1])"); R"(\[(b|i|u|s|li||ul|ol|code|blockquote|img|link|wikilink|h\d|cmd:visible|cmd:rename|cmd:redirect|cmd:pagetitle|cmd:allowinclude|cmd:permissions|cmd:parentpage|content|category|dynamic:postlist|dynamic:includepage|dynamic:getvar|dynamic:setvar)*?\]((\s|\S)*?)\[/\1])");
result = utils::regex_callback_replacer( result = utils::regex_callback_replacer(
tagfinder, content, tagfinder, content,
[&](std::smatch &match) [&](std::smatch &match)
@ -182,6 +187,11 @@ std::string Parser::parse(const PageDao &pagedao, UrlProvider &provider, const s
{ {
content = parse(pagedao, provider, content, callback); content = parse(pagedao, provider, content, callback);
} }
/* [content] just helps to extract the actual content of a page; otherwise it is pretty much a no-op */
if(tag == "content")
{
return parse(pagedao, provider, content, callback);
}
if(std::find(std::begin(justreplace), std::end(justreplace), tag) != std::end(justreplace)) if(std::find(std::begin(justreplace), std::end(justreplace), tag) != std::end(justreplace))
{ {
return "<" + tag + ">" + content + "</" + tag + ">"; return "<" + tag + ">" + content + "</" + tag + ">";

مشاهده پرونده

@ -9,6 +9,7 @@ class Parser : public IParser
std::string processImage(std::smatch &match) const; std::string processImage(std::smatch &match) const;
public: public:
std::string extractFirstTag(std::string tagname, const std::string &content) const override;
std::string extractCommand(std::string cmdname, const std::string &content) const override; std::string extractCommand(std::string cmdname, const std::string &content) const override;
std::vector<std::string> extractCommands(std::string cmdname, const std::string &content) const override; std::vector<std::string> extractCommands(std::string cmdname, const std::string &content) const override;
std::vector<Headline> extractHeadlines(const std::string &content) const override; std::vector<Headline> extractHeadlines(const std::string &content) const override;