From 579fadfb10dd6c894736275a2c4447ee53338429 Mon Sep 17 00:00:00 2001
From: "Albert S."
Date: Mon, 27 Nov 2023 21:37:54 +0100
Subject: [PATCH] parser: Add [content] tag, add extractFirstTag() method

---
 iparser.h  |  1 +
 parser.cpp | 20 +++++++++++++++-----
 parser.h   |  1 +
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/iparser.h b/iparser.h
index 0b7dc69..3dc42c4 100644
--- a/iparser.h
+++ b/iparser.h
@@ -15,6 +15,7 @@ class IParser
 	}
 
   public:
+	virtual std::string extractFirstTag(std::string tagname, const std::string &content) const = 0;
 	virtual std::string extractCommand(std::string cmdname, const std::string &content) const = 0;
 	virtual std::vector extractCommands(std::string cmdname, const std::string &content) const = 0;
 
diff --git a/parser.cpp b/parser.cpp
index 6d6c517..ec7b263 100644
--- a/parser.cpp
+++ b/parser.cpp
@@ -63,11 +63,10 @@ std::vector Parser::extractCategories(const std::string &content) c
 	return result;
 }
 
-std::string Parser::extractCommand(std::string cmdname, const std::string &content) const
+std::string Parser::extractFirstTag(std::string tagname, const std::string &content) const
 {
-	std::string cmd = "[cmd:" + cmdname + "]";
-	std::string cmdend = "[/cmd:" + cmdname + "]";
-
+	std::string cmd = "[" + tagname + "]";
+	std::string cmdend = "[/" + tagname + "]";
 	std::string_view view = content;
 	size_t pos = 0;
 	if((pos = view.find(cmd)) != std::string::npos)
@@ -83,6 +82,12 @@ std::string Parser::extractCommand(std::string cmdname, const std::string &conte
 	return "";
 }
 
+std::string Parser::extractCommand(std::string cmdname, const std::string &content) const
+{
+
+	return extractFirstTag("cmd:" + cmdname, content);
+}
+
 std::vector Parser::extractCommands(std::string cmdname, const std::string &content) const
 {
 	std::vector result;
@@ -170,7 +175,7 @@ std::string Parser::parse(const PageDao &pagedao, UrlProvider &provider, const s
 	std::string result;
 	// we don't care about commands, but we nevertheless replace them with empty strings
 	std::regex tagfinder(
-		R"(\[(b|i|u|s|li||ul|ol|code|blockquote|img|link|wikilink|h\d|cmd:visible|cmd:rename|cmd:redirect|cmd:pagetitle|cmd:allowinclude|cmd:permissions|category|dynamic:postlist|dynamic:includepage|dynamic:getvar|dynamic:setvar)*?\]((\s|\S)*?)\[/\1])");
+		R"(\[(b|i|u|s|li||ul|ol|code|blockquote|img|link|wikilink|h\d|cmd:visible|cmd:rename|cmd:redirect|cmd:pagetitle|cmd:allowinclude|cmd:permissions|cmd:parentpage|content|category|dynamic:postlist|dynamic:includepage|dynamic:getvar|dynamic:setvar)*?\]((\s|\S)*?)\[/\1])");
 	result = utils::regex_callback_replacer(
 		tagfinder, content,
 		[&](std::smatch &match)
@@ -182,6 +187,11 @@ std::string Parser::parse(const PageDao &pagedao, UrlProvider &provider, const s
 			{
 				content = parse(pagedao, provider, content, callback);
 			}
+			/* [content] just helps extracting the actual content of a page, pretty much noop otherwise */
+			if(tag == "content")
+			{
+				return parse(pagedao, provider, content, callback);
+			}
 			if(std::find(std::begin(justreplace), std::end(justreplace), tag) != std::end(justreplace))
 			{
 				return "<" + tag + ">" + content + "";
diff --git a/parser.h b/parser.h
index f47d22c..666b473 100644
--- a/parser.h
+++ b/parser.h
@@ -9,6 +9,7 @@ class Parser : public IParser
 	std::string processImage(std::smatch &match) const;
 
   public:
+	std::string extractFirstTag(std::string tagname, const std::string &content) const override;
 	std::string extractCommand(std::string cmdname, const std::string &content) const override;
 	std::vector extractCommands(std::string cmdname, const std::string &content) const override;
 	std::vector extractHeadlines(const std::string &content) const override;