Temporarily switch to boost::regex

In combination with musl, the parser crashed; with glibc it works fine. Could
not pin down the exact reason, mainly due to time constraints. Strange: in both
cases libstdc++ is actually used, so this is very odd.
This commit is contained in:
Albert S. 2018-11-11 21:28:45 +01:00
джерело 28bf5ab6d4
коміт 882871c34f
5 змінених файлів з 18 додано та 16 видалено

@ -1,8 +1,8 @@
CXXFLAGS=-std=c++17 -O0 -g -pg -no-pie -pipe -MMD -Wall -Wextra CXXFLAGS=-std=c++17 -O0 -g -no-pie -pipe -MMD -Wall -Wextra
RELEASE_CXXFLAGS=-std=c++17 -O3 -pipe -MMD -Wall -Wextra RELEASE_CXXFLAGS=-std=c++17 -O3 -pipe -MMD -Wall -Wextra
LDFLAGS=-lsqlite3 -lpthread -lcrypto -lstdc++fs LDFLAGS=-lsqlite3 -lpthread -lcrypto -lboost_regex -lstdc++fs
#currently default g++ versions in most distros do not usually support c++17 well enough #currently default g++ versions in most distros do not usually support c++17 well enough
CXX=g++-8.2.0 CXX=g++-8.2.0

@ -27,7 +27,6 @@ SOFTWARE.
#include "parser.h" #include "parser.h"
#include "utils.h" #include "utils.h"
#include "htmllink.h" #include "htmllink.h"
std::vector<Headline> Parser::extractHeadlines(std::string content) const std::vector<Headline> Parser::extractHeadlines(std::string content) const
{ {
std::vector<Headline> result; std::vector<Headline> result;
@ -82,7 +81,7 @@ std::string Parser::extractCommand(std::string cmdname, std::string content) con
} }
return ""; return "";
} }
std::string Parser::processLink(const PageDao &pageDao, UrlProvider &urlProvider, std::smatch &match) const std::string Parser::processLink(const PageDao &pageDao, UrlProvider &urlProvider, boost::smatch &match) const
{ {
std::string linktag = match.str(1); std::string linktag = match.str(1);
std::string inside = match.str(2); std::string inside = match.str(2);
@ -121,8 +120,8 @@ std::string Parser::parse(const PageDao &pagedao, UrlProvider &provider, std::st
{ {
std::string result; std::string result;
// we don't care about commands, but we nevertheless replace them with empty strings // we don't care about commands, but we nevertheless replace them with empty strings
std::regex tagfinder(R"(\[(b|i|u|li||ul|ol|link|wikilink|h\d|cmd:rename|cmd:redirect)*?\]((\s|\S)*?)\[/\1])"); boost::regex tagfinder(R"(\[(b|i|u|li||ul|ol|link|wikilink|h\d|cmd:rename|cmd:redirect)*?\]((\s|\S)*?)\[/\1])");
result = utils::regex_callback_replacer(tagfinder, content, [&](std::smatch &match) { result = utils::regex_callback_replacer(tagfinder, content, [&](boost::smatch &match) {
std::string tag = match.str(1); std::string tag = match.str(1);
std::string content = match.str(2); std::string content = match.str(2);
std::string justreplace[] = {"b", "i", "u", "ul", "li", "ol"}; std::string justreplace[] = {"b", "i", "u", "ul", "li", "ol"};

@ -1,12 +1,12 @@
#ifndef PARSER_H #ifndef PARSER_H
#define PARSER_H #define PARSER_H
#include <regex> #include <boost/regex.hpp>
#include "iparser.h" #include "iparser.h"
class Parser : public IParser class Parser : public IParser
{ {
private: private:
std::string processLink(const PageDao &pageDao, UrlProvider &urlProvider, std::smatch &match) const; std::string processLink(const PageDao &pageDao, UrlProvider &urlProvider, boost::smatch &match) const;
public: public:
std::string extractCommand(std::string cmdname, std::string content) const; std::string extractCommand(std::string cmdname, std::string content) const;

@ -24,8 +24,10 @@ SOFTWARE.
#include <map> #include <map>
#include <cstdlib> #include <cstdlib>
#include <fstream> #include <fstream>
#include <boost/regex.hpp>
#include "logger.h" #include "logger.h"
#include "utils.h" #include "utils.h"
// TODO: instead of returning vector maybe provide an iterator version too. // TODO: instead of returning vector maybe provide an iterator version too.
// TODO: % may not be necessary (was in C version just to be sure against format string attacks // TODO: % may not be necessary (was in C version just to be sure against format string attacks
@ -133,16 +135,16 @@ std::string utils::readCompleteFile(std::string_view filepath)
return content; return content;
} }
std::string utils::regex_callback_replacer(std::regex regex, const std::string &input, std::string utils::regex_callback_replacer(boost::regex regex, const std::string &input,
std::function<std::string(std::smatch &)> callback) std::function<std::string(boost::smatch &)> callback)
{ {
std::string result; std::string result;
auto tagsbegin = std::sregex_iterator(input.begin(), input.end(), regex); auto tagsbegin = boost::sregex_iterator(input.begin(), input.end(), regex);
auto tagsend = std::sregex_iterator(); auto tagsend = boost::sregex_iterator();
auto matchbegin = 0; auto matchbegin = 0;
for(std::sregex_iterator i = tagsbegin; i != tagsend; ++i) for(boost::sregex_iterator i = tagsbegin; i != tagsend; ++i)
{ {
std::smatch match = *i; boost::smatch match = *i;
auto matchlength = match.length(0); auto matchlength = match.length(0);
auto matchpos = match.position(); auto matchpos = match.position();

@ -8,6 +8,7 @@
#include <map> #include <map>
#include <regex> #include <regex>
#include <ctime> #include <ctime>
#include <boost/regex.hpp>
namespace utils namespace utils
{ {
@ -57,8 +58,8 @@ template <class T, class U> std::vector<U> getAll(std::multimap<T, U> map, T key
return result; return result;
} }
std::string regex_callback_replacer(std::regex regex, const std::string &input, std::string regex_callback_replacer(boost::regex regex, const std::string &input,
std::function<std::string(std::smatch &)> callback); std::function<std::string(boost::smatch &)> callback);
std::string readCompleteFile(std::string_view filepath); std::string readCompleteFile(std::string_view filepath);