From ad84c8acf71d3f165993d3ff7d299d5e3048df93 Mon Sep 17 00:00:00 2001 From: Albert S Date: Sat, 7 Aug 2021 18:38:23 +0200 Subject: [PATCH] cli: moved processing of file content into sandboxed subprocess --- cli/cli.pro | 3 ++ cli/filesaver.cpp | 54 ++++++++++--------- cli/main.cpp | 15 ++++++ cli/pagedata.cpp | 13 +++++ cli/pagedata.h | 10 ++++ cli/processor.h | 2 + cli/sandboxedprocessor.cpp | 103 +++++++++++++++++++++++++++++++++++++ cli/sandboxedprocessor.h | 23 +++++++++ 8 files changed, 198 insertions(+), 25 deletions(-) create mode 100644 cli/pagedata.cpp create mode 100644 cli/sandboxedprocessor.cpp create mode 100644 cli/sandboxedprocessor.h diff --git a/cli/cli.pro b/cli/cli.pro index 9c985c7..2430ed3 100644 --- a/cli/cli.pro +++ b/cli/cli.pro @@ -18,10 +18,12 @@ LIBS += -luchardet -lpoppler-qt5 -lquazip5 SOURCES += \ main.cpp \ encodingdetector.cpp \ + pagedata.cpp \ processor.cpp \ pdfprocessor.cpp \ defaulttextprocessor.cpp \ commandadd.cpp \ + sandboxedprocessor.cpp \ tagstripperprocessor.cpp \ nothingprocessor.cpp \ odtprocessor.cpp \ @@ -44,6 +46,7 @@ HEADERS += \ defaulttextprocessor.h \ command.h \ commandadd.h \ + sandboxedprocessor.h \ tagstripperprocessor.h \ nothingprocessor.h \ odtprocessor.h \ diff --git a/cli/filesaver.cpp b/cli/filesaver.cpp index 2ae1c14..a0b2d94 100644 --- a/cli/filesaver.cpp +++ b/cli/filesaver.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include "filesaver.h" #include "processor.h" @@ -13,18 +14,6 @@ #include "odsprocessor.h" #include "utils.h" #include "logger.h" -static DefaultTextProcessor *defaultTextProcessor = new DefaultTextProcessor(); -static TagStripperProcessor *tagStripperProcessor = new TagStripperProcessor(); -static NothingProcessor *nothingProcessor = new NothingProcessor(); -static OdtProcessor *odtProcessor = new OdtProcessor(); -static OdsProcessor *odsProcessor = new OdsProcessor(); - -static QMap processors{ - {"pdf", new PdfProcessor()}, {"txt", defaultTextProcessor}, {"md", 
defaultTextProcessor}, - {"py", defaultTextProcessor}, {"xml", nothingProcessor}, {"html", tagStripperProcessor}, - {"java", defaultTextProcessor}, {"js", defaultTextProcessor}, {"cpp", defaultTextProcessor}, - {"c", defaultTextProcessor}, {"sql", defaultTextProcessor}, {"odt", odtProcessor}, - {"ods", odsProcessor}}; FileSaver::FileSaver(SqliteDbService &dbService) { @@ -106,32 +95,47 @@ int FileSaver::processFiles(const QVector paths, std::function pageData; QString absPath = fileInfo.absoluteFilePath(); + int status = -1; if(fileInfo.isFile()) { - try + QProcess process; + QStringList args; + args << "process" << absPath; + process.setProcessChannelMode(QProcess::ForwardedErrorChannel); + process.start("/proc/self/exe", args); + process.waitForStarted(); + process.waitForFinished(); + + /* TODO: This is suboptimal as it eats lots of mem + * but avoids a weird QDataStream/QProcess behaviour + * where it thinks the process has ended when it has not... + * + * Also, there seem to be issues with reads not being blocked, so + * the only reliable way appears to be waiting until the process + * finishes. 
+ */ + QDataStream in(process.readAllStandardOutput()); + while(!in.atEnd()) { - if(processor->PREFERED_DATA_SOURCE == FILEPATH) - { - pageData = processor->process(absPath); - } - else - { - pageData = processor->process(Utils::readFile(absPath)); - } + PageData pd; + in >> pd; + pageData.append(pd); } - catch(LooqsGeneralException &e) + status = process.exitCode(); + if(status != 0 && status != NOTHING_PROCESSED) /* NOTHING_PROCESSED means the child had nothing to do for this file; that is not a failure */ { - Logger::error() << "Error while processing" << absPath << ":" << e.message << Qt::endl; + Logger::error() << "Error while processing" << absPath << ":" + << "Exit code " << status << Qt::endl; + return PROCESSFAIL; } } // Could happen if a file corrupted for example - if(pageData.isEmpty() && processor != nothingProcessor) + if(pageData.isEmpty() && status != NOTHING_PROCESSED) { Logger::error() << "Could not get any content for " << absPath << Qt::endl; } diff --git a/cli/main.cpp b/cli/main.cpp index a490269..9439b1d 100644 --- a/cli/main.cpp +++ b/cli/main.cpp @@ -24,6 +24,7 @@ #include "commandsearch.h" #include "databasefactory.h" #include "logger.h" +#include "sandboxedprocessor.h" #include "../shared/common.h" void printUsage(QString argv0) @@ -59,6 +60,7 @@ int main(int argc, char *argv[]) QCoreApplication app(argc, argv); QStringList args = app.arguments(); QString argv0 = args.takeFirst(); + if(args.length() < 1) { printUsage(argv0); @@ -74,11 +76,24 @@ int main(int argc, char *argv[]) Logger::error() << "Error: " << e.message; return 1; } + qRegisterMetaType(); QString connectionString = Common::databasePath(); DatabaseFactory dbFactory(connectionString); SqliteDbService dbService(dbFactory); QString commandName = args.first(); + if(commandName == "process") + { + if(args.length() < 2) /* args[0] is the command name itself; the file path is args[1] */ + { + qDebug() << "Filename is required"; + return 1; + } + + QString file = args.at(1); + SandboxedProcessor processor(file); + return processor.process(); + } Command *cmd = commandFromName(commandName, dbService); if(cmd != nullptr) { diff --git a/cli/pagedata.cpp b/cli/pagedata.cpp 
new file mode 100644 index 0000000..6d7a19e --- /dev/null +++ b/cli/pagedata.cpp @@ -0,0 +1,13 @@ +#include "pagedata.h" + +QDataStream &operator<<(QDataStream &out, const PageData &pd) +{ + out << pd.pagenumber << pd.content; + return out; +} + +QDataStream &operator>>(QDataStream &in, PageData &pd) +{ + in >> pd.pagenumber >> pd.content; + return in; +} diff --git a/cli/pagedata.h b/cli/pagedata.h index d550116..8697d56 100644 --- a/cli/pagedata.h +++ b/cli/pagedata.h @@ -1,6 +1,9 @@ #ifndef PAGEDATA_H #define PAGEDATA_H #include +#include +#include + class PageData { public: @@ -10,10 +13,17 @@ class PageData PageData() { } + PageData(unsigned int pagenumber, QString content) { this->pagenumber = pagenumber; this->content = content; } }; + +Q_DECLARE_METATYPE(PageData); + +QDataStream &operator<<(QDataStream &out, const PageData &pd); +QDataStream &operator>>(QDataStream &in, PageData &pd); + #endif // PAGEDATA_H diff --git a/cli/processor.h b/cli/processor.h index 04bd04a..82f1c27 100644 --- a/cli/processor.h +++ b/cli/processor.h @@ -10,6 +10,8 @@ enum DataSource ARRAY }; +#define NOTHING_PROCESSED 4 + class Processor { public: diff --git a/cli/sandboxedprocessor.cpp b/cli/sandboxedprocessor.cpp new file mode 100644 index 0000000..3341c57 --- /dev/null +++ b/cli/sandboxedprocessor.cpp @@ -0,0 +1,103 @@ +#include +#include +#include +#include "sandboxedprocessor.h" +#include "pdfprocessor.h" +#include "defaulttextprocessor.h" +#include "tagstripperprocessor.h" +#include "nothingprocessor.h" +#include "odtprocessor.h" +#include "odsprocessor.h" +#include "../submodules/qssb.h/qssb.h" +#include "logger.h" + +static DefaultTextProcessor *defaultTextProcessor = new DefaultTextProcessor(); +static TagStripperProcessor *tagStripperProcessor = new TagStripperProcessor(); +static NothingProcessor *nothingProcessor = new NothingProcessor(); +static OdtProcessor *odtProcessor = new OdtProcessor(); +static OdsProcessor *odsProcessor = new OdsProcessor(); + +static QMap 
processors{ + {"pdf", new PdfProcessor()}, {"txt", defaultTextProcessor}, {"md", defaultTextProcessor}, + {"py", defaultTextProcessor}, {"xml", nothingProcessor}, {"html", tagStripperProcessor}, + {"java", defaultTextProcessor}, {"js", defaultTextProcessor}, {"cpp", defaultTextProcessor}, + {"c", defaultTextProcessor}, {"sql", defaultTextProcessor}, {"odt", odtProcessor}, + {"ods", odsProcessor}}; + +void SandboxedProcessor::enableSandbox(QString readablePath) +{ + struct qssb_policy *policy = qssb_init_policy(); + + policy->namespace_options = QSSB_UNSHARE_NETWORK | QSSB_UNSHARE_USER; + + if(!readablePath.isEmpty()) + { + std::string readablePathLocation = readablePath.toStdString(); + qssb_append_path_policy(policy, QSSB_FS_ALLOW_READ, readablePathLocation.c_str()); + } + else + { + policy->no_fs = 1; + } + int ret = qssb_enable_policy(policy); + if(ret != 0) + { + qDebug() << "Failed to establish sandbox: " << ret; + exit(EXIT_FAILURE); + } + qssb_free_policy(policy); +} + +void SandboxedProcessor::printResults(const QVector &pageData) +{ + QFile fsstdout; + fsstdout.open(stdout, QIODevice::WriteOnly); + QDataStream stream(&fsstdout); + + for(const PageData &data : pageData) + { + stream << data; + // fsstdout.flush(); + } + + fsstdout.close(); +} + +int SandboxedProcessor::process() +{ + QFileInfo fileInfo(this->filePath); + Processor *processor = processors.value(fileInfo.suffix(), nothingProcessor); + + if(processor == nothingProcessor) + { + /* Nothing to do */ + return NOTHING_PROCESSED; + } + + QVector pageData; + QString absPath = fileInfo.absoluteFilePath(); + + try + { + if(processor->PREFERED_DATA_SOURCE == FILEPATH) + { + /* Read access to FS needed... 
doh..*/ + enableSandbox(absPath); + pageData = processor->process(absPath); + } + else + { + QByteArray data = Utils::readFile(absPath); + enableSandbox(); + pageData = processor->process(data); + } + } + catch(LooqsGeneralException &e) + { + Logger::error() << "Error while processing" << absPath << ":" << e.message << Qt::endl; + return 3 /* PROCESSFAIL */; + } + + printResults(pageData); + return 0; +} diff --git a/cli/sandboxedprocessor.h b/cli/sandboxedprocessor.h new file mode 100644 index 0000000..416d2e0 --- /dev/null +++ b/cli/sandboxedprocessor.h @@ -0,0 +1,23 @@ +#ifndef SANDBOXEDPROCESSOR_H +#define SANDBOXEDPROCESSOR_H +#include +#include "pagedata.h" + +class SandboxedProcessor +{ + private: + QString filePath; + + void enableSandbox(QString readablePath = ""); + void printResults(const QVector &pageData); + + public: + SandboxedProcessor(QString filepath) + { + this->filePath = filepath; + } + + int process(); +}; + +#endif // SANDBOXEDPROCESSOR_H