cli: moved processing of file content into sandboxed subprocess

This commit is contained in:
Albert S. 2021-08-07 18:38:23 +02:00
parent ebea074fcb
commit ad84c8acf7
8 changed files with 198 additions and 25 deletions

View File

@ -18,10 +18,12 @@ LIBS += -luchardet -lpoppler-qt5 -lquazip5
SOURCES += \ SOURCES += \
main.cpp \ main.cpp \
encodingdetector.cpp \ encodingdetector.cpp \
pagedata.cpp \
processor.cpp \ processor.cpp \
pdfprocessor.cpp \ pdfprocessor.cpp \
defaulttextprocessor.cpp \ defaulttextprocessor.cpp \
commandadd.cpp \ commandadd.cpp \
sandboxedprocessor.cpp \
tagstripperprocessor.cpp \ tagstripperprocessor.cpp \
nothingprocessor.cpp \ nothingprocessor.cpp \
odtprocessor.cpp \ odtprocessor.cpp \
@ -44,6 +46,7 @@ HEADERS += \
defaulttextprocessor.h \ defaulttextprocessor.h \
command.h \ command.h \
commandadd.h \ commandadd.h \
sandboxedprocessor.h \
tagstripperprocessor.h \ tagstripperprocessor.h \
nothingprocessor.h \ nothingprocessor.h \
odtprocessor.h \ odtprocessor.h \

View File

@ -1,6 +1,7 @@
#include <QSqlError> #include <QSqlError>
#include <QDateTime> #include <QDateTime>
#include <QtConcurrentMap> #include <QtConcurrentMap>
#include <QProcess>
#include <functional> #include <functional>
#include "filesaver.h" #include "filesaver.h"
#include "processor.h" #include "processor.h"
@ -13,18 +14,6 @@
#include "odsprocessor.h" #include "odsprocessor.h"
#include "utils.h" #include "utils.h"
#include "logger.h" #include "logger.h"
static DefaultTextProcessor *defaultTextProcessor = new DefaultTextProcessor();
static TagStripperProcessor *tagStripperProcessor = new TagStripperProcessor();
static NothingProcessor *nothingProcessor = new NothingProcessor();
static OdtProcessor *odtProcessor = new OdtProcessor();
static OdsProcessor *odsProcessor = new OdsProcessor();
static QMap<QString, Processor *> processors{
{"pdf", new PdfProcessor()}, {"txt", defaultTextProcessor}, {"md", defaultTextProcessor},
{"py", defaultTextProcessor}, {"xml", nothingProcessor}, {"html", tagStripperProcessor},
{"java", defaultTextProcessor}, {"js", defaultTextProcessor}, {"cpp", defaultTextProcessor},
{"c", defaultTextProcessor}, {"sql", defaultTextProcessor}, {"odt", odtProcessor},
{"ods", odsProcessor}};
FileSaver::FileSaver(SqliteDbService &dbService) FileSaver::FileSaver(SqliteDbService &dbService)
{ {
@ -106,32 +95,47 @@ int FileSaver::processFiles(const QVector<QString> paths, std::function<SaveFile
SaveFileResult FileSaver::saveFile(const QFileInfo &fileInfo) SaveFileResult FileSaver::saveFile(const QFileInfo &fileInfo)
{ {
Processor *processor = processors.value(fileInfo.suffix(), nothingProcessor);
QVector<PageData> pageData; QVector<PageData> pageData;
QString absPath = fileInfo.absoluteFilePath(); QString absPath = fileInfo.absoluteFilePath();
int status = -1;
if(fileInfo.isFile()) if(fileInfo.isFile())
{ {
try QProcess process;
QStringList args;
args << "process" << absPath;
process.setProcessChannelMode(QProcess::ForwardedErrorChannel);
process.start("/proc/self/exe", args);
process.waitForStarted();
process.waitForFinished();
/* TODO: This is suboptimal as it eats lots of mem
* but avoids a weird QDataStream/QProcess behaviour
* where it thinks the process has ended when it has not...
*
* Also, there seem to be issues with reads not being blocked, so
* the only reliable way appears to be waiting until the process
* finishes.
*/
QDataStream in(process.readAllStandardOutput());
while(!in.atEnd())
{ {
if(processor->PREFERED_DATA_SOURCE == FILEPATH) PageData pd;
{ in >> pd;
pageData = processor->process(absPath); pageData.append(pd);
} }
else status = process.exitCode();
if(status != 0)
{ {
pageData = processor->process(Utils::readFile(absPath)); Logger::error() << "Error while processing" << absPath << ":"
} << "Exit code " << status << Qt::endl;
}
catch(LooqsGeneralException &e)
{
Logger::error() << "Error while processing" << absPath << ":" << e.message << Qt::endl;
return PROCESSFAIL; return PROCESSFAIL;
} }
} }
// Could happen if a file corrupted for example // Could happen if a file corrupted for example
if(pageData.isEmpty() && processor != nothingProcessor) if(pageData.isEmpty() && status != NOTHING_PROCESSED)
{ {
Logger::error() << "Could not get any content for " << absPath << Qt::endl; Logger::error() << "Could not get any content for " << absPath << Qt::endl;
} }

View File

@ -24,6 +24,7 @@
#include "commandsearch.h" #include "commandsearch.h"
#include "databasefactory.h" #include "databasefactory.h"
#include "logger.h" #include "logger.h"
#include "sandboxedprocessor.h"
#include "../shared/common.h" #include "../shared/common.h"
void printUsage(QString argv0) void printUsage(QString argv0)
@ -59,6 +60,7 @@ int main(int argc, char *argv[])
QCoreApplication app(argc, argv); QCoreApplication app(argc, argv);
QStringList args = app.arguments(); QStringList args = app.arguments();
QString argv0 = args.takeFirst(); QString argv0 = args.takeFirst();
if(args.length() < 1) if(args.length() < 1)
{ {
printUsage(argv0); printUsage(argv0);
@ -74,11 +76,24 @@ int main(int argc, char *argv[])
Logger::error() << "Error: " << e.message; Logger::error() << "Error: " << e.message;
return 1; return 1;
} }
qRegisterMetaType<PageData>();
QString connectionString = Common::databasePath(); QString connectionString = Common::databasePath();
DatabaseFactory dbFactory(connectionString); DatabaseFactory dbFactory(connectionString);
SqliteDbService dbService(dbFactory); SqliteDbService dbService(dbFactory);
QString commandName = args.first(); QString commandName = args.first();
/* Internal sub-command: FileSaver::saveFile re-executes this binary as
 * "process <path>" so that file content is parsed in a separate process.
 */
if(commandName == "process")
{
    /* args still contains the command name at index 0 (only argv0 was
     * removed above), so the file name at index 1 requires at least two
     * entries. Checking for "< 1" would let args.at(1) run out of range.
     */
    if(args.length() < 2)
    {
        qDebug() << "Filename is required";
        return 1;
    }
    QString file = args.at(1);
    SandboxedProcessor processor(file);
    /* The return value of process() becomes the exit code of this process */
    return processor.process();
}
Command *cmd = commandFromName(commandName, dbService); Command *cmd = commandFromName(commandName, dbService);
if(cmd != nullptr) if(cmd != nullptr)
{ {

13
cli/pagedata.cpp Normal file
View File

@ -0,0 +1,13 @@
#include "pagedata.h"
/* Serializes a PageData: first the page number, then the content.
 * Must stay in the same field order as the matching operator>>.
 */
QDataStream &operator<<(QDataStream &out, const PageData &pd)
{
    return out << pd.pagenumber << pd.content;
}
/* Deserializes a PageData: reads the page number, then the content,
 * i.e. the same field order operator<< writes them in.
 */
QDataStream &operator>>(QDataStream &in, PageData &pd)
{
    return in >> pd.pagenumber >> pd.content;
}

View File

@ -1,6 +1,9 @@
#ifndef PAGEDATA_H #ifndef PAGEDATA_H
#define PAGEDATA_H #define PAGEDATA_H
#include <QString> #include <QString>
#include <QMetaType>
#include <QDataStream>
class PageData class PageData
{ {
public: public:
@ -10,10 +13,17 @@ class PageData
PageData() PageData()
{ {
} }
PageData(unsigned int pagenumber, QString content) PageData(unsigned int pagenumber, QString content)
{ {
this->pagenumber = pagenumber; this->pagenumber = pagenumber;
this->content = content; this->content = content;
} }
}; };
Q_DECLARE_METATYPE(PageData);
QDataStream &operator<<(QDataStream &out, const PageData &pd);
QDataStream &operator>>(QDataStream &in, PageData &pd);
#endif // PAGEDATA_H #endif // PAGEDATA_H

View File

@ -10,6 +10,8 @@ enum DataSource
ARRAY ARRAY
}; };
#define NOTHING_PROCESSED 4
class Processor class Processor
{ {
public: public:

103
cli/sandboxedprocessor.cpp Normal file
View File

@ -0,0 +1,103 @@
#include <QFile>
#include <QFileInfo>
#include <QDataStream>
#include "sandboxedprocessor.h"
#include "pdfprocessor.h"
#include "defaulttextprocessor.h"
#include "tagstripperprocessor.h"
#include "nothingprocessor.h"
#include "odtprocessor.h"
#include "odsprocessor.h"
#include "../submodules/qssb.h/qssb.h"
#include "logger.h"
/* Processor instances shared between several file suffixes.
 * They are allocated once at start-up and never deleted.
 */
static DefaultTextProcessor *defaultTextProcessor = new DefaultTextProcessor();
static TagStripperProcessor *tagStripperProcessor = new TagStripperProcessor();
static NothingProcessor *nothingProcessor = new NothingProcessor();
static OdtProcessor *odtProcessor = new OdtProcessor();
static OdsProcessor *odsProcessor = new OdsProcessor();

/* Maps a file suffix (without the dot) to the processor handling it.
 * Suffixes not listed here fall back to nothingProcessor at lookup time.
 */
static QMap<QString, Processor *> processors = {
    // Documents
    {"pdf", new PdfProcessor()},
    {"odt", odtProcessor},
    {"ods", odsProcessor},
    // Markup
    {"html", tagStripperProcessor},
    {"xml", nothingProcessor},
    // Plain text and source code
    {"txt", defaultTextProcessor},
    {"md", defaultTextProcessor},
    {"py", defaultTextProcessor},
    {"java", defaultTextProcessor},
    {"js", defaultTextProcessor},
    {"cpp", defaultTextProcessor},
    {"c", defaultTextProcessor},
    {"sql", defaultTextProcessor},
};
/* Enables the qssb sandbox for the current process.
 *
 * The policy sets namespace_options to QSSB_UNSHARE_NETWORK | QSSB_UNSHARE_USER.
 * If readablePath is non-empty, a QSSB_FS_ALLOW_READ path policy is appended
 * for it; otherwise no_fs is set on the policy.
 *
 * Does not return on failure: the process exits with EXIT_FAILURE if the
 * policy cannot be created or enabled.
 */
void SandboxedProcessor::enableSandbox(QString readablePath)
{
    struct qssb_policy *policy = qssb_init_policy();
    if(policy == nullptr)
    {
        /* NOTE(review): assumes qssb_init_policy() signals failure with a
         * null pointer - confirm against qssb.h. Without this check the
         * assignments below would dereference it.
         */
        qDebug() << "Failed to initialize sandbox policy";
        exit(EXIT_FAILURE);
    }
    policy->namespace_options = QSSB_UNSHARE_NETWORK | QSSB_UNSHARE_USER;

    /* Declared at function scope on purpose: the buffer behind c_str() has
     * to stay valid until qssb_enable_policy() has run, in case the library
     * keeps the pointer instead of copying the string (TODO confirm).
     */
    std::string readablePathLocation;
    if(!readablePath.isEmpty())
    {
        readablePathLocation = readablePath.toStdString();
        qssb_append_path_policy(policy, QSSB_FS_ALLOW_READ, readablePathLocation.c_str());
    }
    else
    {
        policy->no_fs = 1;
    }

    int ret = qssb_enable_policy(policy);
    if(ret != 0)
    {
        qDebug() << "Failed to establish sandbox: " << ret;
        exit(EXIT_FAILURE);
    }
    qssb_free_policy(policy);
}
void SandboxedProcessor::printResults(const QVector<PageData> &pageData)
{
QFile fsstdout;
fsstdout.open(stdout, QIODevice::WriteOnly);
QDataStream stream(&fsstdout);
for(const PageData &data : pageData)
{
stream << data;
// fsstdout.flush();
}
fsstdout.close();
}
int SandboxedProcessor::process()
{
QFileInfo fileInfo(this->filePath);
Processor *processor = processors.value(fileInfo.suffix(), nothingProcessor);
if(processor == nothingProcessor)
{
/* Nothing to do */
return NOTHING_PROCESSED;
}
QVector<PageData> pageData;
QString absPath = fileInfo.absoluteFilePath();
try
{
if(processor->PREFERED_DATA_SOURCE == FILEPATH)
{
/* Read access to FS needed... doh..*/
enableSandbox(absPath);
pageData = processor->process(absPath);
}
else
{
QByteArray data = Utils::readFile(absPath);
enableSandbox();
pageData = processor->process(data);
}
}
catch(LooqsGeneralException &e)
{
Logger::error() << "Error while processing" << absPath << ":" << e.message << Qt::endl;
return 3 /* PROCESSFAIL */;
}
printResults(pageData);
return 0;
}

23
cli/sandboxedprocessor.h Normal file
View File

@ -0,0 +1,23 @@
#ifndef SANDBOXEDPROCESSOR_H
#define SANDBOXEDPROCESSOR_H
#include <QString>
#include "pagedata.h"
/* Runs the processor matching a single file with the sandbox enabled and
 * prints the extracted pages to stdout. Used by the "process" sub-command.
 */
class SandboxedProcessor
{
  private:
    /* Path of the file to process, as passed to the constructor */
    QString filePath;

    /* Enables the sandbox; an empty readablePath means no path policy is added */
    void enableSandbox(QString readablePath = "");

    /* Serializes the given pages to stdout */
    void printResults(const QVector<PageData> &pageData);

  public:
    SandboxedProcessor(QString filepath) : filePath(filepath)
    {
    }

    /* Processes the file; the return value is used as the process exit code */
    int process();
};
#endif // SANDBOXEDPROCESSOR_H