cli: moved processing of file content into sandboxed subprocess

This commit is contained in:
Albert S. 2021-08-07 18:38:23 +02:00
rodzic ebea074fcb
commit ad84c8acf7
8 zmienionych plików z 198 dodań i 25 usunięć

Wyświetl plik

@ -18,10 +18,12 @@ LIBS += -luchardet -lpoppler-qt5 -lquazip5
SOURCES += \
main.cpp \
encodingdetector.cpp \
pagedata.cpp \
processor.cpp \
pdfprocessor.cpp \
defaulttextprocessor.cpp \
commandadd.cpp \
sandboxedprocessor.cpp \
tagstripperprocessor.cpp \
nothingprocessor.cpp \
odtprocessor.cpp \
@ -44,6 +46,7 @@ HEADERS += \
defaulttextprocessor.h \
command.h \
commandadd.h \
sandboxedprocessor.h \
tagstripperprocessor.h \
nothingprocessor.h \
odtprocessor.h \

Wyświetl plik

@ -1,6 +1,7 @@
#include <QSqlError>
#include <QDateTime>
#include <QtConcurrentMap>
#include <QProcess>
#include <functional>
#include "filesaver.h"
#include "processor.h"
@ -13,18 +14,6 @@
#include "odsprocessor.h"
#include "utils.h"
#include "logger.h"
static DefaultTextProcessor *defaultTextProcessor = new DefaultTextProcessor();
static TagStripperProcessor *tagStripperProcessor = new TagStripperProcessor();
static NothingProcessor *nothingProcessor = new NothingProcessor();
static OdtProcessor *odtProcessor = new OdtProcessor();
static OdsProcessor *odsProcessor = new OdsProcessor();
static QMap<QString, Processor *> processors{
{"pdf", new PdfProcessor()}, {"txt", defaultTextProcessor}, {"md", defaultTextProcessor},
{"py", defaultTextProcessor}, {"xml", nothingProcessor}, {"html", tagStripperProcessor},
{"java", defaultTextProcessor}, {"js", defaultTextProcessor}, {"cpp", defaultTextProcessor},
{"c", defaultTextProcessor}, {"sql", defaultTextProcessor}, {"odt", odtProcessor},
{"ods", odsProcessor}};
FileSaver::FileSaver(SqliteDbService &dbService)
{
@ -106,32 +95,47 @@ int FileSaver::processFiles(const QVector<QString> paths, std::function<SaveFile
SaveFileResult FileSaver::saveFile(const QFileInfo &fileInfo)
{
Processor *processor = processors.value(fileInfo.suffix(), nothingProcessor);
QVector<PageData> pageData;
QString absPath = fileInfo.absoluteFilePath();
int status = -1;
if(fileInfo.isFile())
{
try
QProcess process;
QStringList args;
args << "process" << absPath;
process.setProcessChannelMode(QProcess::ForwardedErrorChannel);
process.start("/proc/self/exe", args);
process.waitForStarted();
process.waitForFinished();
/* TODO: This is suboptimal as it eats lots of mem
* but avoids a weird QDataStream/QProcess behaviour
* where it thinks the process has ended when it has not...
*
* Also, there seem to be issues with reads not being blocked, so
* the only reliable way appears to be waiting until the process
* finishes.
*/
QDataStream in(process.readAllStandardOutput());
while(!in.atEnd())
{
if(processor->PREFERED_DATA_SOURCE == FILEPATH)
{
pageData = processor->process(absPath);
}
else
{
pageData = processor->process(Utils::readFile(absPath));
}
PageData pd;
in >> pd;
pageData.append(pd);
}
catch(LooqsGeneralException &e)
status = process.exitCode();
if(status != 0)
{
Logger::error() << "Error while processing" << absPath << ":" << e.message << Qt::endl;
Logger::error() << "Error while processing" << absPath << ":"
<< "Exit code " << status << Qt::endl;
return PROCESSFAIL;
}
}
// Could happen if a file corrupted for example
if(pageData.isEmpty() && processor != nothingProcessor)
if(pageData.isEmpty() && status != NOTHING_PROCESSED)
{
Logger::error() << "Could not get any content for " << absPath << Qt::endl;
}

Wyświetl plik

@ -24,6 +24,7 @@
#include "commandsearch.h"
#include "databasefactory.h"
#include "logger.h"
#include "sandboxedprocessor.h"
#include "../shared/common.h"
void printUsage(QString argv0)
@ -59,6 +60,7 @@ int main(int argc, char *argv[])
QCoreApplication app(argc, argv);
QStringList args = app.arguments();
QString argv0 = args.takeFirst();
if(args.length() < 1)
{
printUsage(argv0);
@ -74,11 +76,24 @@ int main(int argc, char *argv[])
Logger::error() << "Error: " << e.message;
return 1;
}
qRegisterMetaType<PageData>();
QString connectionString = Common::databasePath();
DatabaseFactory dbFactory(connectionString);
SqliteDbService dbService(dbFactory);
QString commandName = args.first();
/* Internal "process" sub-command: extracts the content of a single file and
 * exits with the processor's status code. FileSaver re-invokes this binary
 * with "process <path>" for every file it wants to index.
 *
 * args no longer contains argv0 at this point (it was removed with
 * takeFirst()), so args[0] is the command name and args[1] must be the
 * file name. Hence at least two entries are required before args.at(1)
 * may be accessed.
 */
if(commandName == "process")
{
    if(args.length() < 2)
    {
        qDebug() << "Filename is required";
        return 1;
    }
    QString file = args.at(1);
    SandboxedProcessor processor(file);
    return processor.process();
}
Command *cmd = commandFromName(commandName, dbService);
if(cmd != nullptr)
{

13
cli/pagedata.cpp Normal file
Wyświetl plik

@ -0,0 +1,13 @@
#include "pagedata.h"
/* Serializes a PageData into the stream: the page number first, followed by
 * the page content. Must stay in sync with the field order of operator>>.
 */
QDataStream &operator<<(QDataStream &out, const PageData &pd)
{
    out << pd.pagenumber;
    out << pd.content;
    return out;
}
/* Deserializes a PageData from the stream, reading the page number first and
 * then the page content, i.e. the same order operator<< writes them in.
 */
QDataStream &operator>>(QDataStream &in, PageData &pd)
{
    in >> pd.pagenumber;
    in >> pd.content;
    return in;
}

Wyświetl plik

@ -1,6 +1,9 @@
#ifndef PAGEDATA_H
#define PAGEDATA_H
#include <QString>
#include <QMetaType>
#include <QDataStream>
class PageData
{
public:
@ -10,10 +13,17 @@ class PageData
PageData()
{
}
PageData(unsigned int pagenumber, QString content)
{
this->pagenumber = pagenumber;
this->content = content;
}
};
Q_DECLARE_METATYPE(PageData);
QDataStream &operator<<(QDataStream &out, const PageData &pd);
QDataStream &operator>>(QDataStream &in, PageData &pd);
#endif // PAGEDATA_H

Wyświetl plik

@ -10,6 +10,8 @@ enum DataSource
ARRAY
};
#define NOTHING_PROCESSED 4
class Processor
{
public:

103
cli/sandboxedprocessor.cpp Normal file
Wyświetl plik

@ -0,0 +1,103 @@
#include <QFile>
#include <QFileInfo>
#include <QDataStream>
#include "sandboxedprocessor.h"
#include "pdfprocessor.h"
#include "defaulttextprocessor.h"
#include "tagstripperprocessor.h"
#include "nothingprocessor.h"
#include "odtprocessor.h"
#include "odsprocessor.h"
#include "../submodules/qssb.h/qssb.h"
#include "logger.h"
/* Processor instances shared between several file suffixes in the table
 * below. They are created once at static initialization time and are never
 * deleted in this file.
 */
static DefaultTextProcessor *defaultTextProcessor = new DefaultTextProcessor();
static TagStripperProcessor *tagStripperProcessor = new TagStripperProcessor();
static NothingProcessor *nothingProcessor = new NothingProcessor();
static OdtProcessor *odtProcessor = new OdtProcessor();
static OdsProcessor *odsProcessor = new OdsProcessor();
/* Maps a file suffix (as returned by QFileInfo::suffix(), without the dot) to
 * the processor responsible for it. SandboxedProcessor::process() falls back
 * to nothingProcessor for suffixes that are not listed here.
 */
static QMap<QString, Processor *> processors{
{"pdf", new PdfProcessor()}, {"txt", defaultTextProcessor}, {"md", defaultTextProcessor},
{"py", defaultTextProcessor}, {"xml", nothingProcessor}, {"html", tagStripperProcessor},
{"java", defaultTextProcessor}, {"js", defaultTextProcessor}, {"cpp", defaultTextProcessor},
{"c", defaultTextProcessor}, {"sql", defaultTextProcessor}, {"odt", odtProcessor},
{"ods", odsProcessor}};
/* Applies a qssb sandbox policy to the current process.
 *
 * The policy requests unsharing of the network and user namespaces. If
 * readablePath is non-empty, read access to that single path is added to the
 * policy; otherwise the policy's no_fs flag is set (presumably denying all
 * file system access -- see qssb.h for the exact semantics).
 *
 * This function does not return on failure: if the policy cannot be created
 * or enabled, a message is printed and the process exits with EXIT_FAILURE,
 * so that no file content is ever processed outside the sandbox.
 */
void SandboxedProcessor::enableSandbox(QString readablePath)
{
    struct qssb_policy *policy = qssb_init_policy();
    if(policy == nullptr)
    {
        /* Without a policy object there is nothing we could enable; refuse
         * to continue unsandboxed instead of dereferencing a null pointer. */
        qDebug() << "Failed to establish sandbox: could not initialize policy";
        exit(EXIT_FAILURE);
    }

    policy->namespace_options = QSSB_UNSHARE_NETWORK | QSSB_UNSHARE_USER;

    if(!readablePath.isEmpty())
    {
        /* Keep the std::string alive while qssb reads the C string */
        std::string readablePathLocation = readablePath.toStdString();
        qssb_append_path_policy(policy, QSSB_FS_ALLOW_READ, readablePathLocation.c_str());
    }
    else
    {
        policy->no_fs = 1;
    }

    int ret = qssb_enable_policy(policy);
    if(ret != 0)
    {
        qDebug() << "Failed to establish sandbox: " << ret;
        exit(EXIT_FAILURE);
    }
    qssb_free_policy(policy);
}
void SandboxedProcessor::printResults(const QVector<PageData> &pageData)
{
QFile fsstdout;
fsstdout.open(stdout, QIODevice::WriteOnly);
QDataStream stream(&fsstdout);
for(const PageData &data : pageData)
{
stream << data;
// fsstdout.flush();
}
fsstdout.close();
}
int SandboxedProcessor::process()
{
QFileInfo fileInfo(this->filePath);
Processor *processor = processors.value(fileInfo.suffix(), nothingProcessor);
if(processor == nothingProcessor)
{
/* Nothing to do */
return NOTHING_PROCESSED;
}
QVector<PageData> pageData;
QString absPath = fileInfo.absoluteFilePath();
try
{
if(processor->PREFERED_DATA_SOURCE == FILEPATH)
{
/* Read access to FS needed... doh..*/
enableSandbox(absPath);
pageData = processor->process(absPath);
}
else
{
QByteArray data = Utils::readFile(absPath);
enableSandbox();
pageData = processor->process(data);
}
}
catch(LooqsGeneralException &e)
{
Logger::error() << "Error while processing" << absPath << ":" << e.message << Qt::endl;
return 3 /* PROCESSFAIL */;
}
printResults(pageData);
return 0;
}

23
cli/sandboxedprocessor.h Normal file
Wyświetl plik

@ -0,0 +1,23 @@
#ifndef SANDBOXEDPROCESSOR_H
#define SANDBOXEDPROCESSOR_H
#include <QString>
#include "pagedata.h"
/* Processes a single file from within a sandbox and prints the extracted
 * pages to stdout. Intended to be run in a dedicated subprocess.
 */
class SandboxedProcessor
{
  public:
    /* filepath: path of the file whose content should be extracted */
    SandboxedProcessor(QString filepath) : filePath(filepath)
    {
    }

    /* Runs the processor matching the file's suffix. Returns 0 on success
     * and a non-zero status code otherwise (see sandboxedprocessor.cpp). */
    int process();

  private:
    QString filePath;

    /* Enables the sandbox; an empty readablePath means no path is added to
     * the policy as readable. */
    void enableSandbox(QString readablePath = "");

    /* Writes the given pages to stdout in serialized form */
    void printResults(const QVector<PageData> &pageData);
};
#endif // SANDBOXEDPROCESSOR_H