cli: moved processing of file content into sandboxed subprocess
This commit is contained in:
bovenliggende
ebea074fcb
commit
ad84c8acf7
@ -18,10 +18,12 @@ LIBS += -luchardet -lpoppler-qt5 -lquazip5
|
||||
SOURCES += \
|
||||
main.cpp \
|
||||
encodingdetector.cpp \
|
||||
pagedata.cpp \
|
||||
processor.cpp \
|
||||
pdfprocessor.cpp \
|
||||
defaulttextprocessor.cpp \
|
||||
commandadd.cpp \
|
||||
sandboxedprocessor.cpp \
|
||||
tagstripperprocessor.cpp \
|
||||
nothingprocessor.cpp \
|
||||
odtprocessor.cpp \
|
||||
@ -44,6 +46,7 @@ HEADERS += \
|
||||
defaulttextprocessor.h \
|
||||
command.h \
|
||||
commandadd.h \
|
||||
sandboxedprocessor.h \
|
||||
tagstripperprocessor.h \
|
||||
nothingprocessor.h \
|
||||
odtprocessor.h \
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <QSqlError>
|
||||
#include <QDateTime>
|
||||
#include <QtConcurrentMap>
|
||||
#include <QProcess>
|
||||
#include <functional>
|
||||
#include "filesaver.h"
|
||||
#include "processor.h"
|
||||
@ -13,18 +14,6 @@
|
||||
#include "odsprocessor.h"
|
||||
#include "utils.h"
|
||||
#include "logger.h"
|
||||
static DefaultTextProcessor *defaultTextProcessor = new DefaultTextProcessor();
|
||||
static TagStripperProcessor *tagStripperProcessor = new TagStripperProcessor();
|
||||
static NothingProcessor *nothingProcessor = new NothingProcessor();
|
||||
static OdtProcessor *odtProcessor = new OdtProcessor();
|
||||
static OdsProcessor *odsProcessor = new OdsProcessor();
|
||||
|
||||
static QMap<QString, Processor *> processors{
|
||||
{"pdf", new PdfProcessor()}, {"txt", defaultTextProcessor}, {"md", defaultTextProcessor},
|
||||
{"py", defaultTextProcessor}, {"xml", nothingProcessor}, {"html", tagStripperProcessor},
|
||||
{"java", defaultTextProcessor}, {"js", defaultTextProcessor}, {"cpp", defaultTextProcessor},
|
||||
{"c", defaultTextProcessor}, {"sql", defaultTextProcessor}, {"odt", odtProcessor},
|
||||
{"ods", odsProcessor}};
|
||||
|
||||
FileSaver::FileSaver(SqliteDbService &dbService)
|
||||
{
|
||||
@ -106,32 +95,47 @@ int FileSaver::processFiles(const QVector<QString> paths, std::function<SaveFile
|
||||
|
||||
SaveFileResult FileSaver::saveFile(const QFileInfo &fileInfo)
|
||||
{
|
||||
Processor *processor = processors.value(fileInfo.suffix(), nothingProcessor);
|
||||
QVector<PageData> pageData;
|
||||
QString absPath = fileInfo.absoluteFilePath();
|
||||
|
||||
int status = -1;
|
||||
if(fileInfo.isFile())
|
||||
{
|
||||
try
|
||||
QProcess process;
|
||||
QStringList args;
|
||||
args << "process" << absPath;
|
||||
process.setProcessChannelMode(QProcess::ForwardedErrorChannel);
|
||||
process.start("/proc/self/exe", args);
|
||||
process.waitForStarted();
|
||||
process.waitForFinished();
|
||||
|
||||
/* TODO: This is suboptimal as it eats lots of mem
|
||||
* but avoids a weird QDataStream/QProcess behaviour
|
||||
* where it thinks the process has ended when it has not...
|
||||
*
|
||||
* Also, there seem to be issues with reads not being blocked, so
|
||||
* the only reliable way appears to be waiting until the process
|
||||
* finishes.
|
||||
*/
|
||||
QDataStream in(process.readAllStandardOutput());
|
||||
while(!in.atEnd())
|
||||
{
|
||||
if(processor->PREFERED_DATA_SOURCE == FILEPATH)
|
||||
{
|
||||
pageData = processor->process(absPath);
|
||||
}
|
||||
else
|
||||
{
|
||||
pageData = processor->process(Utils::readFile(absPath));
|
||||
}
|
||||
PageData pd;
|
||||
in >> pd;
|
||||
pageData.append(pd);
|
||||
}
|
||||
catch(LooqsGeneralException &e)
|
||||
status = process.exitCode();
|
||||
if(status != 0)
|
||||
{
|
||||
Logger::error() << "Error while processing" << absPath << ":" << e.message << Qt::endl;
|
||||
Logger::error() << "Error while processing" << absPath << ":"
|
||||
<< "Exit code " << status << Qt::endl;
|
||||
|
||||
return PROCESSFAIL;
|
||||
}
|
||||
}
|
||||
|
||||
// Could happen if a file corrupted for example
|
||||
if(pageData.isEmpty() && processor != nothingProcessor)
|
||||
if(pageData.isEmpty() && status != NOTHING_PROCESSED)
|
||||
{
|
||||
Logger::error() << "Could not get any content for " << absPath << Qt::endl;
|
||||
}
|
||||
|
15
cli/main.cpp
15
cli/main.cpp
@ -24,6 +24,7 @@
|
||||
#include "commandsearch.h"
|
||||
#include "databasefactory.h"
|
||||
#include "logger.h"
|
||||
#include "sandboxedprocessor.h"
|
||||
#include "../shared/common.h"
|
||||
|
||||
void printUsage(QString argv0)
|
||||
@ -59,6 +60,7 @@ int main(int argc, char *argv[])
|
||||
QCoreApplication app(argc, argv);
|
||||
QStringList args = app.arguments();
|
||||
QString argv0 = args.takeFirst();
|
||||
|
||||
if(args.length() < 1)
|
||||
{
|
||||
printUsage(argv0);
|
||||
@ -74,11 +76,24 @@ int main(int argc, char *argv[])
|
||||
Logger::error() << "Error: " << e.message;
|
||||
return 1;
|
||||
}
|
||||
qRegisterMetaType<PageData>();
|
||||
|
||||
QString connectionString = Common::databasePath();
|
||||
DatabaseFactory dbFactory(connectionString);
|
||||
SqliteDbService dbService(dbFactory);
|
||||
QString commandName = args.first();
|
||||
/*
 * Internal "process" command: runs the content processor for a single file
 * inside this (sub)process and returns its result code as the exit code.
 */
if(commandName == "process")
{
	/* args[0] is the command name itself (argv0 was already removed via
	 * takeFirst()), so a filename is only present when there are at least
	 * two entries. Checking "< 1" here could never trigger and let
	 * args.at(1) read out of range. */
	if(args.length() < 2)
	{
		qDebug() << "Filename is required";
		return 1;
	}

	QString file = args.at(1);
	SandboxedProcessor processor(file);
	return processor.process();
}
|
||||
Command *cmd = commandFromName(commandName, dbService);
|
||||
if(cmd != nullptr)
|
||||
{
|
||||
|
13
cli/pagedata.cpp
Normal file
13
cli/pagedata.cpp
Normal file
@ -0,0 +1,13 @@
|
||||
#include "pagedata.h"
|
||||
|
||||
/*
 * Writes a PageData to the stream: the page number first, followed by the
 * content. operator>> below reads the fields back in the same order.
 */
QDataStream &operator<<(QDataStream &out, const PageData &pd)
{
	return out << pd.pagenumber << pd.content;
}
|
||||
|
||||
/*
 * Reads a PageData from the stream: the page number first, followed by the
 * content, i.e. the same field order operator<< above writes.
 */
QDataStream &operator>>(QDataStream &in, PageData &pd)
{
	return in >> pd.pagenumber >> pd.content;
}
|
@ -1,6 +1,9 @@
|
||||
#ifndef PAGEDATA_H
|
||||
#define PAGEDATA_H
|
||||
#include <QString>
|
||||
#include <QMetaType>
|
||||
#include <QDataStream>
|
||||
|
||||
class PageData
|
||||
{
|
||||
public:
|
||||
@ -10,10 +13,17 @@ class PageData
|
||||
PageData()
|
||||
{
|
||||
}
|
||||
|
||||
PageData(unsigned int pagenumber, QString content)
|
||||
{
|
||||
this->pagenumber = pagenumber;
|
||||
this->content = content;
|
||||
}
|
||||
};
|
||||
|
||||
Q_DECLARE_METATYPE(PageData);
|
||||
|
||||
QDataStream &operator<<(QDataStream &out, const PageData &pd);
|
||||
QDataStream &operator>>(QDataStream &in, PageData &pd);
|
||||
|
||||
#endif // PAGEDATA_H
|
||||
|
@ -10,6 +10,8 @@ enum DataSource
|
||||
ARRAY
|
||||
};
|
||||
|
||||
#define NOTHING_PROCESSED 4
|
||||
|
||||
class Processor
|
||||
{
|
||||
public:
|
||||
|
103
cli/sandboxedprocessor.cpp
Normal file
103
cli/sandboxedprocessor.cpp
Normal file
@ -0,0 +1,103 @@
|
||||
#include <QFile>
|
||||
#include <QFileInfo>
|
||||
#include <QDataStream>
|
||||
#include "sandboxedprocessor.h"
|
||||
#include "pdfprocessor.h"
|
||||
#include "defaulttextprocessor.h"
|
||||
#include "tagstripperprocessor.h"
|
||||
#include "nothingprocessor.h"
|
||||
#include "odtprocessor.h"
|
||||
#include "odsprocessor.h"
|
||||
#include "../submodules/qssb.h/qssb.h"
|
||||
#include "logger.h"
|
||||
|
||||
static DefaultTextProcessor *defaultTextProcessor = new DefaultTextProcessor();
|
||||
static TagStripperProcessor *tagStripperProcessor = new TagStripperProcessor();
|
||||
static NothingProcessor *nothingProcessor = new NothingProcessor();
|
||||
static OdtProcessor *odtProcessor = new OdtProcessor();
|
||||
static OdsProcessor *odsProcessor = new OdsProcessor();
|
||||
|
||||
static QMap<QString, Processor *> processors{
|
||||
{"pdf", new PdfProcessor()}, {"txt", defaultTextProcessor}, {"md", defaultTextProcessor},
|
||||
{"py", defaultTextProcessor}, {"xml", nothingProcessor}, {"html", tagStripperProcessor},
|
||||
{"java", defaultTextProcessor}, {"js", defaultTextProcessor}, {"cpp", defaultTextProcessor},
|
||||
{"c", defaultTextProcessor}, {"sql", defaultTextProcessor}, {"odt", odtProcessor},
|
||||
{"ods", odsProcessor}};
|
||||
|
||||
/*
 * Builds a qssb policy and applies it to the current process.
 *
 * readablePath: if non-empty, this path is added to the policy with
 *               QSSB_FS_ALLOW_READ; if empty, the policy's no_fs flag is set
 *               (presumably denying all filesystem access - confirm in qssb.h).
 *
 * Terminates the process with EXIT_FAILURE when the policy cannot be
 * created or enabled, so processing never continues without a sandbox.
 */
void SandboxedProcessor::enableSandbox(QString readablePath)
{
	struct qssb_policy *policy = qssb_init_policy();
	if(policy == nullptr)
	{
		/* Without this check the assignments below would dereference null. */
		qDebug() << "Failed to establish sandbox: could not create policy";
		exit(EXIT_FAILURE);
	}

	/* Going by the flag names: separate network and user namespaces. */
	policy->namespace_options = QSSB_UNSHARE_NETWORK | QSSB_UNSHARE_USER;

	if(!readablePath.isEmpty())
	{
		std::string readablePathLocation = readablePath.toStdString();
		qssb_append_path_policy(policy, QSSB_FS_ALLOW_READ, readablePathLocation.c_str());
	}
	else
	{
		policy->no_fs = 1;
	}

	int ret = qssb_enable_policy(policy);
	if(ret != 0)
	{
		qDebug() << "Failed to establish sandbox: " << ret;
		exit(EXIT_FAILURE);
	}
	qssb_free_policy(policy);
}
|
||||
|
||||
void SandboxedProcessor::printResults(const QVector<PageData> &pageData)
|
||||
{
|
||||
QFile fsstdout;
|
||||
fsstdout.open(stdout, QIODevice::WriteOnly);
|
||||
QDataStream stream(&fsstdout);
|
||||
|
||||
for(const PageData &data : pageData)
|
||||
{
|
||||
stream << data;
|
||||
// fsstdout.flush();
|
||||
}
|
||||
|
||||
fsstdout.close();
|
||||
}
|
||||
|
||||
int SandboxedProcessor::process()
|
||||
{
|
||||
QFileInfo fileInfo(this->filePath);
|
||||
Processor *processor = processors.value(fileInfo.suffix(), nothingProcessor);
|
||||
|
||||
if(processor == nothingProcessor)
|
||||
{
|
||||
/* Nothing to do */
|
||||
return NOTHING_PROCESSED;
|
||||
}
|
||||
|
||||
QVector<PageData> pageData;
|
||||
QString absPath = fileInfo.absoluteFilePath();
|
||||
|
||||
try
|
||||
{
|
||||
if(processor->PREFERED_DATA_SOURCE == FILEPATH)
|
||||
{
|
||||
/* Read access to FS needed... doh..*/
|
||||
enableSandbox(absPath);
|
||||
pageData = processor->process(absPath);
|
||||
}
|
||||
else
|
||||
{
|
||||
QByteArray data = Utils::readFile(absPath);
|
||||
enableSandbox();
|
||||
pageData = processor->process(data);
|
||||
}
|
||||
}
|
||||
catch(LooqsGeneralException &e)
|
||||
{
|
||||
Logger::error() << "Error while processing" << absPath << ":" << e.message << Qt::endl;
|
||||
return 3 /* PROCESSFAIL */;
|
||||
}
|
||||
|
||||
printResults(pageData);
|
||||
return 0;
|
||||
}
|
23
cli/sandboxedprocessor.h
Normal file
23
cli/sandboxedprocessor.h
Normal file
@ -0,0 +1,23 @@
|
||||
#ifndef SANDBOXEDPROCESSOR_H
|
||||
#define SANDBOXEDPROCESSOR_H
|
||||
#include <QString>
|
||||
#include "pagedata.h"
|
||||
|
||||
class SandboxedProcessor
|
||||
{
|
||||
private:
|
||||
QString filePath;
|
||||
|
||||
void enableSandbox(QString readablePath = "");
|
||||
void printResults(const QVector<PageData> &pageData);
|
||||
|
||||
public:
|
||||
SandboxedProcessor(QString filepath)
|
||||
{
|
||||
this->filePath = filepath;
|
||||
}
|
||||
|
||||
int process();
|
||||
};
|
||||
|
||||
#endif // SANDBOXEDPROCESSOR_H
|
Laden…
Verwijs in nieuw issue
Block a user