cli: moved processing of file content into sandboxed subprocess
This commit is contained in:
parent
ebea074fcb
commit
ad84c8acf7
@ -18,10 +18,12 @@ LIBS += -luchardet -lpoppler-qt5 -lquazip5
|
|||||||
SOURCES += \
|
SOURCES += \
|
||||||
main.cpp \
|
main.cpp \
|
||||||
encodingdetector.cpp \
|
encodingdetector.cpp \
|
||||||
|
pagedata.cpp \
|
||||||
processor.cpp \
|
processor.cpp \
|
||||||
pdfprocessor.cpp \
|
pdfprocessor.cpp \
|
||||||
defaulttextprocessor.cpp \
|
defaulttextprocessor.cpp \
|
||||||
commandadd.cpp \
|
commandadd.cpp \
|
||||||
|
sandboxedprocessor.cpp \
|
||||||
tagstripperprocessor.cpp \
|
tagstripperprocessor.cpp \
|
||||||
nothingprocessor.cpp \
|
nothingprocessor.cpp \
|
||||||
odtprocessor.cpp \
|
odtprocessor.cpp \
|
||||||
@ -44,6 +46,7 @@ HEADERS += \
|
|||||||
defaulttextprocessor.h \
|
defaulttextprocessor.h \
|
||||||
command.h \
|
command.h \
|
||||||
commandadd.h \
|
commandadd.h \
|
||||||
|
sandboxedprocessor.h \
|
||||||
tagstripperprocessor.h \
|
tagstripperprocessor.h \
|
||||||
nothingprocessor.h \
|
nothingprocessor.h \
|
||||||
odtprocessor.h \
|
odtprocessor.h \
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
#include <QSqlError>
|
#include <QSqlError>
|
||||||
#include <QDateTime>
|
#include <QDateTime>
|
||||||
#include <QtConcurrentMap>
|
#include <QtConcurrentMap>
|
||||||
|
#include <QProcess>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include "filesaver.h"
|
#include "filesaver.h"
|
||||||
#include "processor.h"
|
#include "processor.h"
|
||||||
@ -13,18 +14,6 @@
|
|||||||
#include "odsprocessor.h"
|
#include "odsprocessor.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "logger.h"
|
#include "logger.h"
|
||||||
static DefaultTextProcessor *defaultTextProcessor = new DefaultTextProcessor();
|
|
||||||
static TagStripperProcessor *tagStripperProcessor = new TagStripperProcessor();
|
|
||||||
static NothingProcessor *nothingProcessor = new NothingProcessor();
|
|
||||||
static OdtProcessor *odtProcessor = new OdtProcessor();
|
|
||||||
static OdsProcessor *odsProcessor = new OdsProcessor();
|
|
||||||
|
|
||||||
static QMap<QString, Processor *> processors{
|
|
||||||
{"pdf", new PdfProcessor()}, {"txt", defaultTextProcessor}, {"md", defaultTextProcessor},
|
|
||||||
{"py", defaultTextProcessor}, {"xml", nothingProcessor}, {"html", tagStripperProcessor},
|
|
||||||
{"java", defaultTextProcessor}, {"js", defaultTextProcessor}, {"cpp", defaultTextProcessor},
|
|
||||||
{"c", defaultTextProcessor}, {"sql", defaultTextProcessor}, {"odt", odtProcessor},
|
|
||||||
{"ods", odsProcessor}};
|
|
||||||
|
|
||||||
FileSaver::FileSaver(SqliteDbService &dbService)
|
FileSaver::FileSaver(SqliteDbService &dbService)
|
||||||
{
|
{
|
||||||
@ -106,32 +95,47 @@ int FileSaver::processFiles(const QVector<QString> paths, std::function<SaveFile
|
|||||||
|
|
||||||
SaveFileResult FileSaver::saveFile(const QFileInfo &fileInfo)
|
SaveFileResult FileSaver::saveFile(const QFileInfo &fileInfo)
|
||||||
{
|
{
|
||||||
Processor *processor = processors.value(fileInfo.suffix(), nothingProcessor);
|
|
||||||
QVector<PageData> pageData;
|
QVector<PageData> pageData;
|
||||||
QString absPath = fileInfo.absoluteFilePath();
|
QString absPath = fileInfo.absoluteFilePath();
|
||||||
|
|
||||||
|
int status = -1;
|
||||||
if(fileInfo.isFile())
|
if(fileInfo.isFile())
|
||||||
{
|
{
|
||||||
try
|
QProcess process;
|
||||||
|
QStringList args;
|
||||||
|
args << "process" << absPath;
|
||||||
|
process.setProcessChannelMode(QProcess::ForwardedErrorChannel);
|
||||||
|
process.start("/proc/self/exe", args);
|
||||||
|
process.waitForStarted();
|
||||||
|
process.waitForFinished();
|
||||||
|
|
||||||
|
/* TODO: This is suboptimal as it eats lots of mem
|
||||||
|
* but avoids a weird QDataStream/QProcess behaviour
|
||||||
|
* where it thinks the process has ended when it has not...
|
||||||
|
*
|
||||||
|
* Also, there seem to be issues with reads not being blocked, so
|
||||||
|
* the only reliable way appears to be waiting until the process
|
||||||
|
* finishes.
|
||||||
|
*/
|
||||||
|
QDataStream in(process.readAllStandardOutput());
|
||||||
|
while(!in.atEnd())
|
||||||
{
|
{
|
||||||
if(processor->PREFERED_DATA_SOURCE == FILEPATH)
|
PageData pd;
|
||||||
{
|
in >> pd;
|
||||||
pageData = processor->process(absPath);
|
pageData.append(pd);
|
||||||
}
|
}
|
||||||
else
|
status = process.exitCode();
|
||||||
|
if(status != 0)
|
||||||
{
|
{
|
||||||
pageData = processor->process(Utils::readFile(absPath));
|
Logger::error() << "Error while processing" << absPath << ":"
|
||||||
}
|
<< "Exit code " << status << Qt::endl;
|
||||||
}
|
|
||||||
catch(LooqsGeneralException &e)
|
|
||||||
{
|
|
||||||
Logger::error() << "Error while processing" << absPath << ":" << e.message << Qt::endl;
|
|
||||||
return PROCESSFAIL;
|
return PROCESSFAIL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Could happen if a file corrupted for example
|
// Could happen if a file corrupted for example
|
||||||
if(pageData.isEmpty() && processor != nothingProcessor)
|
if(pageData.isEmpty() && status != NOTHING_PROCESSED)
|
||||||
{
|
{
|
||||||
Logger::error() << "Could not get any content for " << absPath << Qt::endl;
|
Logger::error() << "Could not get any content for " << absPath << Qt::endl;
|
||||||
}
|
}
|
||||||
|
15
cli/main.cpp
15
cli/main.cpp
@ -24,6 +24,7 @@
|
|||||||
#include "commandsearch.h"
|
#include "commandsearch.h"
|
||||||
#include "databasefactory.h"
|
#include "databasefactory.h"
|
||||||
#include "logger.h"
|
#include "logger.h"
|
||||||
|
#include "sandboxedprocessor.h"
|
||||||
#include "../shared/common.h"
|
#include "../shared/common.h"
|
||||||
|
|
||||||
void printUsage(QString argv0)
|
void printUsage(QString argv0)
|
||||||
@ -59,6 +60,7 @@ int main(int argc, char *argv[])
|
|||||||
QCoreApplication app(argc, argv);
|
QCoreApplication app(argc, argv);
|
||||||
QStringList args = app.arguments();
|
QStringList args = app.arguments();
|
||||||
QString argv0 = args.takeFirst();
|
QString argv0 = args.takeFirst();
|
||||||
|
|
||||||
if(args.length() < 1)
|
if(args.length() < 1)
|
||||||
{
|
{
|
||||||
printUsage(argv0);
|
printUsage(argv0);
|
||||||
@ -74,11 +76,24 @@ int main(int argc, char *argv[])
|
|||||||
Logger::error() << "Error: " << e.message;
|
Logger::error() << "Error: " << e.message;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
qRegisterMetaType<PageData>();
|
||||||
|
|
||||||
QString connectionString = Common::databasePath();
|
QString connectionString = Common::databasePath();
|
||||||
DatabaseFactory dbFactory(connectionString);
|
DatabaseFactory dbFactory(connectionString);
|
||||||
SqliteDbService dbService(dbFactory);
|
SqliteDbService dbService(dbFactory);
|
||||||
QString commandName = args.first();
|
QString commandName = args.first();
|
||||||
|
// Internal "process" sub-command: extract the content of a single file via
// SandboxedProcessor and use its result as the exit code of this process.
if(commandName == "process")
{
	// args[0] is the command name itself (argv0 was already removed with
	// takeFirst()), so the filename lives at index 1. Require at least two
	// entries before reading args.at(1); the previous "< 1" check could
	// never trigger here and allowed an out-of-range access.
	if(args.length() < 2)
	{
		qDebug() << "Filename is required";
		return 1;
	}

	QString file = args.at(1);
	SandboxedProcessor processor(file);
	return processor.process();
}
|
||||||
Command *cmd = commandFromName(commandName, dbService);
|
Command *cmd = commandFromName(commandName, dbService);
|
||||||
if(cmd != nullptr)
|
if(cmd != nullptr)
|
||||||
{
|
{
|
||||||
|
13
cli/pagedata.cpp
Normal file
13
cli/pagedata.cpp
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
#include "pagedata.h"
|
||||||
|
|
||||||
|
// Writes a PageData to the stream: the page number first, then the content.
// The field order must stay in sync with the matching operator>>.
QDataStream &operator<<(QDataStream &out, const PageData &pd)
{
	out << pd.pagenumber;
	out << pd.content;
	return out;
}
|
||||||
|
|
||||||
|
// Reads a PageData from the stream: the page number first, then the content.
// The field order must stay in sync with the matching operator<<.
QDataStream &operator>>(QDataStream &in, PageData &pd)
{
	in >> pd.pagenumber;
	in >> pd.content;
	return in;
}
|
@ -1,6 +1,9 @@
|
|||||||
#ifndef PAGEDATA_H
|
#ifndef PAGEDATA_H
|
||||||
#define PAGEDATA_H
|
#define PAGEDATA_H
|
||||||
#include <QString>
|
#include <QString>
|
||||||
|
#include <QMetaType>
|
||||||
|
#include <QDataStream>
|
||||||
|
|
||||||
class PageData
|
class PageData
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -10,10 +13,17 @@ class PageData
|
|||||||
PageData()
|
PageData()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
PageData(unsigned int pagenumber, QString content)
|
PageData(unsigned int pagenumber, QString content)
|
||||||
{
|
{
|
||||||
this->pagenumber = pagenumber;
|
this->pagenumber = pagenumber;
|
||||||
this->content = content;
|
this->content = content;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Q_DECLARE_METATYPE(PageData);
|
||||||
|
|
||||||
|
QDataStream &operator<<(QDataStream &out, const PageData &pd);
|
||||||
|
QDataStream &operator>>(QDataStream &in, PageData &pd);
|
||||||
|
|
||||||
#endif // PAGEDATA_H
|
#endif // PAGEDATA_H
|
||||||
|
@ -10,6 +10,8 @@ enum DataSource
|
|||||||
ARRAY
|
ARRAY
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define NOTHING_PROCESSED 4
|
||||||
|
|
||||||
class Processor
|
class Processor
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
103
cli/sandboxedprocessor.cpp
Normal file
103
cli/sandboxedprocessor.cpp
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
#include <QFile>
|
||||||
|
#include <QFileInfo>
|
||||||
|
#include <QDataStream>
|
||||||
|
#include "sandboxedprocessor.h"
|
||||||
|
#include "pdfprocessor.h"
|
||||||
|
#include "defaulttextprocessor.h"
|
||||||
|
#include "tagstripperprocessor.h"
|
||||||
|
#include "nothingprocessor.h"
|
||||||
|
#include "odtprocessor.h"
|
||||||
|
#include "odsprocessor.h"
|
||||||
|
#include "../submodules/qssb.h/qssb.h"
|
||||||
|
#include "logger.h"
|
||||||
|
|
||||||
|
static DefaultTextProcessor *defaultTextProcessor = new DefaultTextProcessor();
|
||||||
|
static TagStripperProcessor *tagStripperProcessor = new TagStripperProcessor();
|
||||||
|
static NothingProcessor *nothingProcessor = new NothingProcessor();
|
||||||
|
static OdtProcessor *odtProcessor = new OdtProcessor();
|
||||||
|
static OdsProcessor *odsProcessor = new OdsProcessor();
|
||||||
|
|
||||||
|
static QMap<QString, Processor *> processors{
|
||||||
|
{"pdf", new PdfProcessor()}, {"txt", defaultTextProcessor}, {"md", defaultTextProcessor},
|
||||||
|
{"py", defaultTextProcessor}, {"xml", nothingProcessor}, {"html", tagStripperProcessor},
|
||||||
|
{"java", defaultTextProcessor}, {"js", defaultTextProcessor}, {"cpp", defaultTextProcessor},
|
||||||
|
{"c", defaultTextProcessor}, {"sql", defaultTextProcessor}, {"odt", odtProcessor},
|
||||||
|
{"ods", odsProcessor}};
|
||||||
|
|
||||||
|
// Builds a qssb policy and activates it for the current process.
//
// readablePath: when non-empty, this path is appended to the policy with
//               QSSB_FS_ALLOW_READ; when empty, the policy's no_fs flag is set
//               instead.
//
// Terminates the process with EXIT_FAILURE if the policy cannot be enabled.
void SandboxedProcessor::enableSandbox(QString readablePath)
{
	struct qssb_policy *sandboxPolicy = qssb_init_policy();

	// NOTE(review): presumably unshares the network and user namespaces -
	// confirm against qssb.h.
	sandboxPolicy->namespace_options = QSSB_UNSHARE_NETWORK | QSSB_UNSHARE_USER;

	if(readablePath.isEmpty())
	{
		sandboxPolicy->no_fs = 1;
	}
	else
	{
		// Keep the std::string alive for the duration of the call, as
		// c_str() points into it.
		const std::string allowedPath = readablePath.toStdString();
		qssb_append_path_policy(sandboxPolicy, QSSB_FS_ALLOW_READ, allowedPath.c_str());
	}

	const int result = qssb_enable_policy(sandboxPolicy);
	if(result != 0)
	{
		qDebug() << "Failed to establish sandbox: " << result;
		exit(EXIT_FAILURE);
	}

	qssb_free_policy(sandboxPolicy);
}
|
||||||
|
|
||||||
|
void SandboxedProcessor::printResults(const QVector<PageData> &pageData)
|
||||||
|
{
|
||||||
|
QFile fsstdout;
|
||||||
|
fsstdout.open(stdout, QIODevice::WriteOnly);
|
||||||
|
QDataStream stream(&fsstdout);
|
||||||
|
|
||||||
|
for(const PageData &data : pageData)
|
||||||
|
{
|
||||||
|
stream << data;
|
||||||
|
// fsstdout.flush();
|
||||||
|
}
|
||||||
|
|
||||||
|
fsstdout.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
int SandboxedProcessor::process()
|
||||||
|
{
|
||||||
|
QFileInfo fileInfo(this->filePath);
|
||||||
|
Processor *processor = processors.value(fileInfo.suffix(), nothingProcessor);
|
||||||
|
|
||||||
|
if(processor == nothingProcessor)
|
||||||
|
{
|
||||||
|
/* Nothing to do */
|
||||||
|
return NOTHING_PROCESSED;
|
||||||
|
}
|
||||||
|
|
||||||
|
QVector<PageData> pageData;
|
||||||
|
QString absPath = fileInfo.absoluteFilePath();
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
if(processor->PREFERED_DATA_SOURCE == FILEPATH)
|
||||||
|
{
|
||||||
|
/* Read access to FS needed... doh..*/
|
||||||
|
enableSandbox(absPath);
|
||||||
|
pageData = processor->process(absPath);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
QByteArray data = Utils::readFile(absPath);
|
||||||
|
enableSandbox();
|
||||||
|
pageData = processor->process(data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch(LooqsGeneralException &e)
|
||||||
|
{
|
||||||
|
Logger::error() << "Error while processing" << absPath << ":" << e.message << Qt::endl;
|
||||||
|
return 3 /* PROCESSFAIL */;
|
||||||
|
}
|
||||||
|
|
||||||
|
printResults(pageData);
|
||||||
|
return 0;
|
||||||
|
}
|
23
cli/sandboxedprocessor.h
Normal file
23
cli/sandboxedprocessor.h
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
#ifndef SANDBOXEDPROCESSOR_H
|
||||||
|
#define SANDBOXEDPROCESSOR_H
|
||||||
|
#include <QString>
|
||||||
|
#include "pagedata.h"
|
||||||
|
|
||||||
|
class SandboxedProcessor
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
QString filePath;
|
||||||
|
|
||||||
|
void enableSandbox(QString readablePath = "");
|
||||||
|
void printResults(const QVector<PageData> &pageData);
|
||||||
|
|
||||||
|
public:
|
||||||
|
SandboxedProcessor(QString filepath)
|
||||||
|
{
|
||||||
|
this->filePath = filepath;
|
||||||
|
}
|
||||||
|
|
||||||
|
int process();
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // SANDBOXEDPROCESSOR_H
|
Loading…
Reference in New Issue
Block a user