cli: moved processing of file content into sandboxed subprocess
Este cometimento está contido em:
		| @@ -18,10 +18,12 @@ LIBS += -luchardet -lpoppler-qt5 -lquazip5 | ||||
| SOURCES += \ | ||||
|         main.cpp \ | ||||
|     encodingdetector.cpp \ | ||||
|     pagedata.cpp \ | ||||
|     processor.cpp \ | ||||
|     pdfprocessor.cpp \ | ||||
|     defaulttextprocessor.cpp \ | ||||
|     commandadd.cpp \ | ||||
|     sandboxedprocessor.cpp \ | ||||
|     tagstripperprocessor.cpp \ | ||||
|     nothingprocessor.cpp \ | ||||
|     odtprocessor.cpp \ | ||||
| @@ -44,6 +46,7 @@ HEADERS += \ | ||||
|     defaulttextprocessor.h \ | ||||
|     command.h \ | ||||
|     commandadd.h \ | ||||
|     sandboxedprocessor.h \ | ||||
|     tagstripperprocessor.h \ | ||||
|     nothingprocessor.h \ | ||||
|     odtprocessor.h \ | ||||
|   | ||||
| @@ -1,6 +1,7 @@ | ||||
| #include <QSqlError> | ||||
| #include <QDateTime> | ||||
| #include <QtConcurrentMap> | ||||
| #include <QProcess> | ||||
| #include <functional> | ||||
| #include "filesaver.h" | ||||
| #include "processor.h" | ||||
| @@ -13,18 +14,6 @@ | ||||
| #include "odsprocessor.h" | ||||
| #include "utils.h" | ||||
| #include "logger.h" | ||||
| static DefaultTextProcessor *defaultTextProcessor = new DefaultTextProcessor(); | ||||
| static TagStripperProcessor *tagStripperProcessor = new TagStripperProcessor(); | ||||
| static NothingProcessor *nothingProcessor = new NothingProcessor(); | ||||
| static OdtProcessor *odtProcessor = new OdtProcessor(); | ||||
| static OdsProcessor *odsProcessor = new OdsProcessor(); | ||||
|  | ||||
| static QMap<QString, Processor *> processors{ | ||||
| 	{"pdf", new PdfProcessor()},	{"txt", defaultTextProcessor}, {"md", defaultTextProcessor}, | ||||
| 	{"py", defaultTextProcessor},	{"xml", nothingProcessor},	   {"html", tagStripperProcessor}, | ||||
| 	{"java", defaultTextProcessor}, {"js", defaultTextProcessor},  {"cpp", defaultTextProcessor}, | ||||
| 	{"c", defaultTextProcessor},	{"sql", defaultTextProcessor}, {"odt", odtProcessor}, | ||||
| 	{"ods", odsProcessor}}; | ||||
|  | ||||
| FileSaver::FileSaver(SqliteDbService &dbService) | ||||
| { | ||||
| @@ -106,32 +95,47 @@ int FileSaver::processFiles(const QVector<QString> paths, std::function<SaveFile | ||||
|  | ||||
| SaveFileResult FileSaver::saveFile(const QFileInfo &fileInfo) | ||||
| { | ||||
| 	Processor *processor = processors.value(fileInfo.suffix(), nothingProcessor); | ||||
| 	QVector<PageData> pageData; | ||||
| 	QString absPath = fileInfo.absoluteFilePath(); | ||||
|  | ||||
| 	int status = -1; | ||||
| 	if(fileInfo.isFile()) | ||||
| 	{ | ||||
| 		try | ||||
| 		QProcess process; | ||||
| 		QStringList args; | ||||
| 		args << "process" << absPath; | ||||
| 		process.setProcessChannelMode(QProcess::ForwardedErrorChannel); | ||||
| 		process.start("/proc/self/exe", args); | ||||
| 		process.waitForStarted(); | ||||
| 		process.waitForFinished(); | ||||
|  | ||||
| 		/* TODO: This is suboptimal as it eats lots of mem | ||||
| 		 * but avoids a weird QDataStream/QProcess behaviour | ||||
| 		 * where it thinks the process has ended when it has not... | ||||
| 		 * | ||||
| 		 * Also, there seem to be issues with reads not being blocked, so | ||||
| 		 * the only reliable way appears to be waiting until the process | ||||
| 		 * finishes. | ||||
| 		 */ | ||||
| 		QDataStream in(process.readAllStandardOutput()); | ||||
| 		while(!in.atEnd()) | ||||
| 		{ | ||||
| 			if(processor->PREFERED_DATA_SOURCE == FILEPATH) | ||||
| 			{ | ||||
| 				pageData = processor->process(absPath); | ||||
| 			} | ||||
| 			else | ||||
| 			{ | ||||
| 				pageData = processor->process(Utils::readFile(absPath)); | ||||
| 			} | ||||
| 			PageData pd; | ||||
| 			in >> pd; | ||||
| 			pageData.append(pd); | ||||
| 		} | ||||
| 		catch(LooqsGeneralException &e) | ||||
| 		status = process.exitCode(); | ||||
| 		if(status != 0) | ||||
| 		{ | ||||
| 			Logger::error() << "Error while processing" << absPath << ":" << e.message << Qt::endl; | ||||
| 			Logger::error() << "Error while processing" << absPath << ":" | ||||
| 							<< "Exit code " << status << Qt::endl; | ||||
|  | ||||
| 			return PROCESSFAIL; | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Could happen if a file corrupted for example | ||||
| 	if(pageData.isEmpty() && processor != nothingProcessor) | ||||
| 	if(pageData.isEmpty() && status != NOTHING_PROCESSED) | ||||
| 	{ | ||||
| 		Logger::error() << "Could not get any content for " << absPath << Qt::endl; | ||||
| 	} | ||||
|   | ||||
							
								
								
									
										15
									
								
								cli/main.cpp
									
									
									
									
									
								
							
							
						
						
									
										15
									
								
								cli/main.cpp
									
									
									
									
									
								
							| @@ -24,6 +24,7 @@ | ||||
| #include "commandsearch.h" | ||||
| #include "databasefactory.h" | ||||
| #include "logger.h" | ||||
| #include "sandboxedprocessor.h" | ||||
| #include "../shared/common.h" | ||||
|  | ||||
| void printUsage(QString argv0) | ||||
| @@ -59,6 +60,7 @@ int main(int argc, char *argv[]) | ||||
| 	QCoreApplication app(argc, argv); | ||||
| 	QStringList args = app.arguments(); | ||||
| 	QString argv0 = args.takeFirst(); | ||||
|  | ||||
| 	if(args.length() < 1) | ||||
| 	{ | ||||
| 		printUsage(argv0); | ||||
| @@ -74,11 +76,24 @@ int main(int argc, char *argv[]) | ||||
| 		Logger::error() << "Error: " << e.message; | ||||
| 		return 1; | ||||
| 	} | ||||
| 	qRegisterMetaType<PageData>(); | ||||
|  | ||||
| 	QString connectionString = Common::databasePath(); | ||||
| 	DatabaseFactory dbFactory(connectionString); | ||||
| 	SqliteDbService dbService(dbFactory); | ||||
| 	QString commandName = args.first(); | ||||
| 	if(commandName == "process") | ||||
| 	{ | ||||
| 		if(args.length() < 1) | ||||
| 		{ | ||||
| 			qDebug() << "Filename is required"; | ||||
| 			return 1; | ||||
| 		} | ||||
|  | ||||
| 		QString file = args.at(1); | ||||
| 		SandboxedProcessor processor(file); | ||||
| 		return processor.process(); | ||||
| 	} | ||||
| 	Command *cmd = commandFromName(commandName, dbService); | ||||
| 	if(cmd != nullptr) | ||||
| 	{ | ||||
|   | ||||
							
								
								
									
										13
									
								
								cli/pagedata.cpp
									
									
									
									
									
										Ficheiro normal
									
								
							
							
						
						
									
										13
									
								
								cli/pagedata.cpp
									
									
									
									
									
										Ficheiro normal
									
								
							| @@ -0,0 +1,13 @@ | ||||
| #include "pagedata.h" | ||||
|  | ||||
| QDataStream &operator<<(QDataStream &out, const PageData &pd) | ||||
| { | ||||
| 	out << pd.pagenumber << pd.content; | ||||
| 	return out; | ||||
| } | ||||
|  | ||||
| QDataStream &operator>>(QDataStream &in, PageData &pd) | ||||
| { | ||||
| 	in >> pd.pagenumber >> pd.content; | ||||
| 	return in; | ||||
| } | ||||
| @@ -1,6 +1,9 @@ | ||||
| #ifndef PAGEDATA_H | ||||
| #define PAGEDATA_H | ||||
| #include <QString> | ||||
| #include <QMetaType> | ||||
| #include <QDataStream> | ||||
|  | ||||
| class PageData | ||||
| { | ||||
|   public: | ||||
| @@ -10,10 +13,17 @@ class PageData | ||||
| 	PageData() | ||||
| 	{ | ||||
| 	} | ||||
|  | ||||
| 	PageData(unsigned int pagenumber, QString content) | ||||
| 	{ | ||||
| 		this->pagenumber = pagenumber; | ||||
| 		this->content = content; | ||||
| 	} | ||||
| }; | ||||
|  | ||||
| Q_DECLARE_METATYPE(PageData); | ||||
|  | ||||
| QDataStream &operator<<(QDataStream &out, const PageData &pd); | ||||
| QDataStream &operator>>(QDataStream &in, PageData &pd); | ||||
|  | ||||
| #endif // PAGEDATA_H | ||||
|   | ||||
| @@ -10,6 +10,8 @@ enum DataSource | ||||
| 	ARRAY | ||||
| }; | ||||
|  | ||||
| #define NOTHING_PROCESSED 4 | ||||
|  | ||||
| class Processor | ||||
| { | ||||
|   public: | ||||
|   | ||||
							
								
								
									
										103
									
								
								cli/sandboxedprocessor.cpp
									
									
									
									
									
										Ficheiro normal
									
								
							
							
						
						
									
										103
									
								
								cli/sandboxedprocessor.cpp
									
									
									
									
									
										Ficheiro normal
									
								
							| @@ -0,0 +1,103 @@ | ||||
| #include <QFile> | ||||
| #include <QFileInfo> | ||||
| #include <QDataStream> | ||||
| #include "sandboxedprocessor.h" | ||||
| #include "pdfprocessor.h" | ||||
| #include "defaulttextprocessor.h" | ||||
| #include "tagstripperprocessor.h" | ||||
| #include "nothingprocessor.h" | ||||
| #include "odtprocessor.h" | ||||
| #include "odsprocessor.h" | ||||
| #include "../submodules/qssb.h/qssb.h" | ||||
| #include "logger.h" | ||||
|  | ||||
| static DefaultTextProcessor *defaultTextProcessor = new DefaultTextProcessor(); | ||||
| static TagStripperProcessor *tagStripperProcessor = new TagStripperProcessor(); | ||||
| static NothingProcessor *nothingProcessor = new NothingProcessor(); | ||||
| static OdtProcessor *odtProcessor = new OdtProcessor(); | ||||
| static OdsProcessor *odsProcessor = new OdsProcessor(); | ||||
|  | ||||
| static QMap<QString, Processor *> processors{ | ||||
| 	{"pdf", new PdfProcessor()},	{"txt", defaultTextProcessor}, {"md", defaultTextProcessor}, | ||||
| 	{"py", defaultTextProcessor},	{"xml", nothingProcessor},	   {"html", tagStripperProcessor}, | ||||
| 	{"java", defaultTextProcessor}, {"js", defaultTextProcessor},  {"cpp", defaultTextProcessor}, | ||||
| 	{"c", defaultTextProcessor},	{"sql", defaultTextProcessor}, {"odt", odtProcessor}, | ||||
| 	{"ods", odsProcessor}}; | ||||
|  | ||||
| void SandboxedProcessor::enableSandbox(QString readablePath) | ||||
| { | ||||
| 	struct qssb_policy *policy = qssb_init_policy(); | ||||
|  | ||||
| 	policy->namespace_options = QSSB_UNSHARE_NETWORK | QSSB_UNSHARE_USER; | ||||
|  | ||||
| 	if(!readablePath.isEmpty()) | ||||
| 	{ | ||||
| 		std::string readablePathLocation = readablePath.toStdString(); | ||||
| 		qssb_append_path_policy(policy, QSSB_FS_ALLOW_READ, readablePathLocation.c_str()); | ||||
| 	} | ||||
| 	else | ||||
| 	{ | ||||
| 		policy->no_fs = 1; | ||||
| 	} | ||||
| 	int ret = qssb_enable_policy(policy); | ||||
| 	if(ret != 0) | ||||
| 	{ | ||||
| 		qDebug() << "Failed to establish sandbox: " << ret; | ||||
| 		exit(EXIT_FAILURE); | ||||
| 	} | ||||
| 	qssb_free_policy(policy); | ||||
| } | ||||
|  | ||||
| void SandboxedProcessor::printResults(const QVector<PageData> &pageData) | ||||
| { | ||||
| 	QFile fsstdout; | ||||
| 	fsstdout.open(stdout, QIODevice::WriteOnly); | ||||
| 	QDataStream stream(&fsstdout); | ||||
|  | ||||
| 	for(const PageData &data : pageData) | ||||
| 	{ | ||||
| 		stream << data; | ||||
| 		// fsstdout.flush(); | ||||
| 	} | ||||
|  | ||||
| 	fsstdout.close(); | ||||
| } | ||||
|  | ||||
| int SandboxedProcessor::process() | ||||
| { | ||||
| 	QFileInfo fileInfo(this->filePath); | ||||
| 	Processor *processor = processors.value(fileInfo.suffix(), nothingProcessor); | ||||
|  | ||||
| 	if(processor == nothingProcessor) | ||||
| 	{ | ||||
| 		/* Nothing to do */ | ||||
| 		return NOTHING_PROCESSED; | ||||
| 	} | ||||
|  | ||||
| 	QVector<PageData> pageData; | ||||
| 	QString absPath = fileInfo.absoluteFilePath(); | ||||
|  | ||||
| 	try | ||||
| 	{ | ||||
| 		if(processor->PREFERED_DATA_SOURCE == FILEPATH) | ||||
| 		{ | ||||
| 			/* Read access to FS needed... doh..*/ | ||||
| 			enableSandbox(absPath); | ||||
| 			pageData = processor->process(absPath); | ||||
| 		} | ||||
| 		else | ||||
| 		{ | ||||
| 			QByteArray data = Utils::readFile(absPath); | ||||
| 			enableSandbox(); | ||||
| 			pageData = processor->process(data); | ||||
| 		} | ||||
| 	} | ||||
| 	catch(LooqsGeneralException &e) | ||||
| 	{ | ||||
| 		Logger::error() << "Error while processing" << absPath << ":" << e.message << Qt::endl; | ||||
| 		return 3 /* PROCESSFAIL */; | ||||
| 	} | ||||
|  | ||||
| 	printResults(pageData); | ||||
| 	return 0; | ||||
| } | ||||
							
								
								
									
										23
									
								
								cli/sandboxedprocessor.h
									
									
									
									
									
										Ficheiro normal
									
								
							
							
						
						
									
										23
									
								
								cli/sandboxedprocessor.h
									
									
									
									
									
										Ficheiro normal
									
								
							| @@ -0,0 +1,23 @@ | ||||
| #ifndef SANDBOXEDPROCESSOR_H | ||||
| #define SANDBOXEDPROCESSOR_H | ||||
| #include <QString> | ||||
| #include "pagedata.h" | ||||
|  | ||||
| class SandboxedProcessor | ||||
| { | ||||
|   private: | ||||
| 	QString filePath; | ||||
|  | ||||
| 	void enableSandbox(QString readablePath = ""); | ||||
| 	void printResults(const QVector<PageData> &pageData); | ||||
|  | ||||
|   public: | ||||
| 	SandboxedProcessor(QString filepath) | ||||
| 	{ | ||||
| 		this->filePath = filepath; | ||||
| 	} | ||||
|  | ||||
| 	int process(); | ||||
| }; | ||||
|  | ||||
| #endif // SANDBOXEDPROCESSOR_H | ||||
		Criar uma nova questão referindo esta
	
	Bloquear um utilizador