2019-04-16 08:50:08 +02:00
|
|
|
#include <QSqlError>
|
|
|
|
#include <QDateTime>
|
|
|
|
#include <QtConcurrentMap>
|
2021-08-07 18:38:23 +02:00
|
|
|
#include <QProcess>
|
2019-04-16 08:50:08 +02:00
|
|
|
#include <functional>
|
|
|
|
#include "filesaver.h"
|
|
|
|
#include "processor.h"
|
|
|
|
#include "pdfprocessor.h"
|
|
|
|
#include "defaulttextprocessor.h"
|
|
|
|
#include "tagstripperprocessor.h"
|
|
|
|
#include "nothingprocessor.h"
|
|
|
|
#include "odtprocessor.h"
|
|
|
|
#include "odsprocessor.h"
|
|
|
|
#include "utils.h"
|
|
|
|
#include "logger.h"
|
|
|
|
|
|
|
|
/* Constructs a FileSaver that keeps a non-owning pointer to the given
 * database service; the service object is neither copied nor freed here. */
FileSaver::FileSaver(SqliteDbService &dbService) : dbService(&dbService)
{
}
|
|
|
|
|
|
|
|
/* Adds a single file, unless the database already has an entry for it.
 *
 * The lookup uses the file's absolute path and its modification time in
 * seconds since the epoch. When the fillExistingContentless option is set,
 * the lookup is additionally narrowed with the 'c' argument
 * (NOTE(review): presumably a file-type marker - confirm against
 * SqliteDbService::fileExistsInDatabase).
 *
 * Returns SKIPPED when the lookup reports an existing entry, otherwise
 * whatever saveFile() returns. */
SaveFileResult FileSaver::addFile(QString path)
{
	QFileInfo fileInfo(path);
	QString absoluteFilePath = fileInfo.absoluteFilePath();
	auto modifiedSecs = fileInfo.lastModified().toSecsSinceEpoch();

	const bool alreadyKnown = this->fileSaverOptions.fillExistingContentless
								  ? this->dbService->fileExistsInDatabase(absoluteFilePath, modifiedSecs, 'c')
								  : this->dbService->fileExistsInDatabase(absoluteFilePath, modifiedSecs);
	if(alreadyKnown)
	{
		return SKIPPED;
	}
	return saveFile(fileInfo);
}
|
|
|
|
|
|
|
|
/* Saves the file at the given path without first checking whether the
 * database already has an entry for it. Returns the result of saveFile(). */
SaveFileResult FileSaver::updateFile(QString path)
{
	return saveFile(QFileInfo(path));
}
|
|
|
|
|
2023-01-08 17:37:28 +01:00
|
|
|
int FileSaver::addFiles(const QVector<QString> paths)
|
2019-04-16 08:50:08 +02:00
|
|
|
{
|
2023-01-08 17:37:28 +01:00
|
|
|
return processFiles(paths, std::bind(&FileSaver::addFile, this, std::placeholders::_1));
|
2019-04-16 08:50:08 +02:00
|
|
|
}
|
|
|
|
|
2023-01-08 17:37:28 +01:00
|
|
|
int FileSaver::updateFiles(const QVector<QString> paths)
|
2019-04-16 08:50:08 +02:00
|
|
|
{
|
2023-01-08 17:37:28 +01:00
|
|
|
return processFiles(paths, std::bind(&FileSaver::updateFile, this, std::placeholders::_1));
|
2019-04-16 08:50:08 +02:00
|
|
|
}
|
|
|
|
|
2023-01-08 17:37:28 +01:00
|
|
|
int FileSaver::processFiles(const QVector<QString> paths, std::function<SaveFileResult(QString path)> saverFunc)
|
2019-04-16 08:50:08 +02:00
|
|
|
{
|
|
|
|
std::atomic<bool> terminate{false};
|
2019-04-30 23:44:27 +02:00
|
|
|
std::atomic<int> processedCount{0};
|
2019-04-16 08:50:08 +02:00
|
|
|
QtConcurrent::blockingMap(paths,
|
|
|
|
[&](const QString &path)
|
|
|
|
{
|
|
|
|
if(terminate.load())
|
|
|
|
{
|
|
|
|
return;
|
|
|
|
}
|
2023-01-08 17:37:28 +01:00
|
|
|
if(this->fileSaverOptions.verbose)
|
2019-04-16 08:50:08 +02:00
|
|
|
{
|
2021-06-12 22:55:56 +02:00
|
|
|
Logger::info() << "Processing " << path << Qt::endl;
|
2019-04-16 08:50:08 +02:00
|
|
|
}
|
|
|
|
SaveFileResult result = saverFunc(path);
|
|
|
|
if(result == DBFAIL || result == PROCESSFAIL)
|
|
|
|
{
|
2021-06-12 22:55:56 +02:00
|
|
|
Logger::error() << "Failed to process " << path << Qt::endl;
|
2023-01-08 17:37:28 +01:00
|
|
|
if(!this->fileSaverOptions.keepGoing)
|
2019-04-16 08:50:08 +02:00
|
|
|
{
|
|
|
|
terminate = true;
|
|
|
|
}
|
|
|
|
}
|
2019-04-30 23:44:27 +02:00
|
|
|
else
|
2019-04-16 08:50:08 +02:00
|
|
|
{
|
2019-04-30 23:44:27 +02:00
|
|
|
++processedCount;
|
2023-01-08 17:37:28 +01:00
|
|
|
if(this->fileSaverOptions.verbose)
|
2019-04-16 08:50:08 +02:00
|
|
|
{
|
2019-04-30 23:44:27 +02:00
|
|
|
if(result == SKIPPED)
|
|
|
|
{
|
|
|
|
Logger::info() << "Skipped" << path
|
2021-06-12 22:55:56 +02:00
|
|
|
<< "as it already exists in the database" << Qt::endl;
|
2019-04-30 23:44:27 +02:00
|
|
|
}
|
|
|
|
else if(result == OK)
|
|
|
|
{
|
2022-06-02 13:44:15 +02:00
|
|
|
Logger::info() << "Saved" << path << Qt::endl;
|
2019-04-30 23:44:27 +02:00
|
|
|
}
|
2022-06-24 17:42:55 +02:00
|
|
|
else if(result == OK_WASEMPTY)
|
|
|
|
{
|
|
|
|
Logger::info() << "Saved (but content was empty)" << path << Qt::endl;
|
|
|
|
}
|
2019-04-16 08:50:08 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
});
|
2019-04-30 23:44:27 +02:00
|
|
|
return processedCount.load();
|
2019-04-16 08:50:08 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Stores one filesystem entry in the database, extracting its content first
 * when required.
 *
 * For regular files:
 *  - entries whose canonical path starts with one of excludedPaths are
 *    skipped;
 *  - content is extracted when fillExistingContentless is set, or when
 *    metadataOnly is not set and the database either has no file type for
 *    this file or reports the type 'c';
 *  - extraction runs this very executable ("/proc/self/exe") as a child
 *    process with the arguments "process <canonicalPath>" and deserializes
 *    PageData records from the child's standard output.
 *
 * Entries that are not regular files are passed to the database service
 * without any page data.
 *
 * Returns:
 *  - NOTFOUND     if the entry does not exist;
 *  - NOACCESS     if it exists but is not readable;
 *  - SKIPPED      if it lies below an excluded path;
 *  - PROCESSFAIL  if the child process did not start, did not finish within
 *                 QProcess' default wait timeout, crashed, or produced
 *                 output that could not be deserialized;
 *  - the child's exit code (cast to SaveFileResult) if it is neither OK nor
 *    OK_WASEMPTY;
 *  - OK_WASEMPTY  if the child reported OK_WASEMPTY and the database save
 *                 returned OK;
 *  - otherwise the result of SqliteDbService::saveFile(). */
SaveFileResult FileSaver::saveFile(const QFileInfo &fileInfo)
{
	/* Existence has to be tested before readability: a missing file is never
	 * readable, so the opposite order would report NOACCESS for it and make
	 * NOTFOUND unreachable. */
	if(!fileInfo.exists())
	{
		return NOTFOUND;
	}

	if(!fileInfo.isReadable())
	{
		return NOACCESS;
	}

	QVector<PageData> pageData;
	const QString canonicalPath = fileInfo.canonicalFilePath();
	int processorReturnCode = -1;

	if(fileInfo.isFile())
	{
		/* Iterate through a const reference: this method is invoked
		 * concurrently by processFiles(), and a non-const range-for may
		 * detach the implicitly shared Qt container. */
		const auto &excluded = this->excludedPaths;
		for(const QString &excludedPath : excluded)
		{
			if(canonicalPath.startsWith(excludedPath))
			{
				if(this->fileSaverOptions.verbose)
				{
					Logger::info() << "Skipped due to excluded path" << Qt::endl;
				}
				return SKIPPED;
			}
		}

		bool mustFillContent = this->fileSaverOptions.fillExistingContentless;
		if(!mustFillContent)
		{
			mustFillContent = !this->fileSaverOptions.metadataOnly;
			if(mustFillContent)
			{
				/* Query by the file's own absolute path (the same key addFile()
				 * uses for its lookup). QFileInfo::absolutePath() would return
				 * only the containing directory. */
				auto filetype = this->dbService->queryFileType(fileInfo.absoluteFilePath());
				mustFillContent = !filetype.has_value() || filetype.value() == 'c';
			}
		}

		if(fileInfo.size() > 0 && mustFillContent)
		{
			QProcess process;
			QStringList args;
			args << "process" << canonicalPath;
			process.setProcessChannelMode(QProcess::ForwardedErrorChannel);
			process.start("/proc/self/exe", args);

			/* A child that never started, timed out or crashed has no
			 * meaningful exit code and possibly truncated output; never treat
			 * that as success. A still-running child is killed by the
			 * QProcess destructor when we return. */
			const bool finished = process.waitForStarted() && process.waitForFinished();
			if(!finished || process.exitStatus() != QProcess::NormalExit)
			{
				Logger::error() << "FileSaver::saveFile(): Processor for" << canonicalPath
								<< "did not finish cleanly:" << process.errorString() << Qt::endl;
				return PROCESSFAIL;
			}

			processorReturnCode = process.exitCode();
			if(processorReturnCode != OK && processorReturnCode != OK_WASEMPTY)
			{
				Logger::error() << "FileSaver::saveFile(): Error while processing" << canonicalPath << ":"
								<< "Exit code " << processorReturnCode << Qt::endl;

				return static_cast<SaveFileResult>(processorReturnCode);
			}

			/* TODO: This is suboptimal as it eats lots of mem
			 * but avoids a weird QDataStream/QProcess behaviour
			 * where it thinks the process has ended when it has not...
			 *
			 * Also, there seem to be issues with reads not being blocked, so
			 * the only reliable way appears to be waiting until the process
			 * finishes.
			 */
			QDataStream in(process.readAllStandardOutput());
			while(!in.atEnd())
			{
				PageData pd;
				in >> pd;
				if(in.status() != QDataStream::Ok)
				{
					/* Corrupt or truncated output: do not store garbage pages. */
					Logger::error() << "FileSaver::saveFile(): Could not read processor output for"
									<< canonicalPath << Qt::endl;
					return PROCESSFAIL;
				}
				pageData.append(pd);
			}
		}
	}

	SaveFileResult result = this->dbService->saveFile(fileInfo, pageData, this->fileSaverOptions.metadataOnly);
	if(result == OK && processorReturnCode == OK_WASEMPTY)
	{
		return OK_WASEMPTY;
	}
	return result;
}
|