15 İşleme

29 değiştirilmiş dosya ile 251 ekleme ve 53 silme

Dosyayı Görüntüle

@ -28,7 +28,7 @@ There is no need to write the long form of filters. There are also booleans avai
The screenshots in this section may occasionally be slightly outdated, but they are usually recent enough to get an overall impression of the current state of the GUI.
## Current status
Latest version: 2022-11-19, v0.8.1
Latest version: 2023-05-07, v0.9
Please keep in mind: looqs is still at an early stage and may exhibit some weirdness and contain bugs.

Dosyayı Görüntüle

@ -46,6 +46,8 @@ packagesExist(quazip1-qt5) {
}
INCLUDEPATH += $$PWD/../shared
INCLUDEPATH += /usr/include/poppler/qt5/
DEPENDPATH += $$PWD/../shared
win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../shared/release/libshared.a

Dosyayı Görüntüle

@ -24,7 +24,7 @@ QSharedPointer<PreviewResult> PreviewGeneratorOdt::generate(RenderConfig config,
throw LooqsGeneralException("Error while reading content.xml of " + documentPath);
}
TagStripperProcessor tsp;
QString content = tsp.process(entireContent).constFirst().content;
QString content = tsp.process(entireContent).pages.constFirst().content;
PreviewGeneratorPlainText plainTextGenerator;
result->setText(plainTextGenerator.generatePreviewText(content, config, info.fileName()));

Dosyayı Görüntüle

@ -24,7 +24,9 @@ QString DefaultTextProcessor::processText(const QByteArray &data) const
return {};
}
QVector<PageData> DefaultTextProcessor::process(const QByteArray &data) const
/* Wraps the text returned by processText(data) into a result holding a single
 * page entry whose first field (the page number) is 0. */
DocumentProcessResult DefaultTextProcessor::process(const QByteArray &data) const
{
return {{0, processText(data)}};
DocumentProcessResult result;
result.pages.append({0, processText(data)});
return result;
}

Dosyayı Görüntüle

@ -11,7 +11,7 @@ class DefaultTextProcessor : public Processor
public:
DefaultTextProcessor();
QString processText(const QByteArray &data) const;
QVector<PageData> process(const QByteArray &data) const override;
DocumentProcessResult process(const QByteArray &data) const override;
};
#endif // DEFAULTTEXTPROCESSOR_H

Dosyayı Görüntüle

@ -0,0 +1,31 @@
#include "documentoutlineentry.h"
/* Creates an entry that has no destination yet.
 *
 * type and destinationPage are given defined values here because entries are
 * serialized and written to the database even when no destination was ever
 * assigned to them; without this they would carry indeterminate values. */
DocumentOutlineEntry::DocumentOutlineEntry() : type(OUTLINE_DESTINATION_TYPE_NONE), destinationPage(0)
{
}
/* Serializes an outline entry and, recursively, all of its children.
 *
 * Layout: text, type, destinationPage, number of children (32 bit), children.
 * The child count is written with an explicit 32 bit width so that it always
 * matches what the corresponding operator>> reads, independent of the integer
 * type the container uses for size(). */
QDataStream &operator<<(QDataStream &out, const DocumentOutlineEntry &pd)
{
    out << pd.text << pd.type << pd.destinationPage;
    out << static_cast<qint32>(pd.children.size());
    for(const DocumentOutlineEntry &child : pd.children)
    {
        out << child;
    }
    return out;
}
/* Deserializes an outline entry and, recursively, all of its children.
 *
 * Any children already present in pd are replaced. Reading stops as soon as
 * the stream reports an error, so a truncated or corrupted stream cannot make
 * the loop append default-constructed entries for a bogus child count. */
QDataStream &operator>>(QDataStream &in, DocumentOutlineEntry &pd)
{
    in >> pd.text >> pd.type >> pd.destinationPage;

    qint32 numChildren = 0;
    in >> numChildren;

    pd.children.clear();
    for(qint32 i = 0; i < numChildren; i++)
    {
        DocumentOutlineEntry child;
        in >> child;
        if(in.status() != QDataStream::Ok)
        {
            /* Do not keep a partially read entry */
            break;
        }
        pd.children.append(child);
    }
    return in;
}

Dosyayı Görüntüle

@ -0,0 +1,29 @@
#ifndef DOCUMENTOUTLINEENTRY_H
#define DOCUMENTOUTLINEENTRY_H
#include <QMetaType>
#include <QDataStream>
#include <QString>
/* Kind of destination a DocumentOutlineEntry points to. The value is streamed
 * as part of the entry by the QDataStream operators of DocumentOutlineEntry. */
enum OutlineDestinationType
{
/* presumably marks an entry without a destination - confirm */
OUTLINE_DESTINATION_TYPE_NONE,
/* Assigned by PdfProcessor::createOutline to the entries it builds */
OUTLINE_DESTINATION_TYPE_PAGE
/* In the future, links, or #anchors are possible */
};
/* One node of a document outline: a caption, an optional destination and any
 * number of nested child entries.
 *
 * type and destinationPage carry default member initializers so that an entry
 * for which no destination was set never exposes indeterminate values when it
 * is serialized or stored. */
class DocumentOutlineEntry
{
  public:
    DocumentOutlineEntry();

    /* Nested entries below this one */
    QVector<DocumentOutlineEntry> children;
    /* Kind of destination; NONE until something assigns a destination */
    OutlineDestinationType type = OUTLINE_DESTINATION_TYPE_NONE;
    /* Caption of the entry */
    QString text;
    /* Destination page number; 0 until something assigns a destination */
    unsigned int destinationPage = 0;
};
Q_DECLARE_METATYPE(DocumentOutlineEntry);
QDataStream &operator<<(QDataStream &out, const DocumentOutlineEntry &pd);
QDataStream &operator>>(QDataStream &in, DocumentOutlineEntry &pd);
#endif // DOCUMENTOUTLINEENTRY_H

Dosyayı Görüntüle

@ -0,0 +1,39 @@
#include "documentprocessresult.h"
/* Serializes a processing result.
 *
 * Layout: number of pages (32 bit), number of outlines (32 bit), all pages,
 * all top-level outline entries. Both counts are written with an explicit
 * 32 bit width so they always match what the corresponding operator>> reads,
 * independent of the integer type the container uses for size(). */
QDataStream &operator<<(QDataStream &out, const DocumentProcessResult &pd)
{
    out << static_cast<qint32>(pd.pages.size());
    out << static_cast<qint32>(pd.outlines.size());
    /* The loop variable deliberately does not reuse the name of the parameter */
    for(const PageData &page : pd.pages)
    {
        out << page;
    }
    for(const DocumentOutlineEntry &outline : pd.outlines)
    {
        out << outline;
    }
    return out;
}
/* Deserializes a processing result written by the matching operator<<.
 *
 * Pages and outlines already present in pd are replaced. Reading stops as
 * soon as the stream reports an error, so a truncated or corrupted stream
 * cannot make the loops append default-constructed elements for a bogus
 * count. */
QDataStream &operator>>(QDataStream &in, DocumentProcessResult &pd)
{
    qint32 numPages = 0;
    qint32 numOutlines = 0;
    in >> numPages;
    in >> numOutlines;

    pd.pages.clear();
    pd.outlines.clear();

    for(qint32 i = 0; i < numPages; i++)
    {
        PageData page;
        in >> page;
        if(in.status() != QDataStream::Ok)
        {
            /* Do not keep a partially read page */
            return in;
        }
        pd.pages.append(page);
    }
    for(qint32 i = 0; i < numOutlines; i++)
    {
        DocumentOutlineEntry outline;
        in >> outline;
        if(in.status() != QDataStream::Ok)
        {
            /* Do not keep a partially read outline entry */
            return in;
        }
        pd.outlines.append(outline);
    }
    return in;
}

Dosyayı Görüntüle

@ -0,0 +1,17 @@
#ifndef DOCUMENTPROCESSRESULT_H
#define DOCUMENTPROCESSRESULT_H
#include <pagedata.h>
#include <documentoutlineentry.h>
/* Everything a Processor extracts from a single document. Instances are
 * streamed with the QDataStream operators declared below. */
class DocumentProcessResult
{
public:
/* Extracted text entries of the document */
QVector<PageData> pages;
/* Top-level outline entries; nested entries hang off their children member */
QVector<DocumentOutlineEntry> outlines;
};
Q_DECLARE_METATYPE(DocumentProcessResult);
QDataStream &operator<<(QDataStream &out, const DocumentProcessResult &pd);
QDataStream &operator>>(QDataStream &in, DocumentProcessResult &pd);
#endif // DOCUMENTPROCESSRESULT_H

Dosyayı Görüntüle

@ -110,7 +110,7 @@ int FileSaver::processFiles(const QVector<QString> paths, std::function<SaveFile
SaveFileResult FileSaver::saveFile(const QFileInfo &fileInfo)
{
QVector<PageData> pageData;
DocumentProcessResult processResult;
QString canonicalPath = fileInfo.canonicalFilePath();
int processorReturnCode = -1;
@ -169,11 +169,10 @@ SaveFileResult FileSaver::saveFile(const QFileInfo &fileInfo)
* finishes.
*/
QDataStream in(process.readAllStandardOutput());
while(!in.atEnd())
if(!in.atEnd())
{
PageData pd;
in >> pd;
pageData.append(pd);
in >> processResult;
}
processorReturnCode = process.exitCode();
if(processorReturnCode != OK && processorReturnCode != OK_WASEMPTY)
@ -185,7 +184,7 @@ SaveFileResult FileSaver::saveFile(const QFileInfo &fileInfo)
}
}
}
SaveFileResult result = this->dbService->saveFile(fileInfo, pageData, this->fileSaverOptions.metadataOnly);
SaveFileResult result = this->dbService->saveFile(fileInfo, processResult, this->fileSaverOptions.metadataOnly);
if(result == OK && processorReturnCode == OK_WASEMPTY)
{
return OK_WASEMPTY;

Dosyayı Görüntüle

@ -29,6 +29,11 @@ bool LooqsQuery::hasContentSearch() const
return (this->getTokensMask() & FILTER_CONTENT) == FILTER_CONTENT;
}
/* Tells whether all bits of FILTER_OUTLINE_CONTAINS are set in the tokens mask.
 *
 * NOTE(review): in the TokenType enum FILTER_OUTLINE_CONTAINS has no explicit
 * value of its own, so it may not be a dedicated bit; this test might then
 * also succeed for combinations of other token types - confirm. */
bool LooqsQuery::hasOutlineSearch() const
{
    const auto tokensMask = this->getTokensMask();
    const bool outlineBitsSet = (tokensMask & FILTER_OUTLINE_CONTAINS) == FILTER_OUTLINE_CONTAINS;
    return outlineBitsSet;
}
bool LooqsQuery::hasPathSearch() const
{
return (this->getTokensMask() & FILTER_PATH) == FILTER_PATH;
@ -289,6 +294,10 @@ LooqsQuery LooqsQuery::build(QString expression, TokenType loneWordsTokenType, b
{
tokenType = FILTER_TAG_ASSIGNED;
}
else if(filtername == "toc" || filtername == "outline")
{
tokenType = FILTER_OUTLINE_CONTAINS;
}
// TODO: given this is not really a "filter", this feels slightly misplaced here
else if(filtername == "sort")
{

Dosyayı Görüntüle

@ -68,6 +68,7 @@ class LooqsQuery
this->limit = limit;
}
bool hasContentSearch() const;
bool hasOutlineSearch() const;
bool hasPathSearch() const;
void addSortCondition(SortCondition sc);

2
shared/migrations/6.sql Normal dosya
Dosyayı Görüntüle

@ -0,0 +1,2 @@
CREATE TABLE outline(id INTEGER PRIMARY KEY, fileid INTEGER REFERENCES file (id) ON DELETE CASCADE, text varchar(1024), page integer);
CREATE INDEX outline_fileid ON outline (fileid);

Dosyayı Görüntüle

@ -5,5 +5,6 @@
<file>3.sql</file>
<file>4.sql</file>
<file>5.sql</file>
<file>6.sql</file>
</qresource>
</RCC>

Dosyayı Görüntüle

@ -10,7 +10,7 @@ class NothingProcessor : public Processor
NothingProcessor();
public:
QVector<PageData> process(const QByteArray & /*data*/) const override
DocumentProcessResult process(const QByteArray & /*data*/) const override
{
return {};
}

Dosyayı Görüntüle

@ -3,12 +3,12 @@
#include "odtprocessor.h"
#include "tagstripperprocessor.h"
QVector<PageData> OdtProcessor::process(const QByteArray & /*data*/) const
DocumentProcessResult OdtProcessor::process(const QByteArray & /*data*/) const
{
throw LooqsGeneralException("Not implemented yet");
}
QVector<PageData> OdtProcessor::process(QString path) const
DocumentProcessResult OdtProcessor::process(QString path) const
{
QuaZipFile zipFile(path);
zipFile.setFileName("content.xml");

Dosyayı Görüntüle

@ -8,9 +8,9 @@ class OdtProcessor : public Processor
{
this->PREFERED_DATA_SOURCE = FILEPATH;
}
QVector<PageData> process(const QByteArray &data) const override;
DocumentProcessResult process(const QByteArray &data) const override;
QVector<PageData> process(QString path) const override;
DocumentProcessResult process(QString path) const override;
};
#endif // ODTPROCESSOR_H

Dosyayı Görüntüle

@ -5,9 +5,30 @@ PdfProcessor::PdfProcessor()
{
}
QVector<PageData> PdfProcessor::process(const QByteArray &data) const
/* Recursively converts Poppler outline items into DocumentOutlineEntry objects.
 * For each item the name is copied, the destination page is taken over when a
 * destination is present, and the children are converted by calling this
 * function again. */
QVector<DocumentOutlineEntry> PdfProcessor::createOutline(const QVector<Poppler::OutlineItem> &outlineItems) const
{
QVector<PageData> result;
QVector<DocumentOutlineEntry> result;
for(const Poppler::OutlineItem &outlineItem : outlineItems)
{
DocumentOutlineEntry documentOutlineEntry;
documentOutlineEntry.text = outlineItem.name();
/* NOTE(review): the type is set to PAGE for every item, including those whose
 * destination() is null below; destinationPage is not assigned in that case.
 * Confirm whether OUTLINE_DESTINATION_TYPE_NONE was intended for such items. */
documentOutlineEntry.type = OUTLINE_DESTINATION_TYPE_PAGE;
if(!outlineItem.destination().isNull())
{
documentOutlineEntry.destinationPage = outlineItem.destination()->pageNumber();
}
if(outlineItem.hasChildren())
{
documentOutlineEntry.children = createOutline(outlineItem.children());
}
result.append(documentOutlineEntry);
}
return result;
}
DocumentProcessResult PdfProcessor::process(const QByteArray &data) const
{
DocumentProcessResult result;
QScopedPointer<Poppler::Document> doc(Poppler::Document::loadFromData(data));
if(doc.isNull())
{
@ -26,12 +47,13 @@ QVector<PageData> PdfProcessor::process(const QByteArray &data) const
for(auto i = 0; i < pagecount; i++)
{
QString text = doc->page(i)->text(entirePage);
result.append({static_cast<unsigned int>(i + 1), text});
result.pages.append({static_cast<unsigned int>(i + 1), text});
/*TODO: hack, so we can fts search several words over the whole document, not just pages.
* this of course uses more space and should be solved differently.
*/
entire += text;
}
result.append({0, entire});
result.pages.append({0, entire});
result.outlines = createOutline(doc->outline());
return result;
}

Dosyayı Görüntüle

@ -1,5 +1,6 @@
#ifndef PDFPROCESSOR_H
#define PDFPROCESSOR_H
#include <poppler-qt5.h>
#include "processor.h"
class PdfProcessor : public Processor
{
@ -7,7 +8,8 @@ class PdfProcessor : public Processor
PdfProcessor();
public:
QVector<PageData> process(const QByteArray &data) const override;
QVector<DocumentOutlineEntry> createOutline(const QVector<Poppler::OutlineItem> &outlineItems) const;
DocumentProcessResult process(const QByteArray &data) const override;
};
#endif // PDFPROCESSOR_H

Dosyayı Görüntüle

@ -2,8 +2,8 @@
#define PROCESSOR_H
#include <QVector>
#include <QFile>
#include "pagedata.h"
#include "utils.h"
#include "documentprocessresult.h"
enum DataSource
{
FILEPATH,
@ -18,8 +18,8 @@ class Processor
* a single file */
DataSource PREFERED_DATA_SOURCE = ARRAY;
Processor();
virtual QVector<PageData> process(const QByteArray &data) const = 0;
virtual QVector<PageData> process(QString path) const
virtual DocumentProcessResult process(const QByteArray &data) const = 0;
virtual DocumentProcessResult process(QString path) const
{
return process(Utils::readFile(path));
}

Dosyayı Görüntüle

@ -65,18 +65,12 @@ void SandboxedProcessor::enableSandbox(QString readablePath)
exile_free_policy(policy);
}
void SandboxedProcessor::printResults(const QVector<PageData> &pageData)
/* Writes the given result to this process' stdout using a QDataStream. */
void SandboxedProcessor::printResults(const DocumentProcessResult &result)
{
QFile fsstdout;
/* Wrap the C stdout handle so QDataStream can write to it */
fsstdout.open(stdout, QIODevice::WriteOnly);
QDataStream stream(&fsstdout);
for(const PageData &data : pageData)
{
stream << data;
// fsstdout.flush();
}
stream << result;
fsstdout.close();
}
@ -102,7 +96,7 @@ SaveFileResult SandboxedProcessor::process()
return OK;
}
QVector<PageData> pageData;
DocumentProcessResult processResult;
QString absPath = fileInfo.absoluteFilePath();
try
@ -111,13 +105,13 @@ SaveFileResult SandboxedProcessor::process()
{
/* Read access to FS needed... doh..*/
enableSandbox(absPath);
pageData = processor->process(absPath);
processResult = processor->process(absPath);
}
else
{
QByteArray data = Utils::readFile(absPath);
enableSandbox();
pageData = processor->process(data);
processResult = processor->process(data);
}
}
catch(LooqsGeneralException &e)
@ -126,6 +120,6 @@ SaveFileResult SandboxedProcessor::process()
return PROCESSFAIL;
}
printResults(pageData);
return pageData.isEmpty() ? OK_WASEMPTY : OK;
printResults(processResult);
return processResult.pages.isEmpty() ? OK_WASEMPTY : OK;
}

Dosyayı Görüntüle

@ -2,7 +2,7 @@
#define SANDBOXEDPROCESSOR_H
#include <QString>
#include <QMimeDatabase>
#include "pagedata.h"
#include "documentprocessresult.h"
#include "savefileresult.h"
class SandboxedProcessor
@ -12,7 +12,7 @@ class SandboxedProcessor
QMimeDatabase mimeDatabase;
void enableSandbox(QString readablePath = "");
void printResults(const QVector<PageData> &pageData);
void printResults(const DocumentProcessResult &pageData);
public:
SandboxedProcessor(QString filepath)

Dosyayı Görüntüle

@ -42,6 +42,8 @@ SOURCES += sqlitesearch.cpp \
dbmigrator.cpp \
defaulttextprocessor.cpp \
dirscanworker.cpp \
documentoutlineentry.cpp \
documentprocessresult.cpp \
encodingdetector.cpp \
filesaver.cpp \
filescanworker.cpp \
@ -72,6 +74,8 @@ HEADERS += sqlitesearch.h \
dbmigrator.h \
defaulttextprocessor.h \
dirscanworker.h \
documentoutlineentry.h \
documentprocessresult.h \
encodingdetector.h \
filedata.h \
filesaver.h \

Dosyayı Görüntüle

@ -253,6 +253,29 @@ bool SqliteDbService::insertToFTS(bool useTrigrams, QSqlDatabase &db, int fileid
return true;
}
/* Stores the given outline entries, and recursively all of their children,
 * in the outline table for the file identified by fileid.
 *
 * The nesting is flattened: every entry becomes one row (fileid, lower-cased
 * text, destination page).
 *
 * Returns true when all rows were inserted (or there was nothing to insert),
 * false as soon as one insertion fails. No transaction handling happens here;
 * that is left to the caller. */
bool SqliteDbService::insertOutline(QSqlDatabase &db, int fileid, const QVector<DocumentOutlineEntry> &outlines)
{
    if(outlines.isEmpty())
    {
        /* Reached for every entry without children: nothing to prepare or insert */
        return true;
    }

    QSqlQuery outlineQuery(db);
    if(!outlineQuery.prepare("INSERT INTO outline(fileid, text, page) VALUES(?,?,?)"))
    {
        Logger::error() << "Failed outline insertion " << outlineQuery.lastError() << Qt::endl;
        return false;
    }
    /* The file id is the same for every row, so it is bound only once */
    outlineQuery.bindValue(0, fileid);
    for(const DocumentOutlineEntry &outline : outlines)
    {
        outlineQuery.bindValue(1, outline.text.toLower());
        outlineQuery.bindValue(2, outline.destinationPage);
        if(!outlineQuery.exec())
        {
            Logger::error() << "Failed outline insertion " << outlineQuery.lastError() << Qt::endl;
            return false;
        }
        if(!insertOutline(db, fileid, outline.children))
        {
            /* The nested call has already logged the actual SQL error. The query of
             * this level succeeded, so its lastError() would only be misleading. */
            Logger::error() << "Failed outline insertion (children)" << Qt::endl;
            return false;
        }
    }
    return true;
}
QSqlQuery SqliteDbService::exec(QString querystr, std::initializer_list<QVariant> args)
{
auto query = QSqlQuery(dbFactory->forCurrentThread());
@ -278,7 +301,7 @@ bool SqliteDbService::execBool(QString querystr, std::initializer_list<QVariant>
return query.value(0).toBool();
}
SaveFileResult SqliteDbService::saveFile(QFileInfo fileInfo, QVector<PageData> &pageData, bool pathsOnly)
SaveFileResult SqliteDbService::saveFile(QFileInfo fileInfo, DocumentProcessResult &processResult, bool pathsOnly)
{
QString absPath = fileInfo.absoluteFilePath();
auto mtime = fileInfo.lastModified().toSecsSinceEpoch();
@ -323,18 +346,24 @@ SaveFileResult SqliteDbService::saveFile(QFileInfo fileInfo, QVector<PageData> &
if(!pathsOnly)
{
int lastid = inserterQuery.lastInsertId().toInt();
if(!insertToFTS(false, db, lastid, pageData))
if(!insertToFTS(false, db, lastid, processResult.pages))
{
db.rollback();
Logger::error() << "Failed to insert data to FTS index " << Qt::endl;
return DBFAIL;
}
if(!insertToFTS(true, db, lastid, pageData))
if(!insertToFTS(true, db, lastid, processResult.pages))
{
db.rollback();
Logger::error() << "Failed to insert data to FTS index " << Qt::endl;
return DBFAIL;
}
if(!insertOutline(db, lastid, processResult.outlines))
{
db.rollback();
Logger::error() << "Failed to insert outline data " << Qt::endl;
return DBFAIL;
}
}
if(!db.commit())

Dosyayı Görüntüle

@ -5,7 +5,7 @@
#include "databasefactory.h"
#include "utils.h"
#include "pagedata.h"
#include "documentprocessresult.h"
#include "filedata.h"
#include "../shared/sqlitesearch.h"
#include "../shared/token.h"
@ -22,7 +22,7 @@ class SqliteDbService
public:
SqliteDbService(DatabaseFactory &dbFactory);
SaveFileResult saveFile(QFileInfo fileInfo, QVector<PageData> &pageData, bool pathsOnly);
SaveFileResult saveFile(QFileInfo fileInfo, DocumentProcessResult &pageData, bool pathsOnly);
bool deleteFile(QString path);
bool fileExistsInDatabase(QString path);
@ -42,6 +42,7 @@ class SqliteDbService
QVector<SearchResult> search(const LooqsQuery &query);
std::optional<QChar> queryFileType(QString absPath);
bool insertOutline(QSqlDatabase &db, int fileid, const QVector<DocumentOutlineEntry> &outlines);
};
#endif // SQLITEDBSERVICE_H

Dosyayı Görüntüle

@ -148,6 +148,11 @@ QPair<QString, QVector<QString>> SqliteSearch::createSql(const Token &token)
return {" file.id IN (SELECT fileid FROM filetag WHERE tagid = (SELECT id FROM tag WHERE name = ?)) ",
{value.toLower()}};
}
if(token.type == FILTER_OUTLINE_CONTAINS)
{
return {" outline.text LIKE '%' || ? || '%' ", {value.toLower()}};
}
throw LooqsGeneralException("Unknown token passed (should not happen)");
}
@ -156,6 +161,7 @@ QSqlQuery SqliteSearch::makeSqlQuery(const LooqsQuery &query)
QString whereSql;
QVector<QString> bindValues;
bool isContentSearch = (query.getTokensMask() & FILTER_CONTENT) == FILTER_CONTENT;
bool isOutlineSearch = query.hasOutlineSearch();
if(query.getTokens().isEmpty())
{
throw LooqsGeneralException("Nothing to search for supplied");
@ -200,15 +206,22 @@ QSqlQuery SqliteSearch::makeSqlQuery(const LooqsQuery &query)
}
else
{
QString pageColumn = "'0' as page";
QString joiners = "";
if(isOutlineSearch)
{
pageColumn = "outline.page as page";
joiners = " INNER JOIN outline ON outline.fileid = file.id ";
}
if(sortSql.isEmpty())
{
sortSql = "ORDER BY file.mtime DESC";
}
prepSql = "SELECT file.path AS path, '0' as page, file.mtime AS mtime, file.size AS size, file.filetype AS "
"filetype FROM file WHERE 1=1 AND " +
whereSql + " " + sortSql;
prepSql = "SELECT DISTINCT file.path AS path, " + pageColumn +
",file.mtime AS mtime, file.size AS size, "
"file.filetype AS filetype FROM file" +
joiners + " WHERE 1=1 AND " + whereSql + " " + sortSql;
}
if(query.getLimit() > 0)
{
prepSql += " LIMIT " + QString::number(query.getLimit());
@ -242,7 +255,7 @@ QVector<SearchResult> SqliteSearch::search(const LooqsQuery &query)
throw LooqsGeneralException("SQL Error: " + dbQuery.lastError().text());
}
bool contentSearch = query.hasContentSearch();
bool contentSearch = query.hasContentSearch() || query.hasOutlineSearch();
while(dbQuery.next())
{
SearchResult result;

Dosyayı Görüntüle

@ -4,11 +4,11 @@ TagStripperProcessor::TagStripperProcessor()
{
}
QVector<PageData> TagStripperProcessor::process(const QByteArray &data) const
/* Runs DefaultTextProcessor::process on the data and then removes every match
 * of the regular expression <[^>]*> from the content of the first page. */
DocumentProcessResult TagStripperProcessor::process(const QByteArray &data) const
{
auto result = DefaultTextProcessor::process(data);
// TODO: does not work properly with <br> and does not deal with entities...
result[0].content.remove(QRegExp("<[^>]*>"));
/* The access to pages[0] below requires at least one page */
Q_ASSERT(result.pages.size() > 0);
result.pages[0].content.remove(QRegExp("<[^>]*>"));
return result;
}

Dosyayı Görüntüle

@ -8,7 +8,7 @@ class TagStripperProcessor : public DefaultTextProcessor
TagStripperProcessor();
public:
QVector<PageData> process(const QByteArray &data) const override;
DocumentProcessResult process(const QByteArray &data) const override;
};
#endif // XMLSTRIPPERPROCESSOR_H

Dosyayı Görüntüle

@ -20,6 +20,7 @@ enum TokenType
FILTER_PATH_ENDS,
FILTER_PATH_STARTS,
FILTER_TAG_ASSIGNED,
FILTER_OUTLINE_CONTAINS,
FILTER_CONTENT = 512, /* Everything below here is content search (except LIMIT) */
FILTER_CONTENT_CONTAINS,
FILTER_CONTENT_PAGE,