WIP/outlineindex #41
@ -46,6 +46,8 @@ packagesExist(quazip1-qt5) {
|
||||
}
|
||||
|
||||
INCLUDEPATH += $$PWD/../shared
|
||||
INCLUDEPATH += /usr/include/poppler/qt5/
|
||||
|
||||
DEPENDPATH += $$PWD/../shared
|
||||
|
||||
win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../shared/release/libshared.a
|
||||
|
@ -24,7 +24,7 @@ QSharedPointer<PreviewResult> PreviewGeneratorOdt::generate(RenderConfig config,
|
||||
throw LooqsGeneralException("Error while reading content.xml of " + documentPath);
|
||||
}
|
||||
TagStripperProcessor tsp;
|
||||
QString content = tsp.process(entireContent).constFirst().content;
|
||||
QString content = tsp.process(entireContent).pages.constFirst().content;
|
||||
|
||||
PreviewGeneratorPlainText plainTextGenerator;
|
||||
result->setText(plainTextGenerator.generatePreviewText(content, config, info.fileName()));
|
||||
|
@ -24,7 +24,9 @@ QString DefaultTextProcessor::processText(const QByteArray &data) const
|
||||
return {};
|
||||
}
|
||||
|
||||
QVector<PageData> DefaultTextProcessor::process(const QByteArray &data) const
|
||||
DocumentProcessResult DefaultTextProcessor::process(const QByteArray &data) const
|
||||
{
|
||||
return {{0, processText(data)}};
|
||||
DocumentProcessResult result;
|
||||
result.pages.append({0, processText(data)});
|
||||
return result;
|
||||
}
|
||||
|
@ -11,7 +11,7 @@ class DefaultTextProcessor : public Processor
|
||||
public:
|
||||
DefaultTextProcessor();
|
||||
QString processText(const QByteArray &data) const;
|
||||
QVector<PageData> process(const QByteArray &data) const override;
|
||||
DocumentProcessResult process(const QByteArray &data) const override;
|
||||
};
|
||||
|
||||
#endif // DEFAULTTEXTPROCESSOR_H
|
||||
|
39
shared/documentprocessresult.cpp
Normal file
39
shared/documentprocessresult.cpp
Normal file
@ -0,0 +1,39 @@
|
||||
#include "documentprocessresult.h"
|
||||
|
||||
/**
 * Serializes a DocumentProcessResult to a QDataStream.
 *
 * Layout: page count, outline count, then every PageData followed by every
 * DocumentOutlineEntry.
 *
 * @param out stream to write to
 * @param pd  result to serialize
 * @return out, to allow chaining
 */
QDataStream &operator<<(QDataStream &out, const DocumentProcessResult &pd)
{
    /* Write the counts with an explicit fixed width: the reading side extracts them as
     * 32 bit integers, so the encoding must not depend on the container's size type. */
    out << static_cast<qint32>(pd.pages.size());
    out << static_cast<qint32>(pd.outlines.size());
    for(const PageData &page : pd.pages)
    {
        out << page;
    }
    for(const DocumentOutlineEntry &outline : pd.outlines)
    {
        out << outline;
    }
    return out;
}
|
||||
|
||||
/**
 * Deserializes a DocumentProcessResult from a QDataStream.
 *
 * Expects the layout: page count, outline count, the pages, then the outline entries.
 * Existing contents of pd are discarded before reading.
 *
 * The announced counts are not trusted: a negative count marks the stream as corrupt,
 * and reading stops as soon as the stream reports an error, so truncated or bogus input
 * cannot make us append default-constructed entries for a huge announced count.
 *
 * @param in stream to read from
 * @param pd receives the pages and outlines that could be read successfully
 * @return in; callers can inspect in.status() to detect a failed read
 */
QDataStream &operator>>(QDataStream &in, DocumentProcessResult &pd)
{
    pd.pages.clear();
    pd.outlines.clear();

    qint32 numPages = 0;
    qint32 numOutlines = 0;
    in >> numPages >> numOutlines;
    if(in.status() != QDataStream::Ok)
    {
        return in;
    }
    if(numPages < 0 || numOutlines < 0)
    {
        in.setStatus(QDataStream::ReadCorruptData);
        return in;
    }

    for(qint32 i = 0; i < numPages; i++)
    {
        PageData page;
        in >> page;
        if(in.status() != QDataStream::Ok)
        {
            // Do not keep a partially read entry
            return in;
        }
        pd.pages.append(page);
    }

    for(qint32 i = 0; i < numOutlines; i++)
    {
        DocumentOutlineEntry outline;
        in >> outline;
        if(in.status() != QDataStream::Ok)
        {
            return in;
        }
        pd.outlines.append(outline);
    }

    return in;
}
|
17
shared/documentprocessresult.h
Normal file
17
shared/documentprocessresult.h
Normal file
@ -0,0 +1,17 @@
|
||||
#ifndef DOCUMENTPROCESSRESULT_H
|
||||
#define DOCUMENTPROCESSRESULT_H
|
||||
#include <pagedata.h>
|
||||
#include <documentoutlineentry.h>
|
||||
|
||||
/* Result of processing a single document: the page data plus the document's
 * outline entries. Can be streamed through QDataStream via the operator<< and
 * operator>> overloads declared for it. */
class DocumentProcessResult
|
||||
{
|
||||
public:
|
||||
/* Page data produced by the processor */
QVector<PageData> pages;
|
||||
/* Outline entries of the document; stays empty when a processor does not fill it */
QVector<DocumentOutlineEntry> outlines;
|
||||
};
|
||||
Q_DECLARE_METATYPE(DocumentProcessResult);
|
||||
|
||||
QDataStream &operator<<(QDataStream &out, const DocumentProcessResult &pd);
|
||||
QDataStream &operator>>(QDataStream &in, DocumentProcessResult &pd);
|
||||
|
||||
#endif // DOCUMENTPROCESSRESULT_H
|
@ -110,7 +110,7 @@ int FileSaver::processFiles(const QVector<QString> paths, std::function<SaveFile
|
||||
|
||||
SaveFileResult FileSaver::saveFile(const QFileInfo &fileInfo)
|
||||
{
|
||||
QVector<PageData> pageData;
|
||||
DocumentProcessResult processResult;
|
||||
QString canonicalPath = fileInfo.canonicalFilePath();
|
||||
|
||||
int processorReturnCode = -1;
|
||||
@ -169,11 +169,10 @@ SaveFileResult FileSaver::saveFile(const QFileInfo &fileInfo)
|
||||
* finishes.
|
||||
*/
|
||||
QDataStream in(process.readAllStandardOutput());
|
||||
while(!in.atEnd())
|
||||
|
||||
if(!in.atEnd())
|
||||
{
|
||||
PageData pd;
|
||||
in >> pd;
|
||||
pageData.append(pd);
|
||||
in >> processResult;
|
||||
}
|
||||
processorReturnCode = process.exitCode();
|
||||
if(processorReturnCode != OK && processorReturnCode != OK_WASEMPTY)
|
||||
@ -185,7 +184,7 @@ SaveFileResult FileSaver::saveFile(const QFileInfo &fileInfo)
|
||||
}
|
||||
}
|
||||
}
|
||||
SaveFileResult result = this->dbService->saveFile(fileInfo, pageData, this->fileSaverOptions.metadataOnly);
|
||||
SaveFileResult result = this->dbService->saveFile(fileInfo, processResult, this->fileSaverOptions.metadataOnly);
|
||||
if(result == OK && processorReturnCode == OK_WASEMPTY)
|
||||
{
|
||||
return OK_WASEMPTY;
|
||||
|
@ -29,6 +29,11 @@ bool LooqsQuery::hasContentSearch() const
|
||||
return (this->getTokensMask() & FILTER_CONTENT) == FILTER_CONTENT;
|
||||
}
|
||||
|
||||
bool LooqsQuery::hasOutlineSearch() const
|
||||
{
|
||||
return (this->getTokensMask() & FILTER_OUTLINE_CONTAINS) == FILTER_OUTLINE_CONTAINS;
|
||||
}
|
||||
|
||||
bool LooqsQuery::hasPathSearch() const
|
||||
{
|
||||
return (this->getTokensMask() & FILTER_PATH) == FILTER_PATH;
|
||||
@ -289,6 +294,10 @@ LooqsQuery LooqsQuery::build(QString expression, TokenType loneWordsTokenType, b
|
||||
{
|
||||
tokenType = FILTER_TAG_ASSIGNED;
|
||||
}
|
||||
else if(filtername == "toc" || filtername == "outline")
|
||||
{
|
||||
tokenType = FILTER_OUTLINE_CONTAINS;
|
||||
}
|
||||
// TODO: given this is not really a "filter", this feels slightly misplaced here
|
||||
else if(filtername == "sort")
|
||||
{
|
||||
|
@ -68,6 +68,7 @@ class LooqsQuery
|
||||
this->limit = limit;
|
||||
}
|
||||
bool hasContentSearch() const;
|
||||
bool hasOutlineSearch() const;
|
||||
bool hasPathSearch() const;
|
||||
|
||||
void addSortCondition(SortCondition sc);
|
||||
|
@ -10,7 +10,7 @@ class NothingProcessor : public Processor
|
||||
NothingProcessor();
|
||||
|
||||
public:
|
||||
QVector<PageData> process(const QByteArray & /*data*/) const override
|
||||
DocumentProcessResult process(const QByteArray & /*data*/) const override
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
@ -3,12 +3,12 @@
|
||||
#include "odtprocessor.h"
|
||||
#include "tagstripperprocessor.h"
|
||||
|
||||
QVector<PageData> OdtProcessor::process(const QByteArray & /*data*/) const
|
||||
DocumentProcessResult OdtProcessor::process(const QByteArray & /*data*/) const
|
||||
{
|
||||
throw LooqsGeneralException("Not implemented yet");
|
||||
}
|
||||
|
||||
QVector<PageData> OdtProcessor::process(QString path) const
|
||||
DocumentProcessResult OdtProcessor::process(QString path) const
|
||||
{
|
||||
QuaZipFile zipFile(path);
|
||||
zipFile.setFileName("content.xml");
|
||||
|
@ -8,9 +8,9 @@ class OdtProcessor : public Processor
|
||||
{
|
||||
this->PREFERED_DATA_SOURCE = FILEPATH;
|
||||
}
|
||||
QVector<PageData> process(const QByteArray &data) const override;
|
||||
DocumentProcessResult process(const QByteArray &data) const override;
|
||||
|
||||
QVector<PageData> process(QString path) const override;
|
||||
DocumentProcessResult process(QString path) const override;
|
||||
};
|
||||
|
||||
#endif // ODTPROCESSOR_H
|
||||
|
@ -5,9 +5,30 @@ PdfProcessor::PdfProcessor()
|
||||
{
|
||||
}
|
||||
|
||||
QVector<PageData> PdfProcessor::process(const QByteArray &data) const
|
||||
QVector<DocumentOutlineEntry> PdfProcessor::createOutline(const QVector<Poppler::OutlineItem> &outlineItems) const
|
||||
{
|
||||
QVector<PageData> result;
|
||||
QVector<DocumentOutlineEntry> result;
|
||||
for(const Poppler::OutlineItem &outlineItem : outlineItems)
|
||||
{
|
||||
DocumentOutlineEntry documentOutlineEntry;
|
||||
documentOutlineEntry.text = outlineItem.name();
|
||||
documentOutlineEntry.type = OUTLINE_DESTINATION_TYPE_PAGE;
|
||||
if(!outlineItem.destination().isNull())
|
||||
{
|
||||
documentOutlineEntry.destinationPage = outlineItem.destination()->pageNumber();
|
||||
}
|
||||
if(outlineItem.hasChildren())
|
||||
{
|
||||
documentOutlineEntry.children = createOutline(outlineItem.children());
|
||||
}
|
||||
result.append(documentOutlineEntry);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
DocumentProcessResult PdfProcessor::process(const QByteArray &data) const
|
||||
{
|
||||
DocumentProcessResult result;
|
||||
QScopedPointer<Poppler::Document> doc(Poppler::Document::loadFromData(data));
|
||||
if(doc.isNull())
|
||||
{
|
||||
@ -26,12 +47,13 @@ QVector<PageData> PdfProcessor::process(const QByteArray &data) const
|
||||
for(auto i = 0; i < pagecount; i++)
|
||||
{
|
||||
QString text = doc->page(i)->text(entirePage);
|
||||
result.append({static_cast<unsigned int>(i + 1), text});
|
||||
result.pages.append({static_cast<unsigned int>(i + 1), text});
|
||||
/*TODO: hack, so we can fts search several words over the whole document, not just pages.
|
||||
* this of course uses more space and should be solved differently.
|
||||
*/
|
||||
entire += text;
|
||||
}
|
||||
result.append({0, entire});
|
||||
result.pages.append({0, entire});
|
||||
result.outlines = createOutline(doc->outline());
|
||||
return result;
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
#ifndef PDFPROCESSOR_H
|
||||
#define PDFPROCESSOR_H
|
||||
#include <poppler-qt5.h>
|
||||
#include "processor.h"
|
||||
class PdfProcessor : public Processor
|
||||
{
|
||||
@ -7,7 +8,8 @@ class PdfProcessor : public Processor
|
||||
PdfProcessor();
|
||||
|
||||
public:
|
||||
QVector<PageData> process(const QByteArray &data) const override;
|
||||
QVector<DocumentOutlineEntry> createOutline(const QVector<Poppler::OutlineItem> &outlineItems) const;
|
||||
DocumentProcessResult process(const QByteArray &data) const override;
|
||||
};
|
||||
|
||||
#endif // PDFPROCESSOR_H
|
||||
|
@ -2,8 +2,8 @@
|
||||
#define PROCESSOR_H
|
||||
#include <QVector>
|
||||
#include <QFile>
|
||||
#include "pagedata.h"
|
||||
#include "utils.h"
|
||||
#include "documentprocessresult.h"
|
||||
enum DataSource
|
||||
{
|
||||
FILEPATH,
|
||||
@ -18,8 +18,8 @@ class Processor
|
||||
* a single file */
|
||||
DataSource PREFERED_DATA_SOURCE = ARRAY;
|
||||
Processor();
|
||||
virtual QVector<PageData> process(const QByteArray &data) const = 0;
|
||||
virtual QVector<PageData> process(QString path) const
|
||||
virtual DocumentProcessResult process(const QByteArray &data) const = 0;
|
||||
virtual DocumentProcessResult process(QString path) const
|
||||
{
|
||||
return process(Utils::readFile(path));
|
||||
}
|
||||
|
@ -65,18 +65,12 @@ void SandboxedProcessor::enableSandbox(QString readablePath)
|
||||
exile_free_policy(policy);
|
||||
}
|
||||
|
||||
void SandboxedProcessor::printResults(const QVector<PageData> &pageData)
|
||||
void SandboxedProcessor::printResults(const DocumentProcessResult &result)
|
||||
{
|
||||
QFile fsstdout;
|
||||
fsstdout.open(stdout, QIODevice::WriteOnly);
|
||||
QDataStream stream(&fsstdout);
|
||||
|
||||
for(const PageData &data : pageData)
|
||||
{
|
||||
stream << data;
|
||||
// fsstdout.flush();
|
||||
}
|
||||
|
||||
stream << result;
|
||||
fsstdout.close();
|
||||
}
|
||||
|
||||
@ -102,7 +96,7 @@ SaveFileResult SandboxedProcessor::process()
|
||||
return OK;
|
||||
}
|
||||
|
||||
QVector<PageData> pageData;
|
||||
DocumentProcessResult processResult;
|
||||
QString absPath = fileInfo.absoluteFilePath();
|
||||
|
||||
try
|
||||
@ -111,13 +105,13 @@ SaveFileResult SandboxedProcessor::process()
|
||||
{
|
||||
/* Read access to FS needed... doh..*/
|
||||
enableSandbox(absPath);
|
||||
pageData = processor->process(absPath);
|
||||
processResult = processor->process(absPath);
|
||||
}
|
||||
else
|
||||
{
|
||||
QByteArray data = Utils::readFile(absPath);
|
||||
enableSandbox();
|
||||
pageData = processor->process(data);
|
||||
processResult = processor->process(data);
|
||||
}
|
||||
}
|
||||
catch(LooqsGeneralException &e)
|
||||
@ -126,6 +120,6 @@ SaveFileResult SandboxedProcessor::process()
|
||||
return PROCESSFAIL;
|
||||
}
|
||||
|
||||
printResults(pageData);
|
||||
return pageData.isEmpty() ? OK_WASEMPTY : OK;
|
||||
printResults(processResult);
|
||||
return processResult.pages.isEmpty() ? OK_WASEMPTY : OK;
|
||||
}
|
||||
|
@ -2,7 +2,7 @@
|
||||
#define SANDBOXEDPROCESSOR_H
|
||||
#include <QString>
|
||||
#include <QMimeDatabase>
|
||||
#include "pagedata.h"
|
||||
#include "documentprocessresult.h"
|
||||
#include "savefileresult.h"
|
||||
|
||||
class SandboxedProcessor
|
||||
@ -12,7 +12,7 @@ class SandboxedProcessor
|
||||
QMimeDatabase mimeDatabase;
|
||||
|
||||
void enableSandbox(QString readablePath = "");
|
||||
void printResults(const QVector<PageData> &pageData);
|
||||
void printResults(const DocumentProcessResult &pageData);
|
||||
|
||||
public:
|
||||
SandboxedProcessor(QString filepath)
|
||||
|
@ -42,6 +42,8 @@ SOURCES += sqlitesearch.cpp \
|
||||
dbmigrator.cpp \
|
||||
defaulttextprocessor.cpp \
|
||||
dirscanworker.cpp \
|
||||
documentoutlineentry.cpp \
|
||||
documentprocessresult.cpp \
|
||||
encodingdetector.cpp \
|
||||
filesaver.cpp \
|
||||
filescanworker.cpp \
|
||||
@ -72,6 +74,8 @@ HEADERS += sqlitesearch.h \
|
||||
dbmigrator.h \
|
||||
defaulttextprocessor.h \
|
||||
dirscanworker.h \
|
||||
documentoutlineentry.h \
|
||||
documentprocessresult.h \
|
||||
encodingdetector.h \
|
||||
filedata.h \
|
||||
filesaver.h \
|
||||
|
@ -253,6 +253,29 @@ bool SqliteDbService::insertToFTS(bool useTrigrams, QSqlDatabase &db, int fileid
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Stores outline entries of a file in the "outline" table, descending into children.
 *
 * Every entry is inserted with its lowercased text and its destination page. Nested
 * entries end up as rows of the same table under the same fileid.
 *
 * @param db       connection the inserts are executed on
 * @param fileid   id of the file the entries belong to
 * @param outlines entries of the current nesting level
 * @return true if every entry including all descendants was inserted, false on the
 *         first failure (which has been logged)
 */
bool SqliteDbService::insertOutline(QSqlDatabase &db, int fileid, const QVector<DocumentOutlineEntry> &outlines)
{
    QSqlQuery outlineQuery(db);
    if(!outlineQuery.prepare("INSERT INTO outline(fileid, text, page) VALUES(?,?,?)"))
    {
        Logger::error() << "Failed to prepare outline insertion " << outlineQuery.lastError() << Qt::endl;
        return false;
    }
    // Bind all placeholders by position; the fileid stays bound for every execution
    outlineQuery.bindValue(0, fileid);
    for(const DocumentOutlineEntry &outline : outlines)
    {
        outlineQuery.bindValue(1, outline.text.toLower());
        outlineQuery.bindValue(2, outline.destinationPage);
        if(!outlineQuery.exec())
        {
            Logger::error() << "Failed outline insertion " << outlineQuery.lastError() << Qt::endl;
            return false;
        }
        if(!insertOutline(db, fileid, outline.children))
        {
            /* The recursive call logged the actual SQL error. outlineQuery itself just
             * succeeded, so its lastError() would only be misleading here. */
            Logger::error() << "Failed outline insertion (children)" << Qt::endl;
            return false;
        }
    }
    return true;
}
|
||||
|
||||
QSqlQuery SqliteDbService::exec(QString querystr, std::initializer_list<QVariant> args)
|
||||
{
|
||||
auto query = QSqlQuery(dbFactory->forCurrentThread());
|
||||
@ -278,7 +301,7 @@ bool SqliteDbService::execBool(QString querystr, std::initializer_list<QVariant>
|
||||
return query.value(0).toBool();
|
||||
}
|
||||
|
||||
SaveFileResult SqliteDbService::saveFile(QFileInfo fileInfo, QVector<PageData> &pageData, bool pathsOnly)
|
||||
SaveFileResult SqliteDbService::saveFile(QFileInfo fileInfo, DocumentProcessResult &processResult, bool pathsOnly)
|
||||
{
|
||||
QString absPath = fileInfo.absoluteFilePath();
|
||||
auto mtime = fileInfo.lastModified().toSecsSinceEpoch();
|
||||
@ -323,18 +346,24 @@ SaveFileResult SqliteDbService::saveFile(QFileInfo fileInfo, QVector<PageData> &
|
||||
if(!pathsOnly)
|
||||
{
|
||||
int lastid = inserterQuery.lastInsertId().toInt();
|
||||
if(!insertToFTS(false, db, lastid, pageData))
|
||||
if(!insertToFTS(false, db, lastid, processResult.pages))
|
||||
{
|
||||
db.rollback();
|
||||
Logger::error() << "Failed to insert data to FTS index " << Qt::endl;
|
||||
return DBFAIL;
|
||||
}
|
||||
if(!insertToFTS(true, db, lastid, pageData))
|
||||
if(!insertToFTS(true, db, lastid, processResult.pages))
|
||||
{
|
||||
db.rollback();
|
||||
Logger::error() << "Failed to insert data to FTS index " << Qt::endl;
|
||||
return DBFAIL;
|
||||
}
|
||||
if(!insertOutline(db, lastid, processResult.outlines))
|
||||
{
|
||||
db.rollback();
|
||||
Logger::error() << "Failed to insert outline data " << Qt::endl;
|
||||
return DBFAIL;
|
||||
}
|
||||
}
|
||||
|
||||
if(!db.commit())
|
||||
|
@ -5,7 +5,7 @@
|
||||
|
||||
#include "databasefactory.h"
|
||||
#include "utils.h"
|
||||
#include "pagedata.h"
|
||||
#include "documentprocessresult.h"
|
||||
#include "filedata.h"
|
||||
#include "../shared/sqlitesearch.h"
|
||||
#include "../shared/token.h"
|
||||
@ -22,7 +22,7 @@ class SqliteDbService
|
||||
|
||||
public:
|
||||
SqliteDbService(DatabaseFactory &dbFactory);
|
||||
SaveFileResult saveFile(QFileInfo fileInfo, QVector<PageData> &pageData, bool pathsOnly);
|
||||
SaveFileResult saveFile(QFileInfo fileInfo, DocumentProcessResult &pageData, bool pathsOnly);
|
||||
|
||||
bool deleteFile(QString path);
|
||||
bool fileExistsInDatabase(QString path);
|
||||
@ -42,6 +42,7 @@ class SqliteDbService
|
||||
QVector<SearchResult> search(const LooqsQuery &query);
|
||||
|
||||
std::optional<QChar> queryFileType(QString absPath);
|
||||
bool insertOutline(QSqlDatabase &db, int fileid, const QVector<DocumentOutlineEntry> &outlines);
|
||||
};
|
||||
|
||||
#endif // SQLITEDBSERVICE_H
|
||||
|
@ -148,6 +148,11 @@ QPair<QString, QVector<QString>> SqliteSearch::createSql(const Token &token)
|
||||
return {" file.id IN (SELECT fileid FROM filetag WHERE tagid = (SELECT id FROM tag WHERE name = ?)) ",
|
||||
{value.toLower()}};
|
||||
}
|
||||
if(token.type == FILTER_OUTLINE_CONTAINS)
|
||||
{
|
||||
return {" outline.text LIKE '%' || ? || '%' ", {value.toLower()}};
|
||||
}
|
||||
|
||||
throw LooqsGeneralException("Unknown token passed (should not happen)");
|
||||
}
|
||||
|
||||
@ -156,6 +161,7 @@ QSqlQuery SqliteSearch::makeSqlQuery(const LooqsQuery &query)
|
||||
QString whereSql;
|
||||
QVector<QString> bindValues;
|
||||
bool isContentSearch = (query.getTokensMask() & FILTER_CONTENT) == FILTER_CONTENT;
|
||||
bool isOutlineSearch = query.hasOutlineSearch();
|
||||
if(query.getTokens().isEmpty())
|
||||
{
|
||||
throw LooqsGeneralException("Nothing to search for supplied");
|
||||
@ -200,15 +206,22 @@ QSqlQuery SqliteSearch::makeSqlQuery(const LooqsQuery &query)
|
||||
}
|
||||
else
|
||||
{
|
||||
QString pageColumn = "'0' as page";
|
||||
QString joiners = "";
|
||||
if(isOutlineSearch)
|
||||
{
|
||||
pageColumn = "outline.page as page";
|
||||
joiners = " INNER JOIN outline ON outline.fileid = file.id ";
|
||||
}
|
||||
if(sortSql.isEmpty())
|
||||
{
|
||||
sortSql = "ORDER BY file.mtime DESC";
|
||||
}
|
||||
prepSql = "SELECT file.path AS path, '0' as page, file.mtime AS mtime, file.size AS size, file.filetype AS "
|
||||
"filetype FROM file WHERE 1=1 AND " +
|
||||
whereSql + " " + sortSql;
|
||||
prepSql = "SELECT DISTINCT file.path AS path, " + pageColumn +
|
||||
",file.mtime AS mtime, file.size AS size, "
|
||||
"file.filetype AS filetype FROM file" +
|
||||
joiners + " WHERE 1=1 AND " + whereSql + " " + sortSql;
|
||||
}
|
||||
|
||||
if(query.getLimit() > 0)
|
||||
{
|
||||
prepSql += " LIMIT " + QString::number(query.getLimit());
|
||||
@ -242,7 +255,7 @@ QVector<SearchResult> SqliteSearch::search(const LooqsQuery &query)
|
||||
throw LooqsGeneralException("SQL Error: " + dbQuery.lastError().text());
|
||||
}
|
||||
|
||||
bool contentSearch = query.hasContentSearch();
|
||||
bool contentSearch = query.hasContentSearch() || query.hasOutlineSearch();
|
||||
while(dbQuery.next())
|
||||
{
|
||||
SearchResult result;
|
||||
|
@ -4,11 +4,11 @@ TagStripperProcessor::TagStripperProcessor()
|
||||
{
|
||||
}
|
||||
|
||||
QVector<PageData> TagStripperProcessor::process(const QByteArray &data) const
|
||||
DocumentProcessResult TagStripperProcessor::process(const QByteArray &data) const
|
||||
{
|
||||
auto result = DefaultTextProcessor::process(data);
|
||||
// TODO: does not work properly with <br> and does not deal with entities...
|
||||
|
||||
result[0].content.remove(QRegExp("<[^>]*>"));
|
||||
Q_ASSERT(result.pages.size() > 0);
|
||||
result.pages[0].content.remove(QRegExp("<[^>]*>"));
|
||||
return result;
|
||||
}
|
||||
|
@ -8,7 +8,7 @@ class TagStripperProcessor : public DefaultTextProcessor
|
||||
TagStripperProcessor();
|
||||
|
||||
public:
|
||||
QVector<PageData> process(const QByteArray &data) const override;
|
||||
DocumentProcessResult process(const QByteArray &data) const override;
|
||||
};
|
||||
|
||||
#endif // XMLSTRIPPERPROCESSOR_H
|
||||
|
@ -20,6 +20,7 @@ enum TokenType
|
||||
FILTER_PATH_ENDS,
|
||||
FILTER_PATH_STARTS,
|
||||
FILTER_TAG_ASSIGNED,
|
||||
FILTER_OUTLINE_CONTAINS,
|
||||
FILTER_CONTENT = 512, /* Everything below here is content search (except LIMIT) */
|
||||
FILTER_CONTENT_CONTAINS,
|
||||
FILTER_CONTENT_PAGE,
|
||||
|
Loading…
x
Reference in New Issue
Block a user