Begin a C++ cli and remove the Python scripts

This commit is contained in:
Albert S. 2019-04-06 17:16:42 +02:00
rodzic 8e3585df38
commit 3e23021621
32 zmienionych plików z 788 dodań i 214 usunięć

167
addindex
Wyświetl plik

@ -1,167 +0,0 @@
#!/usr/bin/python3
import sqlite3
import os.path
import sys
import subprocess
import zipfile
import xml.etree.ElementTree
import re
import chardet
import config
from multiprocessing import Pool
class pagedata:
    """Text extracted from one page of an indexed document.

    The code in this script stores whole-document text under page 0 and
    numbers the pages of a PDF starting at 1.
    """
    # Extracted text; empty until a processor fills it in.
    content = ""
    # Page number the text belongs to.
    page = 0
def singlepagelist(content):
    """Wrap *content* in a one-element list holding a page-0 ``pagedata``."""
    entry = pagedata()
    entry.page = 0
    entry.content = content
    return [entry]
def striptags(content):
    """Return the text of *content* with markup tags removed.

    Well-formed XML is parsed and the text of all elements is concatenated.
    Input the XML parser rejects (for example HTML with unclosed tags) falls
    back to a regular expression that deletes everything between ``<`` and
    ``>``.
    """
    try:
        root = xml.etree.ElementTree.fromstring(content)
    except (xml.etree.ElementTree.ParseError, ValueError):
        # Only parser/encoding failures trigger the fallback; a bare
        # ``except`` would also swallow KeyboardInterrupt and SystemExit.
        # TODO: test<br>test2 will make it testtest2 not test test2
        return re.sub('<[^>]*>', '', content)
    return ''.join(root.itertext())
def strip_irrelevant(content):
    """Turn newlines and tabs into spaces, then collapse runs of spaces."""
    flattened = re.sub('[\n\t]', ' ', content)
    return re.sub(' +', ' ', flattened)
def process_pdf(path):
    """Extract the text of the PDF at *path* with the ``pdftotext`` tool.

    Returns a list of ``pagedata``: one entry per page, numbered from 1,
    followed by a page-0 entry holding the text of the whole document.
    """
    args = ["pdftotext", path, "-"]
    stdout, stderr = subprocess.Popen(
        args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
    # Undecodable bytes are replaced rather than aborting the whole file.
    content = strip_irrelevant(stdout.decode('utf-8', errors='replace'))
    # It is faster to do it like this than to call pdftotext for each page.
    pages = content.split("\f")
    # pdftotext normally terminates the last page with a form feed as well,
    # which leaves an empty trailing piece that is not a real page.
    if pages and pages[-1] == "":
        pages.pop()
    result = []
    for number, text in enumerate(pages, start=1):
        data = pagedata()
        data.page = number
        data.content = text
        result.append(data)
    # TODO: current hack, so we can fts search several words over the whole
    # document. This of course uses more space, but in the end that's not a
    # big problem. Nevertheless, this remains a hack.
    everything = pagedata()
    everything.page = 0
    everything.content = content.replace("\f", "")
    result.append(everything)
    return result
def process_odt(path):
    """Return the tag-stripped text of ``content.xml`` in the archive at *path*.

    The result is a one-element list as produced by ``singlepagelist``.
    """
    # The context manager closes the archive even when reading or decoding
    # raises.
    with zipfile.ZipFile(path) as archive:
        content = archive.read("content.xml").decode("utf-8")
    return singlepagelist(striptags(content))
def process_ods(path):
    """Process a spreadsheet by delegating to ``process_odt``."""
    pages = process_odt(path)
    return pages
def readtext(path):
    """Read the file at *path* and return its content decoded to ``str``.

    UTF-8 is tried first. If that fails the encoding is guessed with
    ``chardet``. An empty string is returned when no encoding can be
    determined or when decoding with the guessed encoding fails.
    """
    # ``with`` closes the file even if the read raises.
    with open(path, "rb") as fd:
        content = fd.read()
    try:
        return content.decode("utf-8")
    except UnicodeDecodeError:
        pass
    encoding = chardet.detect(content)["encoding"]
    if encoding is None:
        return ""
    try:
        return content.decode(encoding)
    except (UnicodeDecodeError, LookupError):
        # LookupError: chardet named a codec Python does not know.
        print("FAILED DECODING: " + path)
        return ""
def process_striptags(path):
    """Read the file at *path* as text and return it with tags stripped."""
    stripped = striptags(readtext(path))
    return singlepagelist(stripped)
def process_text(path):
    """Return the decoded content of the file at *path* as a single page."""
    text = readtext(path)
    return singlepagelist(text)
def process_nothing(path):
    """Extract no content: always return an empty list, ignoring *path*."""
    return []
def exists(abspath, mtime):
    """Tell whether the ``file`` table has a row for *abspath* with *mtime*.

    Uses the module-level connection ``dbcon``.
    """
    row = dbcon.cursor().execute(
        "SELECT 1 FROM file WHERE path = ? AND mtime = ?", (abspath, mtime)
    ).fetchone()
    return row is not None and row[0] == 1
def insert(path):
    """Index the file at *path*: extract its text and store it in the database.

    A file whose absolute path and modification time are already recorded is
    left alone. Otherwise any previous rows for the path are replaced, inside
    one transaction, by a fresh ``file`` row and one ``content`` row per
    extracted page. Database errors are re-raised after rolling back.
    """
    print("processing", path)
    abspath = os.path.abspath(path)
    mtime = int(os.stat(abspath).st_mtime)
    if exists(abspath, mtime):
        print("Leaving alone " + abspath + " because it wasn't changed")
        return
    ext = os.path.splitext(abspath)[1]
    # Unknown extensions still get a ``file`` row but no content rows.
    processor = preprocess.get(ext, process_nothing)
    pages = processor(abspath)
    #TODO: assumes sqlite has been built with thread safety (and it is the default)
    cursor = dbcon.cursor()
    cursor.execute("BEGIN TRANSACTION")
    try:
        cursor.execute("DELETE FROM file WHERE path = ?", (abspath,))
        cursor.execute("INSERT INTO file(path, mtime) VALUES(?, ?) ", (abspath, mtime))
        fileid = cursor.lastrowid
        for page in pages:
            cursor.execute(
                "INSERT INTO content(fileid, page, content) VALUES(?, ?, ?)",
                (fileid, page.page, page.content))
    except Exception:
        # Without the rollback this worker's connection would stay inside the
        # open transaction and its next BEGIN would fail.
        cursor.execute("ROLLBACK TRANSACTION")
        raise
    cursor.execute("COMMIT TRANSACTION")
# Maps a file extension (including the leading dot) to the function that
# extracts its text; insert() falls back to process_nothing for other
# extensions.
preprocess = {
    ".pdf": process_pdf,
    ".odt": process_odt,
    ".ods": process_ods,
    ".html": process_striptags,
    ".xml": process_nothing,
    ".txt": process_text,
    ".sql": process_text,
    ".c": process_text,
    ".cpp": process_text,
    ".js": process_text,
    ".java": process_text,
    ".py": process_text,
    '.md': process_text,
}
def init():
    """Open a SQLite connection to ``config.DBPATH`` and store it in ``dbcon``.

    Passed to ``Pool`` as the initializer, so it runs in each worker process.
    """
    global dbcon
    connection = sqlite3.connect(config.DBPATH, isolation_level=None)
    dbcon = connection
# Database connection of the current process; set by init().
dbcon = None
if __name__ == '__main__':
    if len(sys.argv) < 2:
        # No arguments given: take one path per line from standard input.
        paths = (l.replace("\n", "") for l in sys.stdin)
    else:
        paths = sys.argv[1:]
    with Pool(processes=4, initializer=init) as pool:
        pool.map(insert, paths)

8
cli/addfileexception.h Normal file
Wyświetl plik

@ -0,0 +1,8 @@
#ifndef ADDFILEEXCEPTION_H
#define ADDFILEEXCEPTION_H
#include <QException>
#include <QString>
class AddFileException : public QException
{
};
#endif // ADDFILEEXCEPTION_H

46
cli/cli.pro Normal file
Wyświetl plik

@ -0,0 +1,46 @@
QT -= gui
QT += sql concurrent
CONFIG += c++11 console
CONFIG -= app_bundle
# The following define makes your compiler emit warnings if you use
# any feature of Qt which has been marked deprecated (the exact warnings
# depend on your compiler). Please consult the documentation of the
# deprecated API in order to know how to port your code away from it.
DEFINES += QT_DEPRECATED_WARNINGS
# You can also make your code fail to compile if you use deprecated APIs.
# In order to do so, uncomment the following line.
# You can also select to disable deprecated APIs only up to a certain version of Qt.
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
LIBS += -luchardet -lpoppler-qt5 -lquazip
SOURCES += \
main.cpp \
encodingdetector.cpp \
processor.cpp \
pdfprocessor.cpp \
defaulttextprocessor.cpp \
command.cpp \
commandadd.cpp \
tagstripperprocessor.cpp \
nothingprocessor.cpp \
odtprocessor.cpp \
utils.cpp \
odsprocessor.cpp \
qssgeneralexception.cpp
HEADERS += \
encodingdetector.h \
processor.h \
pagedata.h \
pdfprocessor.h \
defaulttextprocessor.h \
command.h \
commandadd.h \
tagstripperprocessor.h \
nothingprocessor.h \
odtprocessor.h \
utils.h \
odsprocessor.h \
qssgeneralexception.h
INCLUDEPATH += /usr/include/poppler/qt5/ /usr/include/quazip5

38
cli/command.cpp Normal file
Wyświetl plik

@ -0,0 +1,38 @@
#include <QFile>
#include <QThread>
#include <QDebug>
#include "command.h"
#include "qssgeneralexception.h"
/* Returns true when the file table holds a row for `path` with exactly this
 * `mtime`, false otherwise.
 * Throws QSSGeneralException when the query cannot be prepared or executed. */
bool Command::fileExistsInDatabase(QSqlDatabase &db, QString path, qint64 mtime)
{
    // QSqlQuery(sql, db) executes the statement inside the constructor, before
    // any value is bound. Prepare it explicitly so it runs exactly once, with
    // its parameters.
    QSqlQuery query(db);
    if(!query.prepare("SELECT 1 FROM file WHERE path = ? and mtime = ?"))
    {
        throw QSSGeneralException("Error while trying to query for file existance");
    }
    query.addBindValue(path);
    query.addBindValue(mtime);
    if(!query.exec())
    {
        throw QSSGeneralException("Error while trying to query for file existance");
    }
    if(!query.next())
    {
        return false;
    }
    return query.value(0).toBool();
}
/* Returns the calling thread's database connection, creating and opening it on
 * first use. The connection name is "QSS" followed by the current QThread
 * pointer in hexadecimal. A failed open is only logged; the connection is
 * stored and returned regardless. */
QSqlDatabase Command::dbConnection()
{
    if(!dbStore.hasLocalData())
    {
        const QString connectionName = "QSS" + QString::number((quint64)QThread::currentThread(), 16);
        QSqlDatabase db = QSqlDatabase::addDatabase("QSQLITE", connectionName);
        db.setDatabaseName(this->dbConnectionString);
        const bool opened = db.open();
        if(!opened)
        {
            qDebug() << "Failed to open the database: " << this->dbConnectionString;
        }
        dbStore.setLocalData(db);
        return db;
    }
    return dbStore.localData();
}

27
cli/command.h Normal file
Wyświetl plik

@ -0,0 +1,27 @@
#ifndef COMMAND_H
#define COMMAND_H
#include <QStringList>
#include <QSqlDatabase>
#include <QSqlQuery>
#include <QThreadStorage>
#include <QVariant>
/* Base class of the CLI commands. Holds the database name and one
 * QSqlDatabase per thread (see dbConnection()). */
class Command
{
protected:
    // True when `path` is stored in the file table with exactly this mtime.
    bool fileExistsInDatabase(QSqlDatabase &db, QString path, qint64 mtime);
    QByteArray readFile(QString path) const;
    // Passed to QSqlDatabase::setDatabaseName() when a connection is created.
    QString dbConnectionString;
    // Per-thread connection cache filled by dbConnection().
    QThreadStorage<QSqlDatabase> dbStore;

public:
    Command(QString dbConnectionString) : dbConnectionString(dbConnectionString)
    {
    }

    // Connection belonging to the calling thread.
    QSqlDatabase dbConnection();
    // Runs the command with its arguments; the result is used as exit code.
    virtual int handle(QStringList arguments) = 0;
    virtual ~Command()
    {
    }
};
#endif // COMMAND_H

196
cli/commandadd.cpp Normal file
Wyświetl plik

@ -0,0 +1,196 @@
#include <atomic>
#include <QFileInfo>
#include <QDebug>
#include <QSqlQuery>
#include <QSqlError>
#include <QDateTime>
#include <QMap>
#include <QTextStream>
#include <QException>
#include <QCommandLineParser>
#include <QMutex>
#include <QMutexLocker>
#include <QtConcurrent/QtConcurrentMap>
#include "processor.h"
#include "pdfprocessor.h"
#include "commandadd.h"
#include "defaulttextprocessor.h"
#include "tagstripperprocessor.h"
#include "nothingprocessor.h"
#include "odtprocessor.h"
#include "odsprocessor.h"
#include "utils.h"
// Shared processor instances. They are allocated once at static
// initialization time and never deleted.
static DefaultTextProcessor *defaultTextProcessor = new DefaultTextProcessor();
static TagStripperProcessor *tagStripperProcessor = new TagStripperProcessor();
static NothingProcessor *nothingProcessor = new NothingProcessor();
static OdtProcessor *odtProcessor = new OdtProcessor();
static OdsProcessor *odsProcessor = new OdsProcessor();
// Maps a file suffix (without the dot) to the processor extracting its text.
// addFile() looks suffixes up here and falls back to nothingProcessor.
// The map must stay below the pointers above because it reads them.
static QMap<QString, Processor *> processors{
{"pdf", new PdfProcessor()}, {"txt", defaultTextProcessor}, {"md", defaultTextProcessor},
{"py", defaultTextProcessor}, {"xml", nothingProcessor}, {"html", tagStripperProcessor},
{"java", defaultTextProcessor}, {"js", defaultTextProcessor}, {"cpp", defaultTextProcessor},
{"c", defaultTextProcessor}, {"sql", defaultTextProcessor}, {"odt", odtProcessor},
{"ods", odsProcessor}};
/* Adds the file at `path` to the index.
 *
 * Returns SKIPPED when the path is already stored with the same mtime, DBFAIL
 * when the transaction or one of its statements fails (the transaction is
 * rolled back), and OK once the file row and one content row per extracted
 * page have been committed. Exceptions thrown by fileExistsInDatabase(), the
 * processors or Utils::readFile() propagate to the caller. */
AddFileResult CommandAdd::addFile(QString path)
{
    QFileInfo info(path);
    QString absPath = info.absoluteFilePath();
    auto mtime = info.lastModified().toSecsSinceEpoch();
    QChar fileType = info.isDir() ? 'd' : 'f';

    QSqlDatabase db = dbConnection();
    if(fileExistsInDatabase(db, absPath, mtime))
    {
        return SKIPPED;
    }

    // Suffixes without a registered processor yield no content rows.
    Processor *processor = processors.value(info.suffix(), nothingProcessor);
    QVector<PageData> pageData;
    if(processor->PREFERED_DATA_SOURCE == FILEPATH)
    {
        pageData = processor->process(absPath);
    }
    else
    {
        pageData = processor->process(Utils::readFile(absPath));
    }

    if(pageData.isEmpty())
    {
        qDebug() << "Could not get any content for " << absPath;
    }

    // Workaround to "database is locked" error. Perhaps try WAL mode etc.
    // QMutexLocker locker(&writeMutex);
    if(!db.transaction())
    {
        qDebug() << "Failed to open transaction for " << absPath << " : " << db.lastError();
        return DBFAIL;
    }

    // Statements are prepared explicitly: QSqlQuery(sql, db) would execute
    // them inside the constructor, before any value is bound.
    QSqlQuery delQuery(db);
    bool prepared = delQuery.prepare("DELETE FROM file WHERE path = ?");
    delQuery.addBindValue(absPath);
    if(!prepared || !delQuery.exec())
    {
        qDebug() << "Failed DELETE query" << delQuery.lastError();
        db.rollback();
        return DBFAIL;
    }

    QSqlQuery inserterQuery(db);
    prepared = inserterQuery.prepare("INSERT INTO file(path, mtime, size, filetype) VALUES(?, ?, ?, ?)");
    inserterQuery.addBindValue(absPath);
    inserterQuery.addBindValue(mtime);
    inserterQuery.addBindValue(info.size());
    inserterQuery.addBindValue(fileType);
    if(!prepared || !inserterQuery.exec())
    {
        qDebug() << "Failed INSERT query" << inserterQuery.lastError();
        db.rollback();
        return DBFAIL;
    }
    // Kept as QVariant so a 64-bit row id is not truncated to int.
    const QVariant lastid = inserterQuery.lastInsertId();

    // Prepared once and re-executed for every page.
    QSqlQuery contentQuery(db);
    if(!contentQuery.prepare("INSERT INTO content(fileid, page, content) VALUES(?, ?, ?)"))
    {
        db.rollback();
        qDebug() << "Failed content insertion " << contentQuery.lastError();
        return DBFAIL;
    }
    for(PageData &data : pageData)
    {
        contentQuery.bindValue(0, lastid);
        contentQuery.bindValue(1, data.pagenumber);
        contentQuery.bindValue(2, data.content);
        if(!contentQuery.exec())
        {
            db.rollback();
            qDebug() << "Failed content insertion " << contentQuery.lastError();
            return DBFAIL;
        }
    }

    if(!db.commit())
    {
        db.rollback();
        qDebug() << "Failed to commit transaction for " << absPath << " : " << db.lastError();
        return DBFAIL;
    }
    return OK;
}
/* Handles the "add" command: parses the options, collects the paths (from the
 * positional arguments or, when there are none, one per line from stdin) and
 * adds them concurrently through addFile().
 *
 * Throws QSSGeneralException for the not yet implemented --all option.
 * Returns 0. */
int CommandAdd::handle(QStringList arguments)
{
    QCommandLineParser parser;
    parser.addOptions({{{"c", "continue"}, "Continue adding files, don't exit on first error"},
                       {{"a", "all"}, "On error, no files should be added, even already processed ones"},
                       {{"v", "verbose"}, "Print skipped and added files"},
                       {{"n", "threads"}, "Number of threads to use.", "threads"}});
    parser.addHelpOption();
    parser.addPositionalArgument("paths", "List of paths to process/add to the index", "[paths...]");
    parser.process(arguments);

    const bool keepGoing = parser.isSet("continue");
    const bool verbose = parser.isSet("verbose");
    if(parser.isSet("all"))
    {
        throw QSSGeneralException("To be implemented");
    }
    if(parser.isSet("threads"))
    {
        QString threadsCount = parser.value("threads");
        QThreadPool::globalInstance()->setMaxThreadCount(threadsCount.toInt());
    }

    QStringList files = parser.positionalArguments();
    if(files.length() == 0)
    {
        QTextStream stream(stdin);
        while(!stream.atEnd())
        {
            QString path = stream.readLine();
            // Blank lines are not paths; do not queue them.
            if(!path.isEmpty())
            {
                files.append(path);
            }
        }
    }

    // Written and read by several pool threads at once, hence atomic.
    std::atomic<bool> terminate{false};
    QtConcurrent::blockingMap(files,
                              [&](QString &path)
                              {
                                  if(terminate.load())
                                  {
                                      return;
                                  }
                                  if(verbose)
                                  {
                                      qDebug() << "Processing " << path;
                                  }
                                  auto result = addFile(path);
                                  if(result == DBFAIL)
                                  {
                                      qDebug() << "Failed to add " << path;
                                      if(!keepGoing)
                                      {
                                          terminate.store(true);
                                      }
                                  }
                                  if(verbose)
                                  {
                                      if(result == SKIPPED)
                                      {
                                          qDebug() << "SKIPPED" << path << "as it already exists in the database";
                                      }
                                      else if(result == OK)
                                      {
                                          // Failed files must not be reported as added.
                                          qDebug() << "Added" << path;
                                      }
                                  }
                              });
    return 0;
}

24
cli/commandadd.h Normal file
Wyświetl plik

@ -0,0 +1,24 @@
#ifndef COMMANDADD_H
#define COMMANDADD_H
#include <QMutex>
#include "command.h"
// Outcome of CommandAdd::addFile() for a single path.
enum AddFileResult
{
// The file's rows were written and the transaction was committed.
OK,
// The path is already stored with the same mtime; nothing was written.
SKIPPED,
// Starting the transaction, a statement or the commit failed.
DBFAIL
};
// The "add" command: extracts text from files and stores it in the index.
class CommandAdd : public Command
{
private:
// Adds one file to the database and reports what happened to it.
AddFileResult addFile(QString path);
// Only referenced from a commented-out QMutexLocker in addFile().
QMutex writeMutex;
public:
// Inherit Command's constructor taking the database connection string.
using Command::Command;
// Parses the command's options and adds all given paths.
int handle(QStringList arguments) override;
};
#endif // COMMANDADD_H

Wyświetl plik

@ -0,0 +1,30 @@
#include <QFile>
#include <QDataStream>
#include <QTextCodec>
#include <QDebug>
#include "defaulttextprocessor.h"
// Nothing to set up beyond default-constructing the members.
DefaultTextProcessor::DefaultTextProcessor() = default;
/* Decodes `data` to a QString using the encoding reported by the detector.
 *
 * When the detector reports no encoding, or Qt has no codec for the reported
 * name, the data is decoded as UTF-8 instead of being dropped. */
QString DefaultTextProcessor::processText(const QByteArray &data) const
{
    QString encoding = encodingDetector.detectEncoding(data);
    if(encoding.isEmpty())
    {
        // uchardet reports an empty name when it cannot decide (some versions
        // reportedly also for plain ASCII input - confirm against the
        // installed version). Returning nothing here would index such files
        // with no content at all.
        return QString::fromUtf8(data);
    }
    QTextCodec *codec = QTextCodec::codecForName(encoding.toUtf8());
    if(codec == nullptr)
    {
        qWarning() << "No codec found for " << encoding;
        return QString::fromUtf8(data);
    }
    return codec->toUnicode(data);
}
// Returns the decoded text of `data` as a single entry with page number 0.
QVector<PageData> DefaultTextProcessor::process(const QByteArray &data) const
{
    QVector<PageData> pages;
    pages.append(PageData(0, processText(data)));
    return pages;
}

Wyświetl plik

@ -0,0 +1,17 @@
#ifndef DEFAULTTEXTPROCESSOR_H
#define DEFAULTTEXTPROCESSOR_H
#include "processor.h"
#include "encodingdetector.h"
// Processor for plain text: detects the encoding of the data and decodes it.
class DefaultTextProcessor : public Processor
{
protected:
// Used by processText() to find out the encoding of the data.
EncodingDetector encodingDetector;
public:
DefaultTextProcessor();
// Decodes `data` to a QString.
QString processText(const QByteArray &data) const;
// Returns the decoded text as a single page with number 0.
QVector<PageData> process(const QByteArray &data) const override;
};
#endif // DEFAULTTEXTPROCESSOR_H

45
cli/encodingdetector.cpp Normal file
Wyświetl plik

@ -0,0 +1,45 @@
#include <QDataStream>
#include "encodingdetector.h"
#include <qssgeneralexception.h>
// The detector keeps no state between calls.
EncodingDetector::EncodingDetector() = default;
/* Feeds `data` to uchardet and returns the charset name it reports.
 * Throws QSSGeneralException("Decoder failed") when uchardet rejects the data. */
QString EncodingDetector::detectEncoding(const QByteArray &data) const
{
    uchardet_t detector = uchardet_new();
    const int status = uchardet_handle_data(detector, data.data(), data.size());
    QString encoding;
    if(status == 0)
    {
        uchardet_data_end(detector);
        encoding = uchardet_get_charset(detector);
    }
    // Single release point for the detector on both paths.
    uchardet_delete(detector);
    if(status != 0)
    {
        throw QSSGeneralException("Decoder failed");
    }
    return encoding;
}
/* Reads the stream in 4096-byte chunks, feeds them to uchardet and returns the
 * charset name it reports.
 * Throws QSSGeneralException("Decoder failed") when uchardet rejects a chunk
 * and QSSGeneralException("Read failed") when reading the stream returns -1. */
QString EncodingDetector::detectEncoding(QDataStream &s) const
{
    uchardet_t detector = uchardet_new();
    char buffer[4096];
    for(;;)
    {
        const int bytesRead = s.readRawData(buffer, sizeof(buffer));
        if(bytesRead == -1)
        {
            uchardet_delete(detector);
            throw QSSGeneralException("Read failed");
        }
        if(bytesRead <= 0)
        {
            break;
        }
        if(uchardet_handle_data(detector, buffer, bytesRead) != 0)
        {
            uchardet_delete(detector);
            throw QSSGeneralException("Decoder failed");
        }
    }
    uchardet_data_end(detector);
    const QString encoding = uchardet_get_charset(detector);
    uchardet_delete(detector);
    return encoding;
}

14
cli/encodingdetector.h Normal file
Wyświetl plik

@ -0,0 +1,14 @@
#ifndef ENCODINGDETECTOR_H
#define ENCODINGDETECTOR_H
#include <QString>
#include <uchardet/uchardet.h>
// Thin wrapper around uchardet for guessing the character encoding of data.
class EncodingDetector
{
public:
EncodingDetector();
// Returns the charset name uchardet reports for the complete byte array.
QString detectEncoding(const QByteArray &data) const;
// Same, but reads the data chunk by chunk from the stream.
QString detectEncoding(QDataStream &s) const;
};
#endif // ENCODINGDETECTOR_H

73
cli/main.cpp Normal file
Wyświetl plik

@ -0,0 +1,73 @@
#include <QCoreApplication>
#include <QFile>
#include <QFileInfo>
#include <QDateTime>
#include <QDataStream>
#include <QDebug>
#include <QProcessEnvironment>
#include <QSqlDatabase>
#include <QSqlQuery>
#include <QSqlError>
#include <QMap>
#include <QDebug>
#include <functional>
#include <exception>
#include "encodingdetector.h"
#include "pdfprocessor.h"
#include "defaulttextprocessor.h"
#include "command.h"
#include "commandadd.h"
// Prints a one-line usage hint through qInfo().
void printUsage(QString argv0)
{
    QDebug out = qInfo();
    out << "Usage: " << argv0 << "command";
}
/* Creates the command object for `name`; the caller owns the result.
 * Only "add" exists so far. "delete", "update" and "search" are recognised
 * names without an implementation and, like any other name, yield nullptr. */
Command *commandFromName(QString name, QString connectionstring)
{
    if(name != "add")
    {
        return nullptr;
    }
    return new CommandAdd(connectionstring);
}
int main(int argc, char *argv[])
{
QCoreApplication app(argc, argv);
QStringList args = app.arguments();
QString argv0 = args.takeFirst();
if(args.length() < 1)
{
printUsage(argv0);
exit(1);
}
QString commandName = args.first();
Command *cmd = commandFromName(commandName, QProcessEnvironment::systemEnvironment().value("QSS_PATH"));
if(cmd != nullptr)
{
try
{
return cmd->handle(args);
}
catch(const QSSGeneralException &e)
{
qDebug() << "Exception caught, message: " << e.message;
}
}
else
{
qDebug() << "Unknown command " << commandName;
}
return 1;
}

5
cli/nothingprocessor.cpp Normal file
Wyświetl plik

@ -0,0 +1,5 @@
#include "nothingprocessor.h"
// Nothing to initialise.
NothingProcessor::NothingProcessor() = default;

19
cli/nothingprocessor.h Normal file
Wyświetl plik

@ -0,0 +1,19 @@
#ifndef NOTHINGPROCESSOR_H
#define NOTHINGPROCESSOR_H
#include <QVector>
#include "processor.h"
#include "pagedata.h"
// Processor that extracts no content at all.
class NothingProcessor : public Processor
{
public:
    NothingProcessor();

    // Ignores the data and returns an empty result.
    QVector<PageData> process(const QByteArray &data) const override
    {
        Q_UNUSED(data);
        return QVector<PageData>();
    }
};
#endif // NOTHINGPROCESSOR_H

5
cli/odsprocessor.cpp Normal file
Wyświetl plik

@ -0,0 +1,5 @@
#include "odsprocessor.h"
// All setup happens in the OdtProcessor base constructor.
OdsProcessor::OdsProcessor() = default;

10
cli/odsprocessor.h Normal file
Wyświetl plik

@ -0,0 +1,10 @@
#ifndef ODSPROCESSOR_H
#define ODSPROCESSOR_H
#include "odtprocessor.h"
// Processor registered for the "ods" suffix. It adds nothing of its own and
// inherits all behaviour from OdtProcessor.
class OdsProcessor : public OdtProcessor
{
public:
OdsProcessor();
};
#endif // ODSPROCESSOR_H

26
cli/odtprocessor.cpp Normal file
Wyświetl plik

@ -0,0 +1,26 @@
#include <quazip5/quazip.h>
#include <quazip5/quazipfile.h>
#include "odtprocessor.h"
#include "tagstripperprocessor.h"
// Processing from an in-memory byte array is not supported yet; always throws
// QSSGeneralException.
QVector<PageData> OdtProcessor::process(const QByteArray &data) const
{
    Q_UNUSED(data);
    const QSSGeneralException notImplemented("Not implemented yet");
    throw notImplemented;
}
/* Reads content.xml from the zip archive at `path` and returns its text with
 * the tags stripped by TagStripperProcessor.
 * Throws QSSGeneralException when the entry cannot be opened or is empty. */
QVector<PageData> OdtProcessor::process(QString path) const
{
    QuaZipFile contentEntry(path);
    contentEntry.setFileName("content.xml");
    const bool opened = contentEntry.open(QIODevice::ReadOnly);
    if(!opened)
    {
        throw QSSGeneralException("Error while opening file " + path);
    }
    const QByteArray xml = contentEntry.readAll();
    if(xml.isEmpty())
    {
        throw QSSGeneralException("Error while reading content.xml of " + path);
    }
    return TagStripperProcessor().process(xml);
}

16
cli/odtprocessor.h Normal file
Wyświetl plik

@ -0,0 +1,16 @@
#ifndef ODTPROCESSOR_H
#define ODTPROCESSOR_H
#include "processor.h"
// Processor for the "odt" suffix. It declares FILEPATH as its preferred data
// source, so callers honouring that pass the file's path, not its bytes.
class OdtProcessor : public Processor
{
public:
OdtProcessor()
{
this->PREFERED_DATA_SOURCE = FILEPATH;
}
// Byte-array variant; the implementation in odtprocessor.cpp always throws.
QVector<PageData> process(const QByteArray &data) const override;
// Path variant; reads content.xml out of the archive at `path`.
QVector<PageData> process(QString path) const override;
};
#endif // ODTPROCESSOR_H

19
cli/pagedata.h Normal file
Wyświetl plik

@ -0,0 +1,19 @@
#ifndef PAGEDATA_H
#define PAGEDATA_H
#include <QString>
class PageData
{
public:
unsigned int pagenumber = 0;
QString content;
PageData()
{
}
PageData(unsigned int pagenumber, QString content)
{
this->pagenumber = pagenumber;
this->content = content;
}
};
#endif // PAGEDATA_H

32
cli/pdfprocessor.cpp Normal file
Wyświetl plik

@ -0,0 +1,32 @@
#include <QScopedPointer>
#include <poppler-qt5.h>
#include "pdfprocessor.h"
// Nothing to initialise.
PdfProcessor::PdfProcessor() = default;
/* Extracts the text of a PDF held in `data`.
 *
 * Returns one entry per readable page (numbered from 1) followed by an entry
 * with page number 0 containing the text of all pages. Returns an empty
 * vector when the document cannot be loaded or is locked. */
QVector<PageData> PdfProcessor::process(const QByteArray &data) const
{
    QScopedPointer<Poppler::Document> doc(Poppler::Document::loadFromData(data));
    // A locked (password protected) document cannot be read.
    if(doc.isNull() || doc->isLocked())
    {
        return {};
    }

    QVector<PageData> result;
    // A null rectangle asks Poppler for all text on the page.
    QRectF entirePage;
    auto pagecount = doc->numPages();
    QString entire;
    entire.reserve(data.size()); // TODO too much
    for(auto i = 0; i < pagecount; i++)
    {
        // Document::page() hands ownership to the caller; without the scoped
        // pointer every page object would leak.
        QScopedPointer<Poppler::Page> page(doc->page(i));
        if(page.isNull())
        {
            continue;
        }
        QString text = page->text(entirePage);
        result.append({static_cast<unsigned int>(i + 1), text});
        /*TODO: hack, so we can fts search several words over the whole document, not just pages.
         * this of course uses more space and should be solved differently.
         */
        entire += text;
    }
    result.append({0, entire});
    return result;
}

13
cli/pdfprocessor.h Normal file
Wyświetl plik

@ -0,0 +1,13 @@
#ifndef PDFPROCESSOR_H
#define PDFPROCESSOR_H
#include "processor.h"
// Processor for PDF documents; implemented with Poppler in pdfprocessor.cpp.
class PdfProcessor : public Processor
{
public:
PdfProcessor();
public:
// Returns the text of every page plus one page-0 entry with all of it.
QVector<PageData> process(const QByteArray &data) const override;
};
#endif // PDFPROCESSOR_H

5
cli/processor.cpp Normal file
Wyświetl plik

@ -0,0 +1,5 @@
#include "processor.h"
// PREFERED_DATA_SOURCE already has its default from the in-class initialiser.
Processor::Processor() = default;

32
cli/processor.h Normal file
Wyświetl plik

@ -0,0 +1,32 @@
#ifndef PROCESSOR_H
#define PROCESSOR_H
#include <QVector>
#include <QFile>
#include "pagedata.h"
#include "utils.h"
// Kind of input a Processor prefers (see Processor::PREFERED_DATA_SOURCE).
enum DataSource
{
// The processor wants the path of the file and opens it itself.
FILEPATH,
// The processor wants the file's content as a byte array.
ARRAY
};
// Interface of the text extractors: turns a file into a list of PageData.
class Processor
{
public:
/* Indicates the data source the processor performs best with. For example,
 * you do not want to read an entire compressed archive just to get the
 * content of a single file. Defaults to ARRAY. */
DataSource PREFERED_DATA_SOURCE = ARRAY;
Processor();
// Extracts the text from the raw bytes of a file.
virtual QVector<PageData> process(const QByteArray &data) const = 0;
// Extracts the text from the file at `path`. The default implementation
// reads the whole file and forwards to the byte-array overload.
virtual QVector<PageData> process(QString path) const
{
return process(Utils::readFile(path));
}
virtual ~Processor()
{
}
};
#endif // PROCESSOR_H

Wyświetl plik

@ -0,0 +1 @@
#include "qssgeneralexception.h"

24
cli/qssgeneralexception.h Normal file
Wyświetl plik

@ -0,0 +1,24 @@
#ifndef QSSGENERALEXCEPTION_H
#define QSSGENERALEXCEPTION_H
#include <QException>
// General-purpose exception carrying a human-readable message.
class QSSGeneralException : public QException
{
public:
    // Description of what went wrong; main() prints it when catching.
    QString message;

    QSSGeneralException(QString message) : message(message)
    {
    }

    // Rethrows a copy with the dynamic type preserved.
    void raise() const override
    {
        throw *this;
    }

    // Heap-allocated copy; the caller owns it.
    QSSGeneralException *clone() const override
    {
        return new QSSGeneralException(*this);
    }
};
#endif // QSSGENERALEXCEPTION_H

Wyświetl plik

@ -0,0 +1,14 @@
#include "tagstripperprocessor.h"
// Nothing to initialise beyond the base class.
TagStripperProcessor::TagStripperProcessor() = default;
// Decodes `data` like DefaultTextProcessor and then deletes everything that
// looks like a tag (a '<', any characters except '>', and the closing '>').
QVector<PageData> TagStripperProcessor::process(const QByteArray &data) const
{
    QVector<PageData> pages = DefaultTextProcessor::process(data);
    // TODO: does not work properly with <br> and does not deal with entities...
    const QRegExp tagPattern("<[^>]*>");
    pages[0].content.remove(tagPattern);
    return pages;
}

Wyświetl plik

@ -0,0 +1,14 @@
#ifndef XMLSTRIPPERPROCESSOR_H
#define XMLSTRIPPERPROCESSOR_H
#include "defaulttextprocessor.h"
// Text processor that additionally removes markup tags from the decoded text.
class TagStripperProcessor : public DefaultTextProcessor
{
public:
TagStripperProcessor();
public:
// Returns the decoded text of `data` with everything between '<' and '>' removed.
QVector<PageData> process(const QByteArray &data) const override;
};
#endif // XMLSTRIPPERPROCESSOR_H

20
cli/utils.cpp Normal file
Wyświetl plik

@ -0,0 +1,20 @@
#include <QDebug>
#include "utils.h"
// Utils has no state to initialise.
Utils::Utils() = default;
/* Reads the whole file at `path` and returns its content.
 * Throws QSSGeneralException when the file cannot be opened or when nothing
 * was read and the file reports an error. An empty file is not an error. */
QByteArray Utils::readFile(QString path)
{
    QFile file(path);
    if(!file.open(QIODevice::ReadOnly))
    {
        throw QSSGeneralException("Failed to open file: " + path);
    }
    QByteArray data = file.readAll();
    if(data.isEmpty() && file.error() != QFileDevice::FileError::NoError)
    {
        // errorString() gives readable text; appending file.error() directly
        // would add the enum's numeric value as a single raw character.
        throw QSSGeneralException("Error reading file: " + path + ", Error: " + file.errorString());
    }
    return data;
}

15
cli/utils.h Normal file
Wyświetl plik

@ -0,0 +1,15 @@
#ifndef UTILS_H
#define UTILS_H
#include <QFile>
#include <QString>
#include <QByteArray>
#include "qssgeneralexception.h"
// Collection of static helper functions.
class Utils
{
public:
Utils();
// Returns the complete content of the file at `path`; the implementation in
// utils.cpp throws QSSGeneralException on failure.
static QByteArray readFile(QString path);
};
#endif // UTILS_H

Wyświetl plik

@ -1,5 +0,0 @@
import os
import sys

# Location of the SQLite index database, taken from the environment.
DBPATH = os.getenv("QSS_PATH")
if not DBPATH:
    # sys.exit with a string prints it to stderr and exits with status 1.
    # It does not depend on the ``exit`` helper that only the site module
    # provides.
    sys.exit("Missing env var QSS_PATH")

Wyświetl plik

@ -1,21 +0,0 @@
#!/bin/sh
# Removes index entries whose file no longer exists on disk.
# All DELETE statements are collected in a temporary SQL script and executed
# in a single transaction against the database named by $QSS_PATH.
TEMPFILE=$(mktemp) || exit 1
# Remove the temporary script however the shell exits.
trap 'rm -f "$TEMPFILE"' EXIT
DBFILE="$QSS_PATH"

# Appends a DELETE statement for the path given as $1.
# POSIX function syntax: the "function" keyword is not available in every sh.
todelete()
{
    # Double single quotes so a path containing ' cannot break out of the
    # SQL string literal.
    escaped=$(printf '%s' "$1" | sed "s/'/''/g")
    printf '%s\n' "DELETE FROM file WHERE path = '$escaped';" >> "$TEMPFILE"
}

echo "BEGIN TRANSACTION;" >> "$TEMPFILE"
# IFS= and -r keep leading blanks and backslashes in paths intact.
sqlite3 "$DBFILE" "SELECT path FROM file;" | while IFS= read -r line ; do
    [ -e "$line" ] || todelete "$line"
done
echo "COMMIT TRANSACTION;" >> "$TEMPFILE"
sqlite3 "$DBFILE" < "$TEMPFILE"

Wyświetl plik

@ -1,21 +0,0 @@
#!/usr/bin/python3
"""Search the full-text index and print the matching files and pages.

All command line arguments together form the search expression.
"""
import sqlite3
import sys
import config

if len(sys.argv) < 2:
    print("Error: Missing search")
    # Without a search expression there is nothing to query.
    sys.exit(1)
# The statement has a single placeholder, so the words are joined into one
# expression instead of being passed as one binding per word.
search = " ".join(sys.argv[1:])

dbcon = sqlite3.connect(config.DBPATH, isolation_level=None)
try:
    cursor = dbcon.cursor()
    #TODO: machine parseable
    for row in cursor.execute("SELECT file.path, content.page FROM file INNER JOIN content ON file.id = content.fileid INNER JOIN content_fts ON content.id = content_fts.ROWID WHERE content_fts.content MATCH ? ORDER By file.mtime ASC", (search,)):
        print("File:", row[0], "Page: ", row[1])
finally:
    # Close the connection even when the query raises.
    dbcon.close()