diff --git a/addindex b/addindex
deleted file mode 100755
index c1b8e60..0000000
--- a/addindex
+++ /dev/null
@@ -1,167 +0,0 @@
-#!/usr/bin/python3
-import sqlite3
-import os.path
-import sys
-import subprocess
-import zipfile
-import xml.etree.ElementTree
-import re
-import chardet
-import config
-from multiprocessing import Pool
-
-
-
-class pagedata:
- page = 0
- content = ""
-
-
-def singlepagelist(content):
- result = pagedata()
- result.content = content
- result.page = 0
- l = list();
- l.append(result)
- return l
-
-def striptags(content):
- result = ""
- try:
- result = ''.join(xml.etree.ElementTree.fromstring(content).itertext())
- except:
- #TODO: test<br>test2 will make it testtest2 not test test2
- result = re.sub('<[^>]*>', '', content)
-
- return result
-
-
-def strip_irrelevant(content):
- result = content.replace("\n", " ").replace("\t", " ")
- result = re.sub(' +', ' ', result)
- return result;
-
-
-
-def process_pdf(path):
- result = list()
- args=["pdftotext", path, "-"]
- stdout,stderr = subprocess.Popen(args,stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
- content = strip_irrelevant(stdout.decode('utf-8'))
- #it is faster to do it like this than to call pdfottext for each page
- splitted = content.split("\f")
- count=1
- for page in splitted:
- data = pagedata()
- data.page = count
- data.content = page
- result.append(data)
- count += 1
-
- #TODO: current hack, so we can fts search several words over the whole document
- #this of course uses more space, but in the end that's not a big problem
- #Nevertheless, this remains a hack
- everything = pagedata()
- everything.page = 0
- everything.content = content.replace("\f", "")
- result.append(everything)
- return result
-
-def process_odt(path):
- fd = zipfile.ZipFile(path)
- content = fd.read("content.xml").decode("utf-8")
- fd.close()
- return singlepagelist(striptags(content))
-
-def process_ods(path):
- return process_odt(path)
-
-def readtext(path):
- fd = open(path, "rb")
- content = fd.read()
- fd.close()
-
- result=""
- try:
- result = str(content.decode("utf-8"))
- except:
- try:
- encoding = chardet.detect(content)["encoding"];
- if encoding == None:
- result = ""
- else:
- result = str(content.decode(encoding))
- except:
- print("FAILED DECODING: " + path)
- result = ""
- return result
-
-def process_striptags(path):
- content = readtext(path)
- return singlepagelist(striptags(content))
-
-def process_text(path):
- return singlepagelist(readtext(path))
-
-def process_nothing(path):
- return list()
-
-def exists(abspath, mtime):
- cursor = dbcon.cursor()
- cursor.execute("SELECT 1 FROM file WHERE path = ? AND mtime = ?" , (abspath, mtime))
- result = cursor.fetchone()
- if result != None and result[0] == 1:
- return True
- return False
-
-def insert(path):
- print("processing", path)
- abspath=os.path.abspath(path)
- mtime = int(os.stat(abspath).st_mtime)
-
- if exists(abspath, mtime):
- print("Leaving alone " + abspath + " because it wasn't changed")
- return
- basename=os.path.basename(abspath)
- ext = os.path.splitext(abspath)[1]
-
- content=""
-
- processor=process_nothing
- if ext in preprocess:
- processor=preprocess[ext]
- pagedatalist = processor(abspath)
-
- #TODO: assumes sqlitehas been built with thread safety (and it is the default)
- cursor = dbcon.cursor()
- cursor.execute("BEGIN TRANSACTION")
- cursor.execute("DELETE FROM file WHERE path = ?", (abspath,))
- cursor.execute("INSERT INTO file(path, mtime) VALUES(?, ?) ", (abspath, mtime))
- fileid=cursor.lastrowid
- for pagedata in pagedatalist:
- cursor.execute("INSERT INTO content(fileid, page, content) VALUES(?, ?, ?)", (fileid, pagedata.page, pagedata.content))
- cursor.execute("COMMIT TRANSACTION")
-
-preprocess={".pdf":process_pdf, ".odt":process_odt, ".ods":process_ods, ".html":process_striptags, ".xml":process_nothing, ".txt":process_text,
- ".sql":process_text, ".c":process_text, ".cpp":process_text, ".js":process_text, ".java":process_text,
- ".py":process_text, '.md':process_text}
-
-def init():
- global dbcon
- dbcon = sqlite3.connect(config.DBPATH, isolation_level=None)
-
-
-dbcon = None
-if __name__ == '__main__':
- with Pool(processes=4,initializer=init) as pool:
-
- if len(sys.argv) < 2:
- pool.map(insert, (l.replace("\n", "") for l in sys.stdin))
- else:
- pool.map(insert, sys.argv[1:])
-
-
-
-
-
-
diff --git a/cli/addfileexception.h b/cli/addfileexception.h
new file mode 100644
index 0000000..6873efa
--- /dev/null
+++ b/cli/addfileexception.h
@@ -0,0 +1,9 @@
+#ifndef ADDFILEEXCEPTION_H
+#define ADDFILEEXCEPTION_H
+#include
+#include
+class AddFileException : public QException
+{
+
+};
+#endif // ADDFILEEXCEPTION_H
diff --git a/cli/cli.pro b/cli/cli.pro
new file mode 100644
index 0000000..04732bd
--- /dev/null
+++ b/cli/cli.pro
@@ -0,0 +1,46 @@
+QT -= gui
+QT += sql concurrent
+CONFIG += c++11 console
+CONFIG -= app_bundle
+
+# The following define makes your compiler emit warnings if you use
+# any feature of Qt which has been marked deprecated (the exact warnings
+# depend on your compiler). Please consult the documentation of the
+# deprecated API in order to know how to port your code away from it.
+DEFINES += QT_DEPRECATED_WARNINGS
+
+# You can also make your code fail to compile if you use deprecated APIs.
+# In order to do so, uncomment the following line.
+# You can also select to disable deprecated APIs only up to a certain version of Qt.
+#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
+LIBS += -luchardet -lpoppler-qt5 -lquazip
+SOURCES += \
+ main.cpp \
+ encodingdetector.cpp \
+ processor.cpp \
+ pdfprocessor.cpp \
+ defaulttextprocessor.cpp \
+ command.cpp \
+ commandadd.cpp \
+ tagstripperprocessor.cpp \
+ nothingprocessor.cpp \
+ odtprocessor.cpp \
+ utils.cpp \
+ odsprocessor.cpp \
+ qssgeneralexception.cpp
+
+HEADERS += \
+ encodingdetector.h \
+ processor.h \
+ pagedata.h \
+ pdfprocessor.h \
+ defaulttextprocessor.h \
+ command.h \
+ commandadd.h \
+ tagstripperprocessor.h \
+ nothingprocessor.h \
+ odtprocessor.h \
+ utils.h \
+ odsprocessor.h \
+ qssgeneralexception.h
+INCLUDEPATH += /usr/include/poppler/qt5/ /usr/include/quazip5
diff --git a/cli/command.cpp b/cli/command.cpp
new file mode 100644
index 0000000..7e69df3
--- /dev/null
+++ b/cli/command.cpp
@@ -0,0 +1,37 @@
+#include
+#include
+#include
+#include "command.h"
+#include "qssgeneralexception.h"
+
+bool Command::fileExistsInDatabase(QSqlDatabase &db, QString path, qint64 mtime)
+{
+ auto query = QSqlQuery("SELECT 1 FROM file WHERE path = ? and mtime = ?", db);
+ query.addBindValue(path);
+ query.addBindValue(mtime);
+ if(!query.exec())
+ { throw QSSGeneralException("Error while trying to query for file existance");
+ }
+ if(!query.next())
+ {
+ return false;
+ }
+ return query.value(0).toBool();
+}
+
+QSqlDatabase Command::dbConnection()
+{
+ if(dbStore.hasLocalData())
+ {
+ return dbStore.localData();
+ }
+ QSqlDatabase db = QSqlDatabase::addDatabase("QSQLITE", "QSS" + QString::number((quint64)QThread::currentThread(), 16));
+ db.setDatabaseName(this->dbConnectionString);
+ if(!db.open())
+ {
+ qDebug() << "Failed to open the database: " << this->dbConnectionString;
+ }
+ dbStore.setLocalData(db);
+ return db;
+}
+
diff --git a/cli/command.h b/cli/command.h
new file mode 100644
index 0000000..ffa1ae5
--- /dev/null
+++ b/cli/command.h
@@ -0,0 +1,26 @@
+#ifndef COMMAND_H
+#define COMMAND_H
+#include
+#include
+#include
+#include
+#include
+class Command
+{
+protected:
+ bool fileExistsInDatabase(QSqlDatabase &db, QString path, qint64 mtime);
+ QByteArray readFile(QString path) const;
+ QString dbConnectionString;
+ QThreadStorage dbStore;
+public:
+ Command(QString dbConnectionString)
+ {
+ this->dbConnectionString = dbConnectionString;
+ }
+
+ QSqlDatabase dbConnection();
+ virtual int handle(QStringList arguments) = 0;
+ virtual ~Command() { };
+};
+
+#endif // COMMAND_H
diff --git a/cli/commandadd.cpp b/cli/commandadd.cpp
new file mode 100644
index 0000000..f513566
--- /dev/null
+++ b/cli/commandadd.cpp
@@ -0,0 +1,214 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "processor.h"
+#include "pdfprocessor.h"
+#include "commandadd.h"
+#include "defaulttextprocessor.h"
+#include "tagstripperprocessor.h"
+#include "nothingprocessor.h"
+#include "odtprocessor.h"
+#include "odsprocessor.h"
+#include "utils.h"
+static DefaultTextProcessor *defaultTextProcessor = new DefaultTextProcessor();
+static TagStripperProcessor *tagStripperProcessor = new TagStripperProcessor();
+static NothingProcessor *nothingProcessor = new NothingProcessor();
+static OdtProcessor *odtProcessor = new OdtProcessor();
+static OdsProcessor *odsProcessor = new OdsProcessor();
+
+static QMap processors{
+ {"pdf", new PdfProcessor() },
+ {"txt", defaultTextProcessor},
+ {"md", defaultTextProcessor},
+ {"py", defaultTextProcessor},
+ {"xml", nothingProcessor},
+ {"html", tagStripperProcessor},
+ {"java", defaultTextProcessor},
+ {"js", defaultTextProcessor},
+ {"cpp", defaultTextProcessor},
+ {"c", defaultTextProcessor},
+ {"sql", defaultTextProcessor},
+ {"odt", odtProcessor},
+ {"ods", odsProcessor}
+};
+
+
+
+AddFileResult CommandAdd::addFile(QString path)
+{
+ QFileInfo info(path);
+ QString absPath = info.absoluteFilePath();
+ auto mtime = info.lastModified().toSecsSinceEpoch();
+ QChar fileType = info.isDir() ? 'd' : 'f';
+
+ QSqlDatabase db = dbConnection();
+ if(fileExistsInDatabase(db, absPath, mtime))
+ {
+ return SKIPPED;
+ }
+
+
+
+ Processor *processor = processors.value(info.suffix(), nothingProcessor);
+ QVector pageData;
+ if(processor->PREFERED_DATA_SOURCE == FILEPATH)
+ {
+ pageData = processor->process(absPath);
+ }
+ else
+ {
+ pageData = processor->process(Utils::readFile(absPath));
+ }
+
+ if(pageData.isEmpty())
+ {
+ qDebug() << "Could not get any content for " << absPath;
+ }
+
+
+ //Workaround to "database is locked" error. Perhaps try WAL mode etc.
+ //QMutexLocker locker(&writeMutex);
+ if(!db.transaction())
+ {
+ qDebug() << "Failed to open transaction for " << absPath << " : " << db.lastError();
+ return DBFAIL;
+ }
+
+ QSqlQuery delQuery("DELETE FROM file WHERE path = ?", db);
+ delQuery.addBindValue(absPath);
+ if(!delQuery.exec())
+ {
+ qDebug() << "Failed DELETE query" << delQuery.lastError();
+ db.rollback();
+ return DBFAIL;
+ }
+
+
+ QSqlQuery inserterQuery("INSERT INTO file(path, mtime, size, filetype) VALUES(?, ?, ?, ?)", db);
+ inserterQuery.addBindValue(absPath);
+ inserterQuery.addBindValue(mtime);
+ inserterQuery.addBindValue(info.size());
+ inserterQuery.addBindValue(fileType);
+ if(!inserterQuery.exec())
+ {
+ qDebug() << "Failed INSERT query" << inserterQuery.lastError();
+ db.rollback();
+ return DBFAIL;
+ }
+ int lastid = inserterQuery.lastInsertId().toInt();
+ for(PageData &data : pageData)
+ {
+ QSqlQuery contentQuery("INSERT INTO content(fileid, page, content) VALUES(?, ?, ?)", db);
+ contentQuery.addBindValue(lastid);
+ contentQuery.addBindValue(data.pagenumber);
+ contentQuery.addBindValue(data.content);
+ if(!contentQuery.exec())
+ {
+ db.rollback();
+ qDebug() << "Failed content insertion " << contentQuery.lastError();
+ return DBFAIL;
+ }
+ }
+
+ if(!db.commit())
+ {
+ db.rollback();
+ qDebug() << "Failed to commit transaction for " << absPath << " : " << db.lastError();
+ return DBFAIL;
+ }
+ return OK;
+}
+
+
+int CommandAdd::handle(QStringList arguments)
+{
+ QCommandLineParser parser;
+ parser.addOptions({
+ { { "c", "continue" }, "Continue adding files, don't exit on first error"},
+ { { "a", "all" }, "On error, no files should be added, even already processed ones" },
+ { { "v", "verbose" }, "Print skipped and added files" },
+ { { "n", "threads" }, "Number of threads to use.", "threads" }
+ });
+
+ parser.addHelpOption();
+ parser.addPositionalArgument("paths", "List of paths to process/add to the index", "[paths...]");
+
+ parser.process(arguments);
+ bool keepGoing = false;
+ bool verbose = false;
+ if(parser.isSet("continue"))
+ {
+ keepGoing = true;
+ }
+ if(parser.isSet("verbose"))
+ {
+ verbose = true;
+ }
+ if(parser.isSet("all"))
+ {
+ throw QSSGeneralException("To be implemented");
+ }
+ if(parser.isSet("threads"))
+ {
+ QString threadsCount = parser.value("threads");
+ QThreadPool::globalInstance()->setMaxThreadCount(threadsCount.toInt());
+ }
+
+ QStringList files = parser.positionalArguments();
+
+ if(files.length() == 0)
+ {
+ QTextStream stream(stdin);
+
+ while(!stream.atEnd())
+ {
+ QString path = stream.readLine();
+ files.append(path);
+ }
+ }
+
+ bool terminate = false;
+ QtConcurrent::blockingMap(files, [&](QString &path) {
+ if(terminate)
+ {
+ return;
+ }
+ if(verbose)
+ {
+ qDebug() << "Processing " << path;
+ }
+ auto result = addFile(path);
+ if(result == DBFAIL)
+ {
+ qDebug() << "Failed to add " << path;
+ if(!keepGoing)
+ {
+ terminate = true;
+ }
+ }
+ if(verbose)
+ {
+ if(result == SKIPPED)
+ {
+ qDebug() << "SKIPPED" << path << "as it already exists in the database";
+ }
+ else
+ {
+ qDebug() << "Added" << path;
+ }
+ }
+
+
+ });
+
+ return 0;
+}
diff --git a/cli/commandadd.h b/cli/commandadd.h
new file mode 100644
index 0000000..bdaa689
--- /dev/null
+++ b/cli/commandadd.h
@@ -0,0 +1,23 @@
+#ifndef COMMANDADD_H
+#define COMMANDADD_H
+#include
+#include "command.h"
+enum AddFileResult
+{
+ OK,
+ SKIPPED,
+ DBFAIL
+};
+
+class CommandAdd : public Command
+{
+private:
+ AddFileResult addFile(QString path);
+ QMutex writeMutex;
+public:
+ using Command::Command;
+
+ int handle(QStringList arguments) override;
+};
+
+#endif // COMMANDADD_H
diff --git a/cli/defaulttextprocessor.cpp b/cli/defaulttextprocessor.cpp
new file mode 100644
index 0000000..6f040b1
--- /dev/null
+++ b/cli/defaulttextprocessor.cpp
@@ -0,0 +1,31 @@
+#include
+#include
+#include
+#include
+#include "defaulttextprocessor.h"
+
+DefaultTextProcessor::DefaultTextProcessor()
+{
+
+}
+
+QString DefaultTextProcessor::processText(const QByteArray &data) const
+{
+ QString encoding = encodingDetector.detectEncoding(data);
+ if(!encoding.isEmpty())
+ {
+ QTextCodec *codec = QTextCodec::codecForName(encoding.toUtf8());
+ if(codec != nullptr)
+ {
+ return codec->toUnicode(data);
+ }
+ qWarning() << "No codec found for " << encoding;
+ return QString(data);
+ }
+ return { };
+}
+
+QVector DefaultTextProcessor::process(const QByteArray &data) const
+{
+ return {{0, processText(data)}};
+}
diff --git a/cli/defaulttextprocessor.h b/cli/defaulttextprocessor.h
new file mode 100644
index 0000000..c3aff34
--- /dev/null
+++ b/cli/defaulttextprocessor.h
@@ -0,0 +1,16 @@
+#ifndef DEFAULTTEXTPROCESSOR_H
+#define DEFAULTTEXTPROCESSOR_H
+
+#include "processor.h"
+#include "encodingdetector.h"
+class DefaultTextProcessor : public Processor
+{
+protected:
+ EncodingDetector encodingDetector;
+public:
+ DefaultTextProcessor();
+ QString processText(const QByteArray &data) const;
+ QVector process(const QByteArray &data) const override;
+};
+
+#endif // DEFAULTTEXTPROCESSOR_H
diff --git a/cli/encodingdetector.cpp b/cli/encodingdetector.cpp
new file mode 100644
index 0000000..a204142
--- /dev/null
+++ b/cli/encodingdetector.cpp
@@ -0,0 +1,50 @@
+#include
+#include "encodingdetector.h"
+#include
+EncodingDetector::EncodingDetector()
+{
+
+}
+
+
+QString EncodingDetector::detectEncoding(const QByteArray &data) const
+{
+ uchardet_t detector = uchardet_new();
+ if(uchardet_handle_data(detector, data.data(), data.size()) != 0 )
+ {
+ uchardet_delete(detector);
+ throw QSSGeneralException("Decoder failed");
+ }
+ uchardet_data_end(detector);
+ QString encoding = uchardet_get_charset(detector);
+ uchardet_delete(detector);
+ return encoding;
+
+}
+QString EncodingDetector::detectEncoding(QDataStream &s) const
+{
+ uchardet_t detector = uchardet_new();
+
+ char buffer[4096];
+ int n;
+ while((n = s.readRawData(buffer, sizeof(buffer))) > 0)
+ {
+ if(uchardet_handle_data(detector, buffer, n) != 0)
+ {
+ uchardet_delete(detector);
+
+ throw QSSGeneralException("Decoder failed");
+ }
+ }
+ if(n == -1)
+ {
+ uchardet_delete(detector);
+ throw QSSGeneralException("Read failed");
+ }
+ uchardet_data_end(detector);
+ QString encoding = uchardet_get_charset(detector);
+ uchardet_delete(detector);
+ return encoding;
+}
+
+
diff --git a/cli/encodingdetector.h b/cli/encodingdetector.h
new file mode 100644
index 0000000..17f5909
--- /dev/null
+++ b/cli/encodingdetector.h
@@ -0,0 +1,14 @@
+#ifndef ENCODINGDETECTOR_H
+#define ENCODINGDETECTOR_H
+#include
+#include
+class EncodingDetector
+{
+
+public:
+ EncodingDetector();
+ QString detectEncoding(const QByteArray &data) const;
+ QString detectEncoding(QDataStream &s) const;
+};
+
+#endif // ENCODINGDETECTOR_H
diff --git a/cli/main.cpp b/cli/main.cpp
new file mode 100644
index 0000000..09c5da0
--- /dev/null
+++ b/cli/main.cpp
@@ -0,0 +1,76 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "encodingdetector.h"
+#include "pdfprocessor.h"
+#include "defaulttextprocessor.h"
+#include "command.h"
+#include "commandadd.h"
+void printUsage(QString argv0)
+{
+ qInfo() << "Usage: " << argv0 << "command";
+}
+
+Command *commandFromName(QString name, QString connectionstring)
+{
+ if(name == "add")
+ {
+ return new CommandAdd(connectionstring);
+ }
+ if(name == "delete")
+ {
+
+ }
+ if(name == "update")
+ {
+
+ }
+ if(name == "search")
+ {
+
+ }
+ return nullptr;
+}
+
+int main(int argc, char *argv[])
+{
+ QCoreApplication app(argc, argv);
+
+ QStringList args = app.arguments();
+ QString argv0 = args.takeFirst();
+ if(args.length() < 1)
+ {
+ printUsage(argv0);
+ exit(1);
+ }
+
+ QString commandName = args.first();
+ Command *cmd = commandFromName(commandName, QProcessEnvironment::systemEnvironment().value("QSS_PATH"));
+ if(cmd != nullptr)
+ {
+ try
+ {
+ return cmd->handle(args);
+ }
+ catch(const QSSGeneralException &e)
+ {
+ qDebug() << "Exception caught, message: " << e.message;
+ }
+ }
+ else
+ {
+ qDebug() << "Unknown command " << commandName;
+ }
+ return 1;
+}
diff --git a/cli/nothingprocessor.cpp b/cli/nothingprocessor.cpp
new file mode 100644
index 0000000..b11f9f8
--- /dev/null
+++ b/cli/nothingprocessor.cpp
@@ -0,0 +1,6 @@
+#include "nothingprocessor.h"
+
+NothingProcessor::NothingProcessor()
+{
+
+}
diff --git a/cli/nothingprocessor.h b/cli/nothingprocessor.h
new file mode 100644
index 0000000..9c25f02
--- /dev/null
+++ b/cli/nothingprocessor.h
@@ -0,0 +1,19 @@
+#ifndef NOTHINGPROCESSOR_H
+#define NOTHINGPROCESSOR_H
+#include
+#include "processor.h"
+#include "pagedata.h"
+
+class NothingProcessor : public Processor
+{
+public:
+ NothingProcessor();
+
+public:
+ QVector process(const QByteArray &data) const override {
+ return { };
+ }
+};
+
+
+#endif // NOTHINGPROCESSOR_H
diff --git a/cli/odsprocessor.cpp b/cli/odsprocessor.cpp
new file mode 100644
index 0000000..751918c
--- /dev/null
+++ b/cli/odsprocessor.cpp
@@ -0,0 +1,6 @@
+#include "odsprocessor.h"
+
+OdsProcessor::OdsProcessor()
+{
+
+}
diff --git a/cli/odsprocessor.h b/cli/odsprocessor.h
new file mode 100644
index 0000000..5578b32
--- /dev/null
+++ b/cli/odsprocessor.h
@@ -0,0 +1,10 @@
+#ifndef ODSPROCESSOR_H
+#define ODSPROCESSOR_H
+#include "odtprocessor.h"
+class OdsProcessor : public OdtProcessor
+{
+public:
+ OdsProcessor();
+};
+
+#endif // ODSPROCESSOR_H
diff --git a/cli/odtprocessor.cpp b/cli/odtprocessor.cpp
new file mode 100644
index 0000000..4e72e3b
--- /dev/null
+++ b/cli/odtprocessor.cpp
@@ -0,0 +1,27 @@
+#include
+#include
+#include "odtprocessor.h"
+#include "tagstripperprocessor.h"
+
+
+QVector OdtProcessor::process(const QByteArray &data) const
+{
+ throw QSSGeneralException("Not implemented yet");
+}
+
+QVector OdtProcessor::process(QString path) const
+{
+ QuaZipFile zipFile(path);
+ zipFile.setFileName("content.xml");
+ if(!zipFile.open(QIODevice::ReadOnly))
+ {
+ throw QSSGeneralException("Error while opening file " + path);
+ }
+ QByteArray entireContent = zipFile.readAll();
+ if(entireContent.isEmpty())
+ {
+ throw QSSGeneralException("Error while reading content.xml of " + path);
+ }
+ TagStripperProcessor tsp;
+ return tsp.process(entireContent);
+}
diff --git a/cli/odtprocessor.h b/cli/odtprocessor.h
new file mode 100644
index 0000000..7594d7e
--- /dev/null
+++ b/cli/odtprocessor.h
@@ -0,0 +1,18 @@
+#ifndef ODTPROCESSOR_H
+#define ODTPROCESSOR_H
+#include "processor.h"
+class OdtProcessor : public Processor
+{
+public:
+ OdtProcessor()
+ {
+ this->PREFERED_DATA_SOURCE = FILEPATH;
+ }
+ QVector process(const QByteArray &data) const override;
+
+ QVector process(QString path) const override;
+
+
+};
+
+#endif // ODTPROCESSOR_H
diff --git a/cli/pagedata.h b/cli/pagedata.h
new file mode 100644
index 0000000..2e2ae55
--- /dev/null
+++ b/cli/pagedata.h
@@ -0,0 +1,21 @@
+#ifndef PAGEDATA_H
+#define PAGEDATA_H
+#include
+class PageData
+{
+public:
+
+ unsigned int pagenumber = 0;
+ QString content;
+
+ PageData()
+ {
+
+ }
+ PageData(unsigned int pagenumber, QString content)
+ {
+ this->pagenumber = pagenumber;
+ this->content = content;
+ }
+};
+#endif // PAGEDATA_H
diff --git a/cli/pdfprocessor.cpp b/cli/pdfprocessor.cpp
new file mode 100644
index 0000000..1fd6f70
--- /dev/null
+++ b/cli/pdfprocessor.cpp
@@ -0,0 +1,34 @@
+#include
+#include
+#include "pdfprocessor.h"
+PdfProcessor::PdfProcessor()
+{
+
+}
+
+
+QVector PdfProcessor::process(const QByteArray &data) const
+{
+ QVector result;
+ QScopedPointer doc(Poppler::Document::loadFromData(data));
+ if(doc.isNull())
+ {
+ return { };
+ }
+ QRectF entirePage;
+
+ auto pagecount = doc->numPages();
+ QString entire;
+ entire.reserve(data.size()); //TODO too much
+ for(auto i = 0; i < pagecount; i++ )
+ {
+ QString text =doc->page(i)->text(entirePage);
+ result.append({static_cast(i+1),text });
+ /*TODO: hack, so we can fts search several words over the whole document, not just pages.
+ * this of course uses more space and should be solved differently.
+ */
+ entire += text;
+ }
+ result.append({0, entire});
+ return result;
+}
diff --git a/cli/pdfprocessor.h b/cli/pdfprocessor.h
new file mode 100644
index 0000000..54ad09d
--- /dev/null
+++ b/cli/pdfprocessor.h
@@ -0,0 +1,13 @@
+#ifndef PDFPROCESSOR_H
+#define PDFPROCESSOR_H
+#include "processor.h"
+class PdfProcessor : public Processor
+{
+public:
+ PdfProcessor();
+
+public:
+ QVector process(const QByteArray &data) const override;
+};
+
+#endif // PDFPROCESSOR_H
diff --git a/cli/processor.cpp b/cli/processor.cpp
new file mode 100644
index 0000000..5a9e34c
--- /dev/null
+++ b/cli/processor.cpp
@@ -0,0 +1,7 @@
+#include "processor.h"
+
+Processor::Processor()
+{
+
+}
+
diff --git a/cli/processor.h b/cli/processor.h
new file mode 100644
index 0000000..c3c8512
--- /dev/null
+++ b/cli/processor.h
@@ -0,0 +1,32 @@
+#ifndef PROCESSOR_H
+#define PROCESSOR_H
+#include
+#include
+#include "pagedata.h"
+#include "utils.h"
+enum DataSource
+{
+ FILEPATH,
+ ARRAY
+};
+
+class Processor
+{
+public:
+ /* Indicates the data source the processor performs best with. For example,
+ * you do not want to read an entire compressed archive just to get the content of
+ * a single file */
+ DataSource PREFERED_DATA_SOURCE = ARRAY;
+ Processor();
+ virtual QVector process(const QByteArray &data) const = 0;
+ virtual QVector process(QString path) const
+ {
+ return process(Utils::readFile(path));
+
+ }
+
+
+ virtual ~Processor() { }
+};
+
+#endif // PROCESSOR_H
diff --git a/cli/qssgeneralexception.cpp b/cli/qssgeneralexception.cpp
new file mode 100644
index 0000000..befc46f
--- /dev/null
+++ b/cli/qssgeneralexception.cpp
@@ -0,0 +1,2 @@
+#include "qssgeneralexception.h"
+
diff --git a/cli/qssgeneralexception.h b/cli/qssgeneralexception.h
new file mode 100644
index 0000000..1a0a66e
--- /dev/null
+++ b/cli/qssgeneralexception.h
@@ -0,0 +1,15 @@
+#ifndef QSSGENERALEXCEPTION_H
+#define QSSGENERALEXCEPTION_H
+
+#include
+
+class QSSGeneralException : public QException
+{
+public:
+ QString message;
+ QSSGeneralException(QString message) { this->message = message; };
+ void raise() const override { throw *this; }
+ QSSGeneralException *clone() const override { return new QSSGeneralException(*this); }
+};
+
+#endif // QSSGENERALEXCEPTION_H
diff --git a/cli/tagstripperprocessor.cpp b/cli/tagstripperprocessor.cpp
new file mode 100644
index 0000000..1f6cae6
--- /dev/null
+++ b/cli/tagstripperprocessor.cpp
@@ -0,0 +1,15 @@
+#include "tagstripperprocessor.h"
+
+TagStripperProcessor::TagStripperProcessor()
+{
+
+}
+
+QVector TagStripperProcessor::process(const QByteArray &data) const
+{
+ auto result = DefaultTextProcessor::process(data);
+ //TODO: does not work properly with <br> and does not deal with entities...
+
+ result[0].content.remove(QRegExp("<[^>]*>"));
+ return result;
+}
diff --git a/cli/tagstripperprocessor.h b/cli/tagstripperprocessor.h
new file mode 100644
index 0000000..5111935
--- /dev/null
+++ b/cli/tagstripperprocessor.h
@@ -0,0 +1,14 @@
+#ifndef XMLSTRIPPERPROCESSOR_H
+#define XMLSTRIPPERPROCESSOR_H
+#include "defaulttextprocessor.h"
+
+class TagStripperProcessor : public DefaultTextProcessor
+{
+public:
+ TagStripperProcessor();
+
+public:
+ QVector process(const QByteArray &data) const override;
+};
+
+#endif // XMLSTRIPPERPROCESSOR_H
diff --git a/cli/utils.cpp b/cli/utils.cpp
new file mode 100644
index 0000000..005e243
--- /dev/null
+++ b/cli/utils.cpp
@@ -0,0 +1,21 @@
+#include
+#include "utils.h"
+Utils::Utils()
+{
+
+}
+
+QByteArray Utils::readFile(QString path)
+{
+ QFile file(path);
+ if(!file.open(QIODevice::ReadOnly))
+ {
+ throw QSSGeneralException("Failed to open file: " + path);
+ }
+ QByteArray data = file.readAll();
+ if(data.isEmpty() && file.error() != QFileDevice::FileError::NoError)
+ {
+ throw QSSGeneralException("Error reading file: " + path + ", Error: " + file.error());
+ }
+ return data;
+}
diff --git a/cli/utils.h b/cli/utils.h
new file mode 100644
index 0000000..b64ccb6
--- /dev/null
+++ b/cli/utils.h
@@ -0,0 +1,15 @@
+#ifndef UTILS_H
+#define UTILS_H
+#include
+#include
+#include
+#include "qssgeneralexception.h"
+
+class Utils
+{
+public:
+ Utils();
+ static QByteArray readFile(QString path);
+};
+
+#endif // UTILS_H
diff --git a/config.py b/config.py
deleted file mode 100644
index e8408e8..0000000
--- a/config.py
+++ /dev/null
@@ -1,5 +0,0 @@
-import os
-DBPATH=os.getenv("QSS_PATH")
-if DBPATH == None or DBPATH == "":
- print("MIssing env var")
- exit(1)
diff --git a/delindex b/delindex
deleted file mode 100755
index 73679c4..0000000
--- a/delindex
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/sh
-TEMPFILE=$(mktemp)
-DBFILE="$QSS_PATH"
-function todelete()
-{
- echo "DELETE FROM file WHERE path = '$1';" >> /"$TEMPFILE"
-}
-
-echo "BEGIN TRANSACTION;" >> /"$TEMPFILE"
-
-sqlite3 "$DBFILE" "SELECT path FROM file;"| while read line ; do
-[ -e "$line" ] || todelete "$line"
-done
-
-echo "COMMIT TRANSACTION;" >> /"$TEMPFILE"
-
-sqlite3 "$DBFILE" < /"$TEMPFILE"
-
-
-
-
diff --git a/searchindex b/searchindex
deleted file mode 100755
index 35e4da8..0000000
--- a/searchindex
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/usr/bin/python3
-import sqlite3
-import sys
-import config
-
-dbcon = sqlite3.connect(config.DBPATH, isolation_level=None)
-cursor = dbcon.cursor()
-
-if len(sys.argv) < 2:
- print("Error: Missing search")
-
-search=sys.argv[1:]
-#TODO: machien parseable
-for row in cursor.execute("SELECT file.path, content.page FROM file INNER JOIN content ON file.id = content.fileid INNER JOIN content_fts ON content.id = content_fts.ROWID WHERE content_fts.content MATCH ? ORDER By file.mtime ASC", (search)):
- print("File:", row[0], "Page: ", row[1])
-dbcon.close()
-
-
-
-
-