#include #include #include "pdfprocessor.h" PdfProcessor::PdfProcessor() { } QVector PdfProcessor::process(const QByteArray &data) const { QVector result; QScopedPointer doc(Poppler::Document::loadFromData(data)); if(doc.isNull()) { throw LooqsGeneralException("Failed to process pdf data"); } if(doc->isLocked()) { throw LooqsGeneralException("Doc is locked"); } QRectF entirePage; auto pagecount = doc->numPages(); QString entire; entire.reserve(data.size()); //TODO too much for(auto i = 0; i < pagecount; i++ ) { QString text =doc->page(i)->text(entirePage); result.append({static_cast(i+1),text }); /*TODO: hack, so we can fts search several words over the whole document, not just pages. * this of course uses more space and should be solved differently. */ entire += text; } result.append({0, entire}); return result; }