2019-04-06 17:16:42 +02:00
|
|
|
#include <QScopedPointer>
|
|
|
|
#include <poppler-qt5.h>
|
|
|
|
#include "pdfprocessor.h"
|
|
|
|
/** Constructs a PdfProcessor; no initialisation is performed here. */
PdfProcessor::PdfProcessor() = default;
|
|
|
|
|
2023-05-14 14:15:50 +02:00
|
|
|
/**
 * Converts a list of Poppler outline items into DocumentOutlineEntry objects.
 *
 * Recurses into the children of each item, so the returned entries mirror the
 * nesting of the input.
 *
 * @param outlineItems Outline items of one nesting level.
 * @return One entry per input item, in the same order.
 */
QVector<DocumentOutlineEntry> PdfProcessor::createOutline(const QVector<Poppler::OutlineItem> &outlineItems) const
{
    QVector<DocumentOutlineEntry> entries;
    for(const Poppler::OutlineItem &item : outlineItems)
    {
        DocumentOutlineEntry entry;
        entry.text = item.name();
        entry.type = OUTLINE_DESTINATION_TYPE_PAGE;

        // destinationPage is only assigned when the item actually has a destination.
        const auto destination = item.destination();
        if(destination)
        {
            entry.destinationPage = destination->pageNumber();
        }

        if(item.hasChildren())
        {
            entry.children = createOutline(item.children());
        }

        entries.push_back(entry);
    }
    return entries;
}
|
|
|
|
|
|
|
|
/**
 * Extracts the text of every page, plus the outline, from an in-memory PDF.
 *
 * @param data Raw bytes of the PDF document.
 * @return Result whose `pages` holds one entry per page (numbered starting at 1),
 *         followed by a final entry with page number 0 that contains the
 *         concatenated text of all pages, and whose `outlines` holds the
 *         converted document outline.
 * @throws LooqsGeneralException if the data cannot be loaded as a PDF, the
 *         document is locked, or a page cannot be loaded.
 */
DocumentProcessResult PdfProcessor::process(const QByteArray &data) const
{
    DocumentProcessResult result;

    QScopedPointer<Poppler::Document> doc(Poppler::Document::loadFromData(data));
    if(doc.isNull())
    {
        throw LooqsGeneralException("Failed to process pdf data");
    }
    if(doc->isLocked())
    {
        throw LooqsGeneralException("Doc is locked");
    }

    // A null rectangle asks Poppler for all text on the page.
    const QRectF entirePage;

    const int pagecount = doc->numPages();
    QString entire;
    entire.reserve(data.size()); // TODO too much
    for(int i = 0; i < pagecount; i++)
    {
        // Document::page() hands ownership of the page to the caller, so keep it in a
        // scoped pointer; otherwise every page would be leaked.
        QScopedPointer<Poppler::Page> page(doc->page(i));
        if(page.isNull())
        {
            throw LooqsGeneralException("Failed to load pdf page");
        }

        const QString text = page->text(entirePage);
        result.pages.append({static_cast<unsigned int>(i + 1), text});

        /*TODO: hack, so we can fts search several words over the whole document, not just pages.
         * this of course uses more space and should be solved differently.
         */
        entire += text;
    }

    // Page number 0 carries the text of the whole document.
    result.pages.append({0, entire});
    result.outlines = createOutline(doc->outline());
    return result;
}
|