From c6c0b09e5607fc3a0f99ae0d4bdbc590e4c45b29 Mon Sep 17 00:00:00 2001
From: Albert S
Date: Sun, 14 May 2023 14:15:50 +0200
Subject: [PATCH] PdfProcessor: Extract outline from documents

---
 shared/pdfprocessor.cpp | 30 ++++++++++++++++++++++++++----
 shared/pdfprocessor.h   |  4 +++-
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/shared/pdfprocessor.cpp b/shared/pdfprocessor.cpp
index 6a40e31..34c84d5 100644
--- a/shared/pdfprocessor.cpp
+++ b/shared/pdfprocessor.cpp
@@ -5,9 +5,30 @@ PdfProcessor::PdfProcessor()
 {
 }
 
-QVector<PageData> PdfProcessor::process(const QByteArray &data) const
+QVector<DocumentOutlineEntry> PdfProcessor::createOutline(const QVector<Poppler::OutlineItem> &outlineItems) const
 {
-	QVector<PageData> result;
+	QVector<DocumentOutlineEntry> result;
+	for(const Poppler::OutlineItem &outlineItem : outlineItems)
+	{
+		DocumentOutlineEntry documentOutlineEntry;
+		documentOutlineEntry.text = outlineItem.name();
+		documentOutlineEntry.type = OUTLINE_DESTINATION_TYPE_PAGE;
+		if(!outlineItem.destination().isNull())
+		{
+			documentOutlineEntry.destinationPage = outlineItem.destination()->pageNumber();
+		}
+		if(outlineItem.hasChildren())
+		{
+			documentOutlineEntry.children = createOutline(outlineItem.children());
+		}
+		result.append(documentOutlineEntry);
+	}
+	return result;
+}
+
+DocumentProcessResult PdfProcessor::process(const QByteArray &data) const
+{
+	DocumentProcessResult result;
 	QScopedPointer<Poppler::Document> doc(Poppler::Document::loadFromData(data));
 	if(doc.isNull())
 	{
@@ -26,12 +47,13 @@ QVector<PageData> PdfProcessor::process(const QByteArray &data) const
 	for(auto i = 0; i < pagecount; i++)
 	{
 		QString text = doc->page(i)->text(entirePage);
-		result.append({static_cast<unsigned int>(i + 1), text});
+		result.pages.append({static_cast<unsigned int>(i + 1), text});
 		/*TODO: hack, so we can fts search several words over the whole document, not just pages.
 		 * this of course uses more space and should be solved differently.
 		 */
 		entire += text;
 	}
-	result.append({0, entire});
+	result.pages.append({0, entire});
+	result.outlines = createOutline(doc->outline());
 	return result;
 }
diff --git a/shared/pdfprocessor.h b/shared/pdfprocessor.h
index 4802879..f932f11 100644
--- a/shared/pdfprocessor.h
+++ b/shared/pdfprocessor.h
@@ -1,5 +1,6 @@
 #ifndef PDFPROCESSOR_H
 #define PDFPROCESSOR_H
+#include <poppler-qt5.h>
 #include "processor.h"
 class PdfProcessor : public Processor
 {
@@ -7,7 +8,8 @@ class PdfProcessor : public Processor
 	PdfProcessor();
 
   public:
-	QVector<PageData> process(const QByteArray &data) const override;
+	QVector<DocumentOutlineEntry> createOutline(const QVector<Poppler::OutlineItem> &outlineItems) const;
+	DocumentProcessResult process(const QByteArray &data) const override;
 };
 
 #endif // PDFPROCESSOR_H