PdfProcessor: Extract outline from documents

Esse commit está contido em:
Albert S. 2023-05-14 14:15:50 +02:00
commit b2ae0e488f
2 arquivos alterados com 29 adições e 5 exclusões

Ver arquivo

@ -5,9 +5,30 @@ PdfProcessor::PdfProcessor()
{ {
} }
// Converts a list of Poppler outline (table-of-contents) items into a list of
// DocumentOutlineEntry values, descending recursively into child items.
//
// outlineItems: the outline items of one nesting level.
// Returns one DocumentOutlineEntry per input item, in input order; each entry
// carries the converted children of its item in `children`.
//
// NOTE(review): this text comes from a side-by-side diff rendering. On the
// next three lines the left half is the pre-commit text (the old process()
// signature and its `result` declaration) and the right half is the new
// createOutline() text.
QVector<PageData> PdfProcessor::process(const QByteArray &data) const QVector<DocumentOutlineEntry> PdfProcessor::createOutline(const QVector<Poppler::OutlineItem> &outlineItems) const
{ {
QVector<PageData> result; QVector<DocumentOutlineEntry> result;
for(const Poppler::OutlineItem &outlineItem : outlineItems)
{
DocumentOutlineEntry documentOutlineEntry;
// The entry's text is the outline item's name.
documentOutlineEntry.text = outlineItem.name();
// Every entry is tagged as a page destination, whether or not the item
// actually has a destination (see the check below).
documentOutlineEntry.type = OUTLINE_DESTINATION_TYPE_PAGE;
// destinationPage is only assigned when the item has a destination.
// NOTE(review): otherwise it keeps whatever value a default-constructed
// DocumentOutlineEntry has - confirm that the struct initializes it.
if(!outlineItem.destination().isNull())
{
documentOutlineEntry.destinationPage = outlineItem.destination()->pageNumber();
}
// Recurse so nested outline levels are preserved in `children`.
if(outlineItem.hasChildren())
{
documentOutlineEntry.children = createOutline(outlineItem.children());
}
result.append(documentOutlineEntry);
}
return result;
}
DocumentProcessResult PdfProcessor::process(const QByteArray &data) const
{
DocumentProcessResult result;
QScopedPointer<Poppler::Document> doc(Poppler::Document::loadFromData(data)); QScopedPointer<Poppler::Document> doc(Poppler::Document::loadFromData(data));
if(doc.isNull()) if(doc.isNull())
{ {
@ -26,12 +47,13 @@ QVector<PageData> PdfProcessor::process(const QByteArray &data) const
for(auto i = 0; i < pagecount; i++) for(auto i = 0; i < pagecount; i++)
{ {
QString text = doc->page(i)->text(entirePage); QString text = doc->page(i)->text(entirePage);
result.append({static_cast<unsigned int>(i + 1), text}); result.pages.append({static_cast<unsigned int>(i + 1), text});
/*TODO: hack, so we can fts search several words over the whole document, not just pages. /*TODO: hack, so we can fts search several words over the whole document, not just pages.
* this of course uses more space and should be solved differently. * this of course uses more space and should be solved differently.
*/ */
entire += text; entire += text;
} }
result.append({0, entire}); result.pages.append({0, entire});
result.outlines = createOutline(doc->outline());
return result; return result;
} }

Ver arquivo

@ -1,5 +1,6 @@
#ifndef PDFPROCESSOR_H #ifndef PDFPROCESSOR_H
#define PDFPROCESSOR_H #define PDFPROCESSOR_H
#include <poppler-qt5.h>
#include "processor.h" #include "processor.h"
class PdfProcessor : public Processor class PdfProcessor : public Processor
{ {
@ -7,7 +8,8 @@ class PdfProcessor : public Processor
PdfProcessor(); PdfProcessor();
public: public:
QVector<PageData> process(const QByteArray &data) const override; QVector<DocumentOutlineEntry> createOutline(const QVector<Poppler::OutlineItem> &outlineItems) const;
DocumentProcessResult process(const QByteArray &data) const override;
}; };
#endif // PDFPROCESSOR_H #endif // PDFPROCESSOR_H