PdfProcessor: Extract outline from documents
This commit is contained in:
parent
02a371b81e
commit
b2ae0e488f
@ -5,9 +5,30 @@ PdfProcessor::PdfProcessor()
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
QVector<PageData> PdfProcessor::process(const QByteArray &data) const
|
/**
 * Recursively converts a list of Poppler outline items into
 * DocumentOutlineEntry objects.
 *
 * Every entry takes the item's name as its text and is typed as
 * OUTLINE_DESTINATION_TYPE_PAGE. The destination page is only assigned
 * when the item has a destination. Child items are converted by calling
 * this function again on the item's children.
 *
 * @param outlineItems outline items of a single nesting level
 * @return one entry per input item, in input order
 */
QVector<DocumentOutlineEntry> PdfProcessor::createOutline(const QVector<Poppler::OutlineItem> &outlineItems) const
{
    QVector<DocumentOutlineEntry> entries;
    for(const Poppler::OutlineItem &item : outlineItems)
    {
        DocumentOutlineEntry entry;
        entry.text = item.name();
        entry.type = OUTLINE_DESTINATION_TYPE_PAGE;

        const auto destination = item.destination();
        if(destination)
        {
            entry.destinationPage = destination->pageNumber();
        }
        // NOTE(review): without a destination, destinationPage keeps whatever
        // value a default-constructed DocumentOutlineEntry has - confirm the
        // struct initializes that member.

        if(item.hasChildren())
        {
            entry.children = createOutline(item.children());
        }

        entries.push_back(entry);
    }
    return entries;
}
|
||||||
|
|
||||||
|
DocumentProcessResult PdfProcessor::process(const QByteArray &data) const
|
||||||
|
{
|
||||||
|
DocumentProcessResult result;
|
||||||
QScopedPointer<Poppler::Document> doc(Poppler::Document::loadFromData(data));
|
QScopedPointer<Poppler::Document> doc(Poppler::Document::loadFromData(data));
|
||||||
if(doc.isNull())
|
if(doc.isNull())
|
||||||
{
|
{
|
||||||
@ -26,12 +47,13 @@ QVector<PageData> PdfProcessor::process(const QByteArray &data) const
|
|||||||
for(auto i = 0; i < pagecount; i++)
|
for(auto i = 0; i < pagecount; i++)
|
||||||
{
|
{
|
||||||
QString text = doc->page(i)->text(entirePage);
|
QString text = doc->page(i)->text(entirePage);
|
||||||
result.append({static_cast<unsigned int>(i + 1), text});
|
result.pages.append({static_cast<unsigned int>(i + 1), text});
|
||||||
/*TODO: hack, so we can fts search several words over the whole document, not just pages.
|
/*TODO: hack, so we can fts search several words over the whole document, not just pages.
|
||||||
* this of course uses more space and should be solved differently.
|
* this of course uses more space and should be solved differently.
|
||||||
*/
|
*/
|
||||||
entire += text;
|
entire += text;
|
||||||
}
|
}
|
||||||
result.append({0, entire});
|
result.pages.append({0, entire});
|
||||||
|
result.outlines = createOutline(doc->outline());
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#ifndef PDFPROCESSOR_H
|
#ifndef PDFPROCESSOR_H
|
||||||
#define PDFPROCESSOR_H
|
#define PDFPROCESSOR_H
|
||||||
|
#include <poppler-qt5.h>
|
||||||
#include "processor.h"
|
#include "processor.h"
|
||||||
class PdfProcessor : public Processor
|
class PdfProcessor : public Processor
|
||||||
{
|
{
|
||||||
@ -7,7 +8,8 @@ class PdfProcessor : public Processor
|
|||||||
PdfProcessor();
|
PdfProcessor();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
QVector<PageData> process(const QByteArray &data) const override;
|
QVector<DocumentOutlineEntry> createOutline(const QVector<Poppler::OutlineItem> &outlineItems) const;
|
||||||
|
DocumentProcessResult process(const QByteArray &data) const override;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // PDFPROCESSOR_H
|
#endif // PDFPROCESSOR_H
|
||||||
|
Loading…
Reference in New Issue
Block a user