PdfProcessor: Extract outline from documents
This commit is contained in:
		| @@ -5,9 +5,30 @@ PdfProcessor::PdfProcessor() | |||||||
| { | { | ||||||
| } | } | ||||||
|  |  | ||||||
| QVector<PageData> PdfProcessor::process(const QByteArray &data) const | QVector<DocumentOutlineEntry> PdfProcessor::createOutline(const QVector<Poppler::OutlineItem> &outlineItems) const | ||||||
| { | { | ||||||
| 	QVector<PageData> result; | 	QVector<DocumentOutlineEntry> result; | ||||||
|  | 	for(const Poppler::OutlineItem &outlineItem : outlineItems) | ||||||
|  | 	{ | ||||||
|  | 		DocumentOutlineEntry documentOutlineEntry; | ||||||
|  | 		documentOutlineEntry.text = outlineItem.name(); | ||||||
|  | 		documentOutlineEntry.type = OUTLINE_DESTINATION_TYPE_PAGE; | ||||||
|  | 		if(!outlineItem.destination().isNull()) | ||||||
|  | 		{ | ||||||
|  | 			documentOutlineEntry.destinationPage = outlineItem.destination()->pageNumber(); | ||||||
|  | 		} | ||||||
|  | 		if(outlineItem.hasChildren()) | ||||||
|  | 		{ | ||||||
|  | 			documentOutlineEntry.children = createOutline(outlineItem.children()); | ||||||
|  | 		} | ||||||
|  | 		result.append(documentOutlineEntry); | ||||||
|  | 	} | ||||||
|  | 	return result; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | DocumentProcessResult PdfProcessor::process(const QByteArray &data) const | ||||||
|  | { | ||||||
|  | 	DocumentProcessResult result; | ||||||
| 	QScopedPointer<Poppler::Document> doc(Poppler::Document::loadFromData(data)); | 	QScopedPointer<Poppler::Document> doc(Poppler::Document::loadFromData(data)); | ||||||
| 	if(doc.isNull()) | 	if(doc.isNull()) | ||||||
| 	{ | 	{ | ||||||
| @@ -26,12 +47,13 @@ QVector<PageData> PdfProcessor::process(const QByteArray &data) const | |||||||
| 	for(auto i = 0; i < pagecount; i++) | 	for(auto i = 0; i < pagecount; i++) | ||||||
| 	{ | 	{ | ||||||
| 		QString text = doc->page(i)->text(entirePage); | 		QString text = doc->page(i)->text(entirePage); | ||||||
| 		result.append({static_cast<unsigned int>(i + 1), text}); | 		result.pages.append({static_cast<unsigned int>(i + 1), text}); | ||||||
| 		/*TODO: hack, so we can fts search several words over the whole document, not just pages. | 		/*TODO: hack, so we can fts search several words over the whole document, not just pages. | ||||||
| 		 * this of course uses more space and should be solved differently. | 		 * this of course uses more space and should be solved differently. | ||||||
| 		 */ | 		 */ | ||||||
| 		entire += text; | 		entire += text; | ||||||
| 	} | 	} | ||||||
| 	result.append({0, entire}); | 	result.pages.append({0, entire}); | ||||||
|  | 	result.outlines = createOutline(doc->outline()); | ||||||
| 	return result; | 	return result; | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1,5 +1,6 @@ | |||||||
| #ifndef PDFPROCESSOR_H | #ifndef PDFPROCESSOR_H | ||||||
| #define PDFPROCESSOR_H | #define PDFPROCESSOR_H | ||||||
|  | #include <poppler-qt5.h> | ||||||
| #include "processor.h" | #include "processor.h" | ||||||
| class PdfProcessor : public Processor | class PdfProcessor : public Processor | ||||||
| { | { | ||||||
| @@ -7,7 +8,8 @@ class PdfProcessor : public Processor | |||||||
| 	PdfProcessor(); | 	PdfProcessor(); | ||||||
|  |  | ||||||
|   public: |   public: | ||||||
| 	QVector<PageData> process(const QByteArray &data) const override; | 	QVector<DocumentOutlineEntry> createOutline(const QVector<Poppler::OutlineItem> &outlineItems) const; | ||||||
|  | 	DocumentProcessResult process(const QByteArray &data) const override; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| #endif // PDFPROCESSOR_H | #endif // PDFPROCESSOR_H | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user