2022-04-24 12:30:31 +02:00
|
|
|
#include <QTextStream>
|
2022-08-27 22:18:41 +02:00
|
|
|
#include <QRegularExpression>
|
2022-04-24 12:30:31 +02:00
|
|
|
|
|
|
|
#include "previewgeneratorplaintext.h"
|
|
|
|
#include "previewresultplaintext.h"
|
2022-08-06 10:01:24 +02:00
|
|
|
#include "../shared/limitqueue.h"
|
2022-04-24 12:30:31 +02:00
|
|
|
|
2022-07-24 11:34:52 +02:00
|
|
|
/**
 * @brief Builds an HTML preview out of character-window excerpts around highlighted words.
 *
 * Every non-empty word of config.wordsToHighlight is searched case-insensitively in
 * @p content. Each hit contributes an excerpt reaching up to 50 characters to either
 * side of the hit position, until MAX_SNIPPETS excerpts have been collected. Excerpts
 * are emitted in the order of their position inside @p content.
 *
 * Scanning for a word stops at the first hit that lies inside the most recently
 * collected excerpt, so the per-word counts in the header do not include later hits.
 *
 * @param content  Plain text to take the excerpts from.
 * @param config   Render configuration; only wordsToHighlight is read here.
 * @param fileName Name shown in bold at the start of the header.
 * @return The header (file name, per-word counts, "(truncated)" when the excerpt limit
 *         was reached, then a rule) followed by at most 1000 characters of excerpt markup.
 */
QString PreviewGeneratorPlainText::generatePreviewText(QString content, RenderConfig config, QString fileName)
{
	const int contextChars = 50;
	const QString highlightOpen = "<span style=\"background-color: yellow;\">";
	const QString highlightClose = "</span>";

	/* Raw excerpts keyed by hit position: QMap iterates them in document order */
	QMap<int, QString> snippet;
	int coveredRange = -1;
	int lastWordPos = -1;
	QHash<QString, int> countmap;
	unsigned int currentSnippets = 0;

	for(const QString &word : config.wordsToHighlight)
	{
		/* An empty word matches at every position and cannot be highlighted sensibly */
		if(word.isEmpty())
		{
			continue;
		}
		int lastPos = 0;
		int index = content.indexOf(word, lastPos, Qt::CaseInsensitive);
		while(index != -1 && currentSnippets < MAX_SNIPPETS)
		{
			countmap[word] = countmap.value(word, 0) + 1;

			/* The hit is already shown by the most recent excerpt: stop scanning this word */
			if(index >= lastWordPos && index <= coveredRange)
			{
				break;
			}

			int begin = index - contextChars;
			if(begin < 0)
			{
				begin = 0;
			}
			int after = index + contextChars;
			if(after > content.size())
			{
				after = content.size();
			}

			/* QString::mid() takes a length, not an end position */
			snippet[index] = content.mid(begin, after - begin);
			coveredRange = after;
			lastPos = index;

			index = content.indexOf(word, lastPos + 1, Qt::CaseInsensitive);
			++currentSnippets;
		}
		lastWordPos = lastPos;
	}

	QString resulText;
	for(auto it = snippet.constBegin(); it != snippet.constEnd(); ++it)
	{
		/* Escape the file content only, so the markup added below stays real markup */
		QString excerpt = it.value().toHtmlEscaped();
		for(const QString &word : config.wordsToHighlight)
		{
			if(word.isEmpty())
			{
				continue;
			}
			/* The excerpt is escaped, so the word has to be escaped to be found in it.
			 * NOTE: a later word can still match inside the markup added for an earlier one. */
			const QString escapedWord = word.toHtmlEscaped();
			const QString highlighted = highlightOpen + escapedWord + highlightClose;
			excerpt.replace(escapedWord, highlighted, Qt::CaseInsensitive);
		}
		excerpt.replace("\n", "<br>");
		resulText += "...<br>" + excerpt + "...<br>";
	}

	QString header = "<b>" + fileName.toHtmlEscaped() + "</b> ";
	for(const QString &word : config.wordsToHighlight)
	{
		if(word.isEmpty())
		{
			continue;
		}
		header += word.toHtmlEscaped() + ": " + QString::number(countmap.value(word, 0)) + " ";
	}
	if(currentSnippets == MAX_SNIPPETS)
	{
		header += "(truncated)";
	}
	header += "<hr>";

	/* Hard cap on the excerpt markup; the cut is not aware of tag boundaries */
	return header + resulText.mid(0, 1000);
}
|
|
|
|
|
2022-08-27 22:18:41 +02:00
|
|
|
struct Snippet
|
|
|
|
{
|
|
|
|
/* Contains each line number and line of the snippet*/
|
|
|
|
QString snippetText;
|
|
|
|
|
|
|
|
/* How many times a word occurs in the snippetText */
|
|
|
|
QHash<QString, int> wordCountMap;
|
|
|
|
};
|
|
|
|
|
2022-08-06 10:01:24 +02:00
|
|
|
/**
 * @brief Builds an HTML preview made of line-based snippets, similar to grep with context.
 *
 * The stream is read line by line. A line containing one of the non-empty words of
 * config.wordsToHighlight (case-insensitive) opens a snippet consisting of the queued
 * preceding lines, the matching line and the lines that follow it. Every line is
 * HTML-escaped, prefixed with its bold line number, and hits are wrapped in a yellow
 * highlight span. Finished snippets are ordered by the number of different words they
 * contain, then by their total hit count (both descending); equally ranked snippets
 * keep their order of appearance. At most MAX_SNIPPETS snippets are put into the
 * result, while the counts in the header cover all snippets.
 *
 * @param in       Stream positioned at the text to preview; read until it is exhausted.
 * @param config   Render configuration; only wordsToHighlight is read here.
 * @param fileName Name shown in bold at the start of the header.
 * @return The header (file name, "(truncated) " when snippets were left out, per-word
 *         counts, then a rule) followed by the snippet markup.
 */
QString PreviewGeneratorPlainText::generateLineBasedPreviewText(QTextStream &in, RenderConfig config, QString fileName)
{
	/* Per-word data that is identical for every line, prepared once instead of per line */
	struct HighlightTerm
	{
		/* Word as configured; used as key of the count maps */
		QString word;
		/* HTML-escaped word, comparable against the escaped lines */
		QString escapedWord;
		/* Case-sensitive whole-word pattern for escapedWord */
		QRegularExpression wholeWord;
		/* Markup put in place of a hit */
		QString replacement;
	};

	QVector<HighlightTerm> terms;
	for(const QString &word : config.wordsToHighlight)
	{
		/* An empty word would match every line and cannot be highlighted sensibly */
		if(word.isEmpty())
		{
			continue;
		}
		const QString escapedWord = word.toHtmlEscaped();
		/* Escape the word so that characters such as '+' or '(' are matched literally */
		const QString pattern = "\\b" + QRegularExpression::escape(escapedWord) + "\\b";
		const QString replacement = "<span style=\"background-color: yellow;\">" + escapedWord + "</span>";
		terms.append(HighlightTerm{word, escapedWord, QRegularExpression(pattern), replacement});
	}

	QVector<Snippet> snippets;
	const unsigned int contextLinesCount = 2;
	/* Most recent non-matching lines, used as leading context of the next snippet.
	 * Constructed with contextLinesCount, presumably its capacity. */
	LimitQueue<QString> queue(contextLinesCount);

	/* How many lines to read after a line with a match (like grep -A ).
	 * -1: no snippet open, 0: the open snippet is complete and has to be closed. */
	int justReadLinesCount = -1;

	Snippet currentSnippet;

	/* Highlights the hits in line, appends it with its number to the current snippet
	 * and returns how many of the words were found in it. */
	auto appendLine = [&currentSnippet, &terms](int lineNumber, QString &line)
	{
		int foundWordsCount = 0;
		for(const HighlightTerm &term : terms)
		{
			/* Prefer whole-word hits; fall back to a case-insensitive substring search */
			const bool containsRegex = line.contains(term.wholeWord);
			const bool contains = containsRegex || line.contains(term.escapedWord, Qt::CaseInsensitive);
			if(!contains)
			{
				continue;
			}
			currentSnippet.wordCountMap[term.word] = currentSnippet.wordCountMap.value(term.word, 0) + 1;
			if(containsRegex)
			{
				line.replace(term.wholeWord, term.replacement);
			}
			else
			{
				line.replace(term.escapedWord, term.replacement, Qt::CaseInsensitive);
			}
			++foundWordsCount;
		}
		currentSnippet.snippetText.append(QString("<b>%1</b>%2<br>").arg(lineNumber).arg(line));
		return foundWordsCount;
	};

	/* Separate buffer for the raw line so that its reserved capacity is reused */
	QString rawLine;
	rawLine.reserve(512);

	unsigned int lineCount = 0;
	while(in.readLineInto(&rawLine))
	{
		QString currentLine = rawLine.toHtmlEscaped();
		++lineCount;

		if(justReadLinesCount > 0)
		{
			/* Trailing context of the open snippet. The window is only extended when
			 * the last line of the window contains a hit itself. */
			const int result = appendLine(lineCount, currentLine);
			if(justReadLinesCount == 1 && result > 0)
			{
				justReadLinesCount = contextLinesCount;
			}
			else
			{
				--justReadLinesCount;
			}
			continue;
		}

		if(justReadLinesCount == 0)
		{
			currentSnippet.snippetText += "---<br>";
			justReadLinesCount = -1;
			snippets.append(currentSnippet);
			currentSnippet = {};
		}

		bool matched = false;
		for(const HighlightTerm &term : terms)
		{
			if(currentLine.contains(term.escapedWord, Qt::CaseInsensitive))
			{
				matched = true;
				break;
			}
		}

		if(matched)
		{
			/* Leading context: the queued lines directly precede the current line */
			while(queue.size() > 0)
			{
				int queuedLineCount = lineCount - queue.size();
				QString queuedLine = queue.dequeue();
				appendLine(queuedLineCount, queuedLine);
			}
			appendLine(lineCount, currentLine);
			justReadLinesCount = contextLinesCount;
		}
		else
		{
			queue.enqueue(currentLine);
		}
	}

	/* The stream ended while a snippet was still open */
	if(!currentSnippet.snippetText.isEmpty())
	{
		currentSnippet.snippetText += "---<br>";
		snippets.append(currentSnippet);
	}

	struct Relevance
	{
		int differentWords;
		int totalWords;
	};
	auto relevanceOf = [](const Snippet &snippet)
	{
		Relevance relevance{0, 0};
		for(auto it = snippet.wordCountMap.constBegin(); it != snippet.wordCountMap.constEnd(); ++it)
		{
			if(it.value() > 0)
			{
				++relevance.differentWords;
			}
			relevance.totalWords += it.value();
		}
		return relevance;
	};

	/* More different words first, then more hits; ties keep their order of appearance */
	std::stable_sort(snippets.begin(), snippets.end(),
					 [&relevanceOf](const Snippet &a, const Snippet &b)
					 {
						 const Relevance relevanceA = relevanceOf(a);
						 const Relevance relevanceB = relevanceOf(b);
						 if(relevanceA.differentWords != relevanceB.differentWords)
						 {
							 return relevanceA.differentWords > relevanceB.differentWords;
						 }
						 return relevanceA.totalWords > relevanceB.totalWords;
					 });

	QString resultText = "";
	unsigned int snippetsCount = 0;
	QString header = "<b>" + fileName.toHtmlEscaped() + "</b> ";
	QHash<QString, int> totalWordCountMap;
	bool isTruncated = false;

	for(const Snippet &snippet : snippets)
	{
		if(snippetsCount++ < MAX_SNIPPETS)
		{
			resultText += snippet.snippetText;
		}
		else
		{
			isTruncated = true;
		}
		/* The totals include snippets that are not shown */
		for(auto it = snippet.wordCountMap.constBegin(); it != snippet.wordCountMap.constEnd(); ++it)
		{
			totalWordCountMap[it.key()] += it.value();
		}
	}

	if(isTruncated)
	{
		header += "(truncated) ";
	}
	for(const HighlightTerm &term : terms)
	{
		header += term.escapedWord + ": " + QString::number(totalWordCountMap.value(term.word, 0)) + " ";
	}
	header += "<hr>";

	return header + resultText;
}
|
|
|
|
|
2022-07-24 11:34:52 +02:00
|
|
|
/**
 * @brief Generates the plain-text preview for a document.
 *
 * Opens @p documentPath as a read-only text file and stores the line-based preview of
 * its content in the result. When the file cannot be opened, the result is returned
 * without any text having been set.
 *
 * @param config       Render configuration passed on to the preview text generation.
 * @param documentPath Path of the file to preview.
 * @param page         Page number stored in the result.
 * @return Shared pointer to the PreviewResultPlainText created for the document.
 */
QSharedPointer<PreviewResult> PreviewGeneratorPlainText::generate(RenderConfig config, QString documentPath,
																   unsigned int page)
{
	/* Owned by a smart pointer from the start, so no exit path can leak the result */
	QSharedPointer<PreviewResultPlainText> result(new PreviewResultPlainText(documentPath, page));

	QFile file(documentPath);
	if(!file.open(QFile::ReadOnly | QFile::Text))
	{
		return result;
	}

	QTextStream in(&file);
	const QFileInfo info{documentPath};
	result->setText(generateLineBasedPreviewText(in, config, info.fileName()));
	return result;
}
|