1
0

gui: PreviewGeneratorPlainText: Rework snippets selection

The current snippet selection is useless for many queries.

Attempt a more reasonable snippet selection by prioritizing
those where all words are contained. The more words a snippet
has, the more important it is considered.Therefore, those will
be at the top.

Only highlight whole words
Este cometimento está contido em:
Albert S. 2022-08-27 22:18:41 +02:00
ascendente e6a0c0daee
cometimento 5a47f5949f

Ver ficheiro

@ -1,4 +1,5 @@
#include <QTextStream> #include <QTextStream>
#include <QRegularExpression>
#include "previewgeneratorplaintext.h" #include "previewgeneratorplaintext.h"
#include "previewresultplaintext.h" #include "previewresultplaintext.h"
@ -74,9 +75,18 @@ QString PreviewGeneratorPlainText::generatePreviewText(QString content, RenderCo
return header + resulText.replace("\n", "<br>").mid(0, 1000); return header + resulText.replace("\n", "<br>").mid(0, 1000);
} }
struct Snippet
{
/* Contains each line number and line of the snippet*/
QString snippetText;
/* How many times a word occurs in the snippetText */
QHash<QString, int> wordCountMap;
};
QString PreviewGeneratorPlainText::generateLineBasedPreviewText(QTextStream &in, RenderConfig config, QString fileName) QString PreviewGeneratorPlainText::generateLineBasedPreviewText(QTextStream &in, RenderConfig config, QString fileName)
{ {
QString resultText; QVector<Snippet> snippets;
const unsigned int contextLinesCount = 2; const unsigned int contextLinesCount = 2;
LimitQueue<QString> queue(contextLinesCount); LimitQueue<QString> queue(contextLinesCount);
QString currentLine; QString currentLine;
@ -85,38 +95,58 @@ QString PreviewGeneratorPlainText::generateLineBasedPreviewText(QTextStream &in,
/* How many lines to read after a line with a match (like grep -A ) */ /* How many lines to read after a line with a match (like grep -A ) */
int justReadLinesCount = -1; int justReadLinesCount = -1;
auto appendLine = [&resultText](int lineNumber, QString &line) struct Snippet currentSnippet;
{ resultText.append(QString("<b>%1</b>%2<br>").arg(lineNumber).arg(line)); };
QHash<QString, int> countmap; auto appendLine = [&currentSnippet, &config](int lineNumber, QString &line)
QString header = "<b>" + fileName + "</b> "; {
int foundWordsCount = 0;
for(QString &word : config.wordsToHighlight)
{
QRegularExpression searchRegex("\\b" + word + "\\b");
if(line.contains(searchRegex))
{
currentSnippet.wordCountMap[word] = currentSnippet.wordCountMap.value(word, 0) + 1;
line.replace(searchRegex, "<span style=\"background-color: yellow;\">" + word + "</span>");
++foundWordsCount;
}
}
currentSnippet.snippetText.append(QString("<b>%1</b>%2<br>").arg(lineNumber).arg(line));
return foundWordsCount;
};
unsigned int snippetsCount = 0;
unsigned int lineCount = 0; unsigned int lineCount = 0;
while(in.readLineInto(&currentLine) && snippetsCount < MAX_SNIPPETS) while(in.readLineInto(&currentLine))
{ {
++lineCount; ++lineCount;
bool matched = false; bool matched = false;
if(justReadLinesCount > 0) if(justReadLinesCount > 0)
{ {
appendLine(lineCount, currentLine);
int result = appendLine(lineCount, currentLine);
if(justReadLinesCount == 1 && result > 0)
{
justReadLinesCount = contextLinesCount;
}
else
{
--justReadLinesCount; --justReadLinesCount;
}
continue; continue;
} }
if(justReadLinesCount == 0) if(justReadLinesCount == 0)
{ {
resultText += "---<br>"; currentSnippet.snippetText += "---<br>";
justReadLinesCount = -1; justReadLinesCount = -1;
++snippetsCount; snippets.append(currentSnippet);
currentSnippet = {};
} }
for(QString &word : config.wordsToHighlight) for(QString &word : config.wordsToHighlight)
{ {
if(currentLine.contains(word, Qt::CaseInsensitive)) if(currentLine.contains(QRegularExpression("\\b" + word + "\\b")))
{ {
countmap[word] = countmap.value(word, 0) + 1;
matched = true; matched = true;
currentLine.replace(word, "<span style=\"background-color: yellow;\">" + word + "</span>", break;
Qt::CaseInsensitive);
} }
} }
if(matched) if(matched)
@ -125,7 +155,6 @@ QString PreviewGeneratorPlainText::generateLineBasedPreviewText(QTextStream &in,
{ {
int queuedLineCount = lineCount - queue.size(); int queuedLineCount = lineCount - queue.size();
QString queuedLine = queue.dequeue(); QString queuedLine = queue.dequeue();
appendLine(queuedLineCount, queuedLine); appendLine(queuedLineCount, queuedLine);
} }
appendLine(lineCount, currentLine); appendLine(lineCount, currentLine);
@ -137,14 +166,78 @@ QString PreviewGeneratorPlainText::generateLineBasedPreviewText(QTextStream &in,
} }
} }
for(QString &word : config.wordsToHighlight) if(!currentSnippet.snippetText.isEmpty())
{ {
header += word + ": " + QString::number(countmap[word]) + " "; currentSnippet.snippetText += "---<br>";
snippets.append(currentSnippet);
} }
if(snippetsCount == MAX_SNIPPETS)
std::sort(snippets.begin(), snippets.end(),
[](Snippet &a, Snippet &b)
{
int differentWordsA = 0;
int totalWordsA = 0;
int differentWordsB = 0;
int totalWordsB = 0;
for(int count : a.wordCountMap.values())
{
if(count > 0)
{
++differentWordsA;
}
totalWordsA += count;
}
for(int count : b.wordCountMap.values())
{
if(count > 0)
{
++differentWordsB;
}
totalWordsB += count;
}
if(differentWordsA > differentWordsB)
{
return true;
}
if(differentWordsA == differentWordsB)
{
return totalWordsA > totalWordsB;
}
return false;
});
QString resultText = "";
unsigned int snippetsCount = 0;
QString header = "<b>" + fileName + "</b> ";
QHash<QString, int> totalWordCountMap;
bool isTruncated = false;
for(Snippet &snippet : snippets)
{
if(snippetsCount++ < MAX_SNIPPETS)
{
resultText += snippet.snippetText;
}
else
{
isTruncated = true;
}
for(auto it = snippet.wordCountMap.keyValueBegin(); it != snippet.wordCountMap.keyValueEnd(); it++)
{
totalWordCountMap[it->first] = totalWordCountMap.value(it->first, 0) + it->second;
}
}
if(isTruncated)
{ {
header += "(truncated) "; header += "(truncated) ";
} }
for(QString &word : config.wordsToHighlight)
{
header += word + ": " + QString::number(totalWordCountMap[word]) + " ";
}
header += "<hr>"; header += "<hr>";
return header + resultText; return header + resultText;