first commit
Šī revīzija ir iekļauta:
revīzija
364d56f5d4
18
README.md
Parasts fails
18
README.md
Parasts fails
@ -0,0 +1,18 @@
|
|||||||
|
easyindex
|
||||||
|
=========
|
||||||
|
easyindex creates a poor man's full-text search for your files using a
|
||||||
|
sqlite database.
|
||||||
|
|
||||||
|
You need the Python "chardet" package, since easyindex will try to convert the
|
||||||
|
encoding of the files in case initial utf-8 decoding fails.
|
||||||
|
|
||||||
|
pdftotext is needed to search in .pdf files.
|
||||||
|
|
||||||
|
No GUI is provided at this time, nor does it concern itself with search
|
||||||
|
too much.
|
||||||
|
|
||||||
|
Setup
|
||||||
|
-----
|
||||||
|
sqlite3 easyindex.db < create.sql
|
||||||
|
|
||||||
|
|
118
addindex
Izpildāmais fails
118
addindex
Izpildāmais fails
@ -0,0 +1,118 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
import sqlite3
|
||||||
|
import os.path
|
||||||
|
import sys
|
||||||
|
import subprocess
|
||||||
|
import zipfile
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
import re
|
||||||
|
import chardet
|
||||||
|
import config
|
||||||
|
# Shared database connection for this script.  isolation_level=None stops
# the sqlite3 module from opening transactions implicitly; the script issues
# its own BEGIN/COMMIT statements further down.
dbcon = sqlite3.connect(config.DBPATH, isolation_level=None)
|
||||||
|
|
||||||
|
def striptags(content):
    """Return the text of ``content`` with markup tags removed.

    ``content`` is first parsed as XML; when that works, the text of all
    elements is concatenated.  When it is not well-formed XML, everything
    that looks like a tag (``<...>``) is replaced by a space instead.

    Args:
        content: Markup as a string.

    Returns:
        The text without tags.
    """
    try:
        root = xml.etree.ElementTree.fromstring(content)
    except (xml.etree.ElementTree.ParseError, ValueError):
        # Not well-formed XML.  Substitute a space (not nothing) for each
        # tag so that "test<br>test2" yields "test test2" rather than
        # gluing the two words together.
        return re.sub(r'<[^>]*>', ' ', content)
    return ''.join(root.itertext())
|
||||||
|
|
||||||
|
|
||||||
|
def strip_irrelevant(content):
    """Flatten ``content`` into a single line of space-separated text.

    Form feeds are dropped; newlines, carriage returns and tabs become
    spaces; runs of such whitespace collapse to one space.

    Args:
        content: Text to flatten.

    Returns:
        The flattened text.
    """
    # Remove form feeds first so the whitespace on either side of one is
    # collapsed into a single space by the substitution below.
    without_formfeeds = content.replace("\f", "")
    return re.sub(r'[ \n\r\t]+', ' ', without_formfeeds)
|
||||||
|
|
||||||
|
def process_pdf(path):
    """Extract the text of a PDF file using the external ``pdftotext`` tool.

    Args:
        path: Path of the PDF file.

    Returns:
        The tool's standard output, decoded as UTF-8 and flattened by
        ``strip_irrelevant``.
    """
    completed = subprocess.run(
        ["pdftotext", path, "-"],
        # The script may be reading its list of paths from stdin; keep the
        # child process away from it.
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # Replace undecodable bytes instead of raising, so that one odd PDF
    # cannot abort the whole indexing run.
    text = completed.stdout.decode('utf-8', errors='replace')
    return strip_irrelevant(text)
|
||||||
|
|
||||||
|
def process_odt(path):
    """Extract the text of an .odt document.

    The file is opened as a zip archive and its ``content.xml`` member is
    decoded as UTF-8 and passed through ``striptags``.

    Args:
        path: Path of the .odt file.

    Returns:
        The text of ``content.xml`` with the tags stripped.
    """
    # The context manager closes the archive even when reading fails
    # (e.g. the archive has no content.xml member).
    with zipfile.ZipFile(path) as archive:
        content = archive.read("content.xml").decode("utf-8")
    return striptags(content)
|
||||||
|
|
||||||
|
def process_striptags(path):
    """Read a text file via ``process_text`` and strip its markup tags.

    Args:
        path: Path of the file.

    Returns:
        The decoded file content after ``striptags``.
    """
    return striptags(process_text(path))
|
||||||
|
|
||||||
|
def process_text(path):
    """Read a file and return its content as text.

    The bytes are decoded as UTF-8 first.  If that fails, ``chardet`` is
    asked for the encoding and the bytes are decoded with it.

    Args:
        path: Path of the file.

    Returns:
        The decoded content, or "" when no encoding could be detected or
        decoding with the detected encoding failed.
    """
    with open(path, "rb") as fd:
        content = fd.read()

    try:
        return content.decode("utf-8")
    except UnicodeDecodeError:
        pass  # Not UTF-8: fall through to encoding detection.

    try:
        encoding = chardet.detect(content)["encoding"]
        if encoding is None:
            return ""
        return content.decode(encoding)
    except Exception:
        # Best effort: a file that cannot be decoded is indexed with empty
        # content rather than aborting the run.
        print("FAILE DECODING: " + path)
        return ""
|
||||||
|
|
||||||
|
def process_nothing(path):
    """Processor that extracts no content.

    Args:
        path: Path of the file (unused).

    Returns:
        Always the empty string.
    """
    return ""
|
||||||
|
|
||||||
|
def exists(abspath, mtime):
    """Tell whether the index holds this file with this modification time.

    Args:
        abspath: Path as stored in the ``file`` table.
        mtime: Modification time to compare with the stored one.

    Returns:
        True if a row with this path and mtime exists, otherwise False.
    """
    cursor = dbcon.cursor()
    cursor.execute(
        "SELECT 1 FROM file WHERE path = ? AND mtime = ?", (abspath, mtime)
    )
    # The query selects the constant 1, so any returned row is a match.
    return cursor.fetchone() is not None
|
||||||
|
|
||||||
|
def insert(path, cursor):
    """Add a file to the index, or refresh its entry if the file changed.

    Nothing is written when the stored modification time already matches.
    Files whose extension has no entry in ``preprocess`` are stored with
    empty content.

    Args:
        path: Path of the file; it is stored as an absolute path.
        cursor: Database cursor used for the write.
    """
    print("processing", path)
    abspath = os.path.abspath(path)
    try:
        mtime = int(os.stat(abspath).st_mtime)
    except OSError as err:
        # A vanished or unreadable file must not abort the whole batch.
        print("Skipping " + abspath + ": " + str(err))
        return

    if exists(abspath, mtime):
        print("Leaving alone " + abspath + " because it wasn't changed")
        return

    # Lower-case the extension so that e.g. ".PDF" is handled like ".pdf".
    ext = os.path.splitext(abspath)[1].lower()
    processor = preprocess.get(ext, process_nothing)
    content = processor(abspath)

    # Delete and re-insert instead of INSERT OR REPLACE: when REPLACE
    # removes the conflicting row, SQLite fires delete triggers only if
    # recursive triggers are enabled, so the old text would stay behind in
    # the full-text index.
    cursor.execute("DELETE FROM file WHERE path = ?", (abspath,))
    cursor.execute(
        "INSERT INTO file(path, mtime, content) VALUES(?, ?, ?)",
        (abspath, mtime, content),
    )
|
||||||
|
|
||||||
|
# Maps a file extension (including the dot) to the function that extracts
# the text to index from such a file.
preprocess = {
    ".pdf": process_pdf,
    ".odt": process_odt,
    ".html": process_striptags,
    ".xml": process_nothing,
    ".txt": process_text,
    ".sql": process_text,
    ".c": process_text,
    ".cpp": process_text,
    ".js": process_text,
    ".java": process_text,
    ".py": process_text,
    ".md": process_text,
}

cursor = dbcon.cursor()
# All inserts of one run go into a single explicit transaction.
cursor.execute("BEGIN TRANSACTION")

if len(sys.argv) < 2:
    # No arguments: read one path per line from stdin.
    for line in sys.stdin:
        path = line.rstrip("\n")
        # Skip blank lines; an empty path would resolve to the current
        # directory and get indexed.
        if path:
            insert(path, cursor)
else:
    for inputfile in sys.argv[1:]:
        insert(inputfile, cursor)

cursor.execute("COMMIT TRANSACTION")

dbcon.close()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
1
config.py
Parasts fails
1
config.py
Parasts fails
@ -0,0 +1 @@
|
|||||||
|
# Path of the SQLite database file.  addindex and searchindex pass it to
# sqlite3.connect(); delindex hard-codes the same path in its DBFILE variable.
DBPATH="/home/db/easyindex.sqlite"
|
15
create.sql
Parasts fails
15
create.sql
Parasts fails
@ -0,0 +1,15 @@
|
|||||||
|
-- Create a table. And an external content fts5 table to index it.
CREATE TABLE file(id INTEGER PRIMARY KEY, path varchar(4096) UNIQUE, mtime integer, content text);
CREATE VIRTUAL TABLE file_fts USING fts5(content, content='file', content_rowid='id');

-- Triggers to keep the FTS index up to date.

-- New row: add its content to the index.
CREATE TRIGGER file_ai AFTER INSERT ON file BEGIN
    INSERT INTO file_fts(rowid, content) VALUES (new.id, new.content);
END;

-- Deleted row: remove its content from the index.
CREATE TRIGGER file_ad AFTER DELETE ON file BEGIN
    INSERT INTO file_fts(file_fts, rowid, content) VALUES('delete', old.id, old.content);
END;

-- Updated row: remove the old content from the index, then add the new.
CREATE TRIGGER file_au AFTER UPDATE ON file BEGIN
    INSERT INTO file_fts(file_fts, rowid, content) VALUES('delete', old.id, old.content);
    INSERT INTO file_fts(rowid, content) VALUES (new.id, new.content);
END;
|
21
delindex
Izpildāmais fails
21
delindex
Izpildāmais fails
@ -0,0 +1,21 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
TEMPFILE=$(mktemp) || exit 1
DBFILE="/home/db/easyindex.sqlite"

# Remove the temporary SQL script however this script ends.
trap 'rm -f "$TEMPFILE"' EXIT

# Append a DELETE statement for the path given as $1 to the SQL script.
# Defined with the portable name() form: "function name()" is not POSIX sh.
todelete()
{
    # Double embedded single quotes so the path stays one SQL string literal.
    escaped=$(printf '%s\n' "$1" | sed "s/'/''/g")
    printf "DELETE FROM file WHERE path = '%s';\n" "$escaped" >> "$TEMPFILE"
}

echo "BEGIN TRANSACTION;" >> "$TEMPFILE"

# Queue every indexed path that no longer exists on disk for deletion.
# IFS= and -r keep leading/trailing blanks and backslashes in paths intact.
sqlite3 "$DBFILE" "SELECT path FROM file;" | while IFS= read -r line ; do
    [ -e "$line" ] || todelete "$line"
done

echo "COMMIT TRANSACTION;" >> "$TEMPFILE"

# Run all queued deletions in one transaction.
sqlite3 "$DBFILE" < "$TEMPFILE"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
21
searchindex
Izpildāmais fails
21
searchindex
Izpildāmais fails
@ -0,0 +1,21 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
import config
|
||||||
|
|
||||||
|
if len(sys.argv) < 2:
    print("Error: Missing search", file=sys.stderr)
    # Stop here; without a search expression the query below cannot run.
    sys.exit(1)

# All command-line arguments together form one full-text query string.
search = " ".join(sys.argv[1:])

dbcon = sqlite3.connect(config.DBPATH, isolation_level=None)
cursor = dbcon.cursor()

query = (
    "SELECT file.path FROM file INNER JOIN file_fts ON file.id = file_fts.ROWID "
    "WHERE file_fts.content MATCH ? ORDER By file.mtime ASC"
)
# The parameters must be a sequence with exactly one item for the single
# placeholder, hence the one-element tuple.
for row in cursor.execute(query, (search,)):
    print(row[0])

dbcon.close()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Notiek ielāde…
Atsaukties uz šo jaunā problēmā
Block a user