first commit
This commit is contained in:
		
							
								
								
									
										18
									
								
								README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								README.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
| easyindex | ||||
| ========= | ||||
| easyindex creates a poor man's full-text search for your files using an | ||||
| SQLite database. | ||||
|  | ||||
| You need the python "chardet" package, since it will try to convert the  | ||||
| encoding of the files in case initial utf-8 decoding fails. | ||||
|  | ||||
| pdftotext is needed to search in .pdf files. | ||||
|  | ||||
| No GUI is provided at this time, nor does it concern itself with search | ||||
| too much.  | ||||
|  | ||||
| Setup | ||||
| ----- | ||||
| sqlite3 easyindex.db < create.sql | ||||
|  | ||||
|  | ||||
							
								
								
									
										118
									
								
								addindex
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										118
									
								
								addindex
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,118 @@ | ||||
| #!/usr/bin/python3 | ||||
| import sqlite3 | ||||
| import os.path | ||||
| import sys | ||||
| import subprocess | ||||
| import zipfile | ||||
| import xml.etree.ElementTree | ||||
| import re | ||||
| import chardet | ||||
| import config | ||||
# isolation_level=None puts the connection in autocommit mode, so the sqlite3
# module opens no implicit transactions; this script issues BEGIN/COMMIT itself.
dbcon = sqlite3.connect(config.DBPATH, isolation_level=None)
|  | ||||
def striptags(content):
    """Return the text of ``content`` with markup tags removed.

    The input is first parsed as XML; on success the text of all elements is
    concatenated. If it is not well-formed XML (typical for HTML), every
    ``<...>`` tag is replaced by a single space instead, so that words
    separated only by a tag (``test<br>test2``) are not glued together.

    content -- markup as a string
    returns -- the remaining text as a string
    """
    try:
        root = xml.etree.ElementTree.fromstring(content)
    except (xml.etree.ElementTree.ParseError, ValueError):
        # Not parseable as XML: fall back to a plain tag-stripping regex.
        return re.sub(r'<[^>]*>', ' ', content)
    return ''.join(root.itertext())
| 	 | ||||
|  | ||||
def strip_irrelevant(content):
    """Flatten ``content`` to a single line of text.

    Newlines and tabs become spaces, form feeds are dropped, and every run of
    spaces is collapsed to one space.
    """
    replacements = {ord("\n"): " ", ord("\t"): " ", ord("\f"): None}
    flattened = content.translate(replacements)
    return re.sub(' +', ' ', flattened)
| 	 | ||||
def process_pdf(path):
    """Extract the text of the PDF at ``path`` with the ``pdftotext`` tool.

    The tool is asked to write to stdout ("-"). Its output is decoded as
    UTF-8 and flattened with strip_irrelevant(). Bytes that are not valid
    UTF-8 are replaced rather than raising, so a single odd PDF cannot abort
    a whole indexing run.

    path    -- path of the PDF file
    returns -- the extracted text as a string (possibly empty)
    """
    args = ["pdftotext", path, "-"]
    proc = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if proc.returncode != 0:
        # Still index whatever was produced, but make the failure visible.
        print("pdftotext failed for: " + path)
    return strip_irrelevant(proc.stdout.decode('utf-8', errors='replace'))
| 	 | ||||
def process_odt(path):
    """Extract the text of the OpenDocument file at ``path``.

    Reads ``content.xml`` from the zip archive, decodes it as UTF-8 and strips
    the markup with striptags(). The archive is opened in a ``with`` block so
    it is closed even when reading fails.

    path    -- path of the .odt file
    returns -- the document text, or "" if the file is not a readable
               archive, has no content.xml, or is not valid UTF-8
    """
    try:
        with zipfile.ZipFile(path) as archive:
            content = archive.read("content.xml").decode("utf-8")
    except (zipfile.BadZipFile, KeyError, UnicodeDecodeError):
        # KeyError is what ZipFile.read raises for a missing member.
        print("FAILED READING ODT: " + path)
        return ""
    return striptags(content)
| 	 | ||||
def process_striptags(path):
    """Read the file at ``path`` as text and return it with markup removed."""
    return striptags(process_text(path))
| 	 | ||||
def process_text(path):
    """Return the content of the file at ``path`` as a string.

    The bytes are decoded as UTF-8 first. If that fails, the encoding guessed
    by chardet is tried instead.

    path    -- path of the file to read
    returns -- the decoded text, or "" when no encoding could be detected or
               decoding with the detected encoding failed
    """
    with open(path, "rb") as fd:
        content = fd.read()

    try:
        return content.decode("utf-8")
    except UnicodeDecodeError:
        pass  # not UTF-8; fall back to encoding detection below

    encoding = chardet.detect(content)["encoding"]
    if encoding is None:
        return ""
    try:
        return content.decode(encoding)
    except (UnicodeDecodeError, LookupError):
        # LookupError: chardet named an encoding Python has no codec for.
        print("FAILE DECODING: " + path)
        return ""
| 	 | ||||
def process_nothing(path):
    """Processor that yields no text: returns "" regardless of ``path``."""
    return str()
| 	 | ||||
def exists(abspath, mtime):
    """Tell whether the ``file`` table has a row for ``abspath`` with ``mtime``.

    Returns True when a row with exactly this path and modification time is
    found, False otherwise.
    """
    lookup = dbcon.cursor()
    lookup.execute("SELECT 1 FROM file WHERE path = ? AND mtime = ?" , (abspath, mtime))
    row = lookup.fetchone()
    return row is not None and row[0] == 1
|  | ||||
def insert(path, cursor):
    """Index the file at ``path``, writing through ``cursor``.

    Nothing is written when the file cannot be stat'ed (for example because
    it vanished since it was listed), or when the database already holds a
    row with the same absolute path and modification time.

    The text is extracted by the processor registered in ``preprocess`` for
    the file's extension (matched case-insensitively); files with an
    unregistered extension are stored with empty content.

    path   -- path of the file, relative or absolute
    cursor -- sqlite3 cursor used for the write; the caller owns the
              surrounding transaction
    """
    print("processing", path)
    abspath = os.path.abspath(path)
    try:
        mtime = int(os.stat(abspath).st_mtime)
    except OSError as err:
        print("Skipping " + abspath + ": " + str(err))
        return

    if exists(abspath, mtime):
        print("Leaving alone " + abspath + " because it wasn't changed")
        return

    ext = os.path.splitext(abspath)[1].lower()
    processor = preprocess.get(ext, process_nothing)
    content = processor(abspath)

    # Replace an outdated row with an explicit DELETE + INSERT rather than
    # INSERT OR REPLACE: rows removed by REPLACE conflict resolution only fire
    # DELETE triggers when recursive triggers are enabled (off by default),
    # which would leave the old content behind in the file_fts index.
    cursor.execute("DELETE FROM file WHERE path = ?", (abspath,))
    cursor.execute("INSERT INTO file(path, mtime, content) VALUES(?, ?, ?)", (abspath, mtime, content))
|  | ||||
# Maps a file extension (with its leading dot) to the function that extracts
# the indexable text of such a file.
preprocess = {
    ".pdf": process_pdf,
    ".odt": process_odt,
    ".html": process_striptags,
    ".xml": process_nothing,
    ".txt": process_text,
    ".sql": process_text,
    ".c": process_text,
    ".cpp": process_text,
    ".js": process_text,
    ".java": process_text,
    ".py": process_text,
    '.md': process_text,
}
|  | ||||
# Index every file named on the command line or, when no arguments are given,
# every path read from stdin (one per line). All writes happen inside a single
# explicit transaction.
cursor = dbcon.cursor()
cursor.execute("BEGIN TRANSACTION")
try:
    if len(sys.argv) < 2:
        for line in sys.stdin:
            name = line.rstrip("\n")
            # A blank line would resolve to the current directory; skip it.
            if name:
                insert(name, cursor)
    else:
        for inputfile in sys.argv[1:]:
            insert(inputfile, cursor)
except BaseException:
    # Leave the database untouched when the run is interrupted or fails.
    cursor.execute("ROLLBACK TRANSACTION")
    raise
else:
    cursor.execute("COMMIT TRANSACTION")
finally:
    dbcon.close()
|  | ||||
|  | ||||
|  | ||||
|  | ||||
							
								
								
									
										15
									
								
								create.sql
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								create.sql
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,15 @@ | ||||
-- Base table: one row per indexed file.
CREATE TABLE file(
  id INTEGER PRIMARY KEY,
  path varchar(4096) UNIQUE,
  mtime integer,
  content text
);

-- External-content FTS5 table that indexes file.content, keyed by file.id.
CREATE VIRTUAL TABLE file_fts USING fts5(content, content='file', content_rowid='id');

-- Triggers that keep the FTS index in step with the base table.

-- New row: add its content to the index.
CREATE TRIGGER file_ai AFTER INSERT ON file
BEGIN
  INSERT INTO file_fts(rowid, content) VALUES (new.id, new.content);
END;

-- Removed row: take its old content out of the index.
CREATE TRIGGER file_ad AFTER DELETE ON file
BEGIN
  INSERT INTO file_fts(file_fts, rowid, content) VALUES('delete', old.id, old.content);
END;

-- Changed row: remove the old content, then add the new content.
CREATE TRIGGER file_au AFTER UPDATE ON file
BEGIN
  INSERT INTO file_fts(file_fts, rowid, content) VALUES('delete', old.id, old.content);
  INSERT INTO file_fts(rowid, content) VALUES (new.id, new.content);
END;
							
								
								
									
										21
									
								
								delindex
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										21
									
								
								delindex
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,21 @@ | ||||
#!/bin/sh
# Remove index entries whose files no longer exist on disk.
#
# Every path stored in the database is checked; for each one that is gone a
# DELETE statement is queued in a temporary SQL script, which is then run
# against the database inside a single transaction.
TEMPFILE=$(mktemp) || exit 1
# Do not leave the temporary SQL script behind, whatever the exit path.
trap 'rm -f "$TEMPFILE"' EXIT
DBFILE="/home/db/easyindex.sqlite"

# Queue a DELETE statement for the path given as $1.
# Declared with POSIX syntax: the "function" keyword is not understood by
# every /bin/sh.
todelete()
{
	# Double embedded single quotes so the path stays a valid SQL string literal.
	escaped=$(printf '%s' "$1" | sed "s/'/''/g")
	printf "DELETE FROM file WHERE path = '%s';\n" "$escaped" >> "$TEMPFILE"
}

echo "BEGIN TRANSACTION;" >> "$TEMPFILE"

# IFS= and -r keep surrounding blanks and backslashes in paths intact.
sqlite3 "$DBFILE" "SELECT path FROM file;" | while IFS= read -r line ; do
	[ -e "$line" ] || todelete "$line"
done

echo "COMMIT TRANSACTION;" >> "$TEMPFILE"

sqlite3 "$DBFILE" < "$TEMPFILE"
|  | ||||
|  | ||||
|  | ||||
|  | ||||
							
								
								
									
										21
									
								
								searchindex
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										21
									
								
								searchindex
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,21 @@ | ||||
| #!/usr/bin/python3 | ||||
| import sqlite3 | ||||
| import sys | ||||
| import config | ||||
|  | ||||
# Print the path of every indexed file whose content matches the search
# expression given on the command line, ordered by modification time
# (oldest first).
if len(sys.argv) < 2:
    print("Error: Missing search")
    sys.exit(1)

# The query has exactly one placeholder, so all command-line words are joined
# into a single FTS5 query string instead of being bound one by one.
search = " ".join(sys.argv[1:])

dbcon = sqlite3.connect(config.DBPATH, isolation_level=None)
cursor = dbcon.cursor()

query = (
    "SELECT file.path FROM file INNER JOIN file_fts ON file.id = file_fts.ROWID "
    "WHERE file_fts.content MATCH ? ORDER By file.mtime ASC"
)
try:
    for row in cursor.execute(query, (search,)):
        print(row[0])
finally:
    dbcon.close()
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
		Reference in New Issue
	
	Block a user