first commit
This commit is contained in:
		
							
								
								
									
										18
									
								
								README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								README.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,18 @@
 | 
				
			|||||||
 | 
					easyindex
 | 
				
			||||||
 | 
					=========
 | 
				
			||||||
 | 
					easyindex creates a poor man's full-text search for your files using a
 | 
				
			||||||
 | 
					sqlite database.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You need the python "chardet" package, since it will try to convert the 
 | 
				
			||||||
 | 
					encoding of the files in case initial utf-8 decoding fails.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pdftotext is needed to search in .pdf files.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					No GUI is provided at this time, nor does it concern itself with search
 | 
				
			||||||
 | 
					too much. 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Setup
 | 
				
			||||||
 | 
					-----
 | 
				
			||||||
 | 
					sqlite3 easyindex.db < create.sql
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										118
									
								
								addindex
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										118
									
								
								addindex
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,118 @@
 | 
				
			|||||||
 | 
					#!/usr/bin/python3
 | 
				
			||||||
 | 
					import sqlite3
 | 
				
			||||||
 | 
					import os.path
 | 
				
			||||||
 | 
					import sys
 | 
				
			||||||
 | 
					import subprocess
 | 
				
			||||||
 | 
					import zipfile
 | 
				
			||||||
 | 
					import xml.etree.ElementTree
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
 | 
					import chardet
 | 
				
			||||||
 | 
					import config
 | 
				
			||||||
 | 
					# Open the index database. isolation_level=None puts the sqlite3 module in
					# autocommit mode; this script issues its own BEGIN/COMMIT statements.
					dbcon = sqlite3.connect(config.DBPATH, isolation_level=None)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def striptags(content):
						"""Return the text of *content* with markup tags removed.

						The input is first parsed as XML and the text of all elements is
						concatenated. If it is not well-formed XML (typical for HTML), tags
						are stripped with a regular expression instead; each tag is replaced
						by a space so that words separated only by a tag ("test<br>test2")
						are not merged into a single word.
						"""
						try:
							return ''.join(xml.etree.ElementTree.fromstring(content).itertext())
						except (xml.etree.ElementTree.ParseError, ValueError):
							# Not parseable as XML: fall back to a plain tag-stripping regex.
							return re.sub('<[^>]*>', ' ', content)
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def strip_irrelevant(content):
						"""Flatten whitespace so *content* becomes a single line of text.

						Newlines and tabs become spaces, form feeds are dropped, and every run
						of spaces is collapsed into one space.
						"""
						flattened = content.translate(str.maketrans({"\n": " ", "\t": " ", "\f": None}))
						return re.sub(' +', ' ', flattened)
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
					def process_pdf(path):
						"""Extract the text of the PDF at *path* with the ``pdftotext`` tool.

						``pdftotext`` is told to write to standard output ("-"); that output is
						decoded as UTF-8 and passed through strip_irrelevant(). The tool's
						standard error output and exit status are ignored.
						"""
						completed = subprocess.run(
							["pdftotext", path, "-"],
							stdout=subprocess.PIPE,
							stderr=subprocess.PIPE,
						)
						return strip_irrelevant(completed.stdout.decode('utf-8'))
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
					def process_odt(path):
						"""Return the text of the OpenDocument file at *path*.

						Reads the "content.xml" member of the ZIP container, decodes it as
						UTF-8 and strips the markup with striptags(). The archive is opened in
						a ``with`` block so it is closed even when the member is missing or
						cannot be decoded.
						"""
						with zipfile.ZipFile(path) as archive:
							content = archive.read("content.xml").decode("utf-8")
						return striptags(content)
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
					def process_striptags(path):
						"""Read the file at *path* via process_text() and strip its markup tags."""
						return striptags(process_text(path))
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
					def process_text(path):
						"""Return the contents of the file at *path* as a string.

						The bytes are decoded as UTF-8 first. If that fails, the encoding is
						guessed with chardet and decoding is retried. An empty string is
						returned when no encoding can be detected or the retry fails as well.
						"""
						with open(path, "rb") as fd:
							content = fd.read()
						try:
							return content.decode("utf-8")
						except UnicodeDecodeError:
							pass  # not UTF-8; fall back to encoding detection below
						try:
							encoding = chardet.detect(content)["encoding"]
							if encoding is None:
								return ""
							return content.decode(encoding)
						except (LookupError, UnicodeError):
							# LookupError: the detected codec name is unknown to Python.
							print("FAILED DECODING: " + path)
							return ""
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
					def process_nothing(path):
						"""Processor that extracts no text: always returns an empty string."""
						return ""
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
					def exists(abspath, mtime):
						"""Tell whether the index already holds *abspath* with this exact *mtime*.

						Returns True when a row of the ``file`` table matches both values,
						False otherwise.
						"""
						lookup = dbcon.cursor()
						lookup.execute("SELECT 1 FROM file WHERE path = ? AND mtime = ?" , (abspath, mtime))
						row = lookup.fetchone()
						return row is not None and row[0] == 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def insert(path, cursor):
						"""Index the file at *path* using the database cursor *cursor*.

						The file is skipped when the index already has an entry with the same
						absolute path and modification time. Otherwise its text is extracted
						by the processor registered for its extension in ``preprocess`` (files
						with an unregistered extension are stored with empty content) and the
						row is written to the ``file`` table.
						"""
						print("processing", path)
						abspath = os.path.abspath(path)
						mtime = int(os.stat(abspath).st_mtime)
						if exists(abspath, mtime):
							print("Leaving alone " + abspath + " because it wasn't changed")
							return
						# Match extensions case-insensitively so "REPORT.PDF" is handled like ".pdf".
						ext = os.path.splitext(abspath)[1].lower()
						processor = preprocess.get(ext, process_nothing)
						content = processor(abspath)
						# Replace a stale row with an explicit DELETE + INSERT instead of
						# "INSERT OR REPLACE": SQLite fires DELETE triggers for rows removed by
						# REPLACE only when recursive triggers are enabled, so triggers on the
						# file table would otherwise never see the old row go away.
						cursor.execute("DELETE FROM file WHERE path = ?", (abspath,))
						cursor.execute("INSERT INTO file(path, mtime, content) VALUES(?, ?, ?)", (abspath, mtime, content))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Maps a file extension to the function that extracts indexable text from it.
					preprocess = {
						".pdf": process_pdf,
						".odt": process_odt,
						".html": process_striptags,
						".xml": process_nothing,
						".txt": process_text,
						".sql": process_text,
						".c": process_text,
						".cpp": process_text,
						".js": process_text,
						".java": process_text,
						".py": process_text,
						'.md': process_text,
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Index every requested file inside one explicit transaction.
					cursor = dbcon.cursor()
					try:
						cursor.execute("BEGIN TRANSACTION")
						if len(sys.argv) < 2:
							# No arguments: read one path per line from standard input.
							for line in sys.stdin:
								inputfile = line.rstrip("\n")
								# A blank line would resolve to the current directory; skip it.
								if inputfile:
									insert(inputfile, cursor)
						else:
							for inputfile in sys.argv[1:]:
								insert(inputfile, cursor)
						cursor.execute("COMMIT TRANSACTION")
					finally:
						# Closing without a COMMIT discards the uncommitted transaction.
						dbcon.close()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										15
									
								
								create.sql
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								create.sql
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,15 @@
 | 
				
			|||||||
 | 
					-- Base table with one row per indexed file, plus an external-content
					-- FTS5 table that indexes its "content" column.
					CREATE TABLE file(
					  id INTEGER PRIMARY KEY,
					  path varchar(4096) UNIQUE,
					  mtime integer,
					  content text
					);
					CREATE VIRTUAL TABLE file_fts USING fts5(
					  content,
					  content='file',
					  content_rowid='id'
					);
					-- Triggers that mirror every change of "file" into the FTS index.
					-- Insert: index the new row.
					CREATE TRIGGER file_ai AFTER INSERT ON file
					BEGIN
					  INSERT INTO file_fts(rowid, content)
					  VALUES (new.id, new.content);
					END;
					-- Delete: remove the old row's entries via the FTS5 'delete' command.
					CREATE TRIGGER file_ad AFTER DELETE ON file
					BEGIN
					  INSERT INTO file_fts(file_fts, rowid, content)
					  VALUES('delete', old.id, old.content);
					END;
					-- Update: remove the old entries, then index the new values.
					CREATE TRIGGER file_au AFTER UPDATE ON file
					BEGIN
					  INSERT INTO file_fts(file_fts, rowid, content)
					  VALUES('delete', old.id, old.content);
					  INSERT INTO file_fts(rowid, content)
					  VALUES (new.id, new.content);
					END;
 | 
				
			||||||
							
								
								
									
										21
									
								
								delindex
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										21
									
								
								delindex
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,21 @@
 | 
				
			|||||||
 | 
					#!/bin/sh
					# Remove index entries whose file no longer exists on disk.
					#
					# Every path stored in the "file" table is checked; a DELETE statement is
					# queued in a temporary SQL script for each missing path, and the script is
					# then run against the database as a single transaction.
					TEMPFILE=$(mktemp) || exit 1
					# Remove the temporary SQL script when the shell exits.
					trap 'rm -f "$TEMPFILE"' EXIT
					DBFILE="/home/db/easyindex.sqlite"
					# Queue a DELETE statement for the path given as $1.
					# POSIX function syntax is used because the "function" keyword is not
					# understood by every /bin/sh.
					todelete()
					{
						# Double any single quote so the path stays a valid SQL string literal.
						escaped=$(printf '%s\n' "$1" | sed "s/'/''/g")
						printf "DELETE FROM file WHERE path = '%s';\n" "$escaped" >> "$TEMPFILE"
					}
					echo "BEGIN TRANSACTION;" >> "$TEMPFILE"
					# IFS= and -r keep surrounding blanks and backslashes in paths intact.
					sqlite3 "$DBFILE" "SELECT path FROM file;" | while IFS= read -r line ; do
						[ -e "$line" ] || todelete "$line"
					done
					echo "COMMIT TRANSACTION;" >> "$TEMPFILE"
					sqlite3 "$DBFILE" < "$TEMPFILE"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										21
									
								
								searchindex
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										21
									
								
								searchindex
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,21 @@
 | 
				
			|||||||
 | 
					#!/usr/bin/python3
 | 
				
			||||||
 | 
					import sqlite3
 | 
				
			||||||
 | 
					import sys
 | 
				
			||||||
 | 
					import config
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Run a full-text query against the index and print the matching paths.
					dbcon = sqlite3.connect(config.DBPATH, isolation_level=None)
					cursor = dbcon.cursor()
					if len(sys.argv) < 2:
						# Without a query there is nothing to bind to the MATCH parameter.
						print("Error: Missing search")
						dbcon.close()
						sys.exit(1)
					# The statement has a single "?" placeholder, so all command-line words are
					# joined into one query string instead of being bound as separate values.
					search = " ".join(sys.argv[1:])
					for row in cursor.execute("SELECT file.path FROM file INNER JOIN file_fts ON file.id = file_fts.ROWID WHERE file_fts.content MATCH ? ORDER By file.mtime ASC", (search,)):
						print(row[0])
					dbcon.close()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		Reference in New Issue
	
	Block a user