added multiprocess support (quick hack)

This commit is contained in:
Albert S. 2018-08-09 22:47:33 +02:00
parent 6c07601c84
commit d1d317d5af

View File

@ -8,7 +8,8 @@ import xml.etree.ElementTree
import re import re
import chardet import chardet
import config import config
dbcon = sqlite3.connect(config.DBPATH, isolation_level=None) from multiprocessing import Pool
class pagedata: class pagedata:
@ -131,6 +132,8 @@ def insert(path, cursor):
processor=preprocess[ext] processor=preprocess[ext]
pagedatalist = processor(abspath) pagedatalist = processor(abspath)
#TODO: assumes sqlite has been built with thread safety (and it is the default)
cursor = dbcon.cursor()
cursor.execute("BEGIN TRANSACTION") cursor.execute("BEGIN TRANSACTION")
cursor.execute("DELETE FROM file WHERE path = ?", (abspath,)) cursor.execute("DELETE FROM file WHERE path = ?", (abspath,))
cursor.execute("INSERT INTO file(path, mtime) VALUES(?, ?) ", (abspath, mtime)) cursor.execute("INSERT INTO file(path, mtime) VALUES(?, ?) ", (abspath, mtime))
@ -143,14 +146,31 @@ preprocess={".pdf":process_pdf, ".odt":process_odt, ".html":process_striptags, "
".sql":process_text, ".c":process_text, ".cpp":process_text, ".js":process_text, ".java":process_text, ".sql":process_text, ".c":process_text, ".cpp":process_text, ".js":process_text, ".java":process_text,
".py":process_text, '.md':process_text} ".py":process_text, '.md':process_text}
cursor = dbcon.cursor()
if len(sys.argv) < 2:
def yieldstdinfiles():
    """Yield one path per line read from standard input.

    Newline characters are removed from each line; nothing else is
    stripped or validated, so a blank input line yields an empty string.
    """
    for line in sys.stdin:
        yield line.replace("\n", "")
def poolinserter(path):
    """Index a single file from inside a pool worker.

    Single-argument adapter so that Pool.map can drive insert(), which
    takes a path and a cursor.
    """
    # `cursor` is the module-level global; it is None unless the worker
    # initializer assigns it. NOTE(review): the visible part of insert()
    # obtains its own cursor from `dbcon`, so a None argument appears to be
    # harmless -- confirm against the full insert() body.
    insert(path, cursor)
def init():
    """Pool initializer: give this worker process its own SQLite connection.

    Passed as ``initializer`` to the Pool, so it runs once in every worker
    process. It rebinds the module-level ``dbcon`` and ``cursor`` globals
    for that process only.
    """
    global cursor
    global dbcon
    # isolation_level=None leaves transaction control to the explicit
    # "BEGIN TRANSACTION" statement issued by insert().
    dbcon = sqlite3.connect(config.DBPATH, isolation_level=None)
    # `cursor` was declared global but never assigned, so workers handed
    # None to insert(); bind a real cursor on this worker's connection.
    cursor = dbcon.cursor()
# Module-level placeholders. Worker processes rebind them in init(); in the
# parent process they stay None, so there is no connection to close here.
cursor = None
dbcon = None

if __name__ == '__main__':
    # Fan the files out over four worker processes, each initialised by init().
    with Pool(processes=4, initializer=init) as pool:
        if len(sys.argv) < 2:
            # No path arguments: read paths from stdin, one per line.
            # The generator must be *called*; passing the function object
            # itself makes Pool.map fail because it is not iterable.
            pool.map(poolinserter, yieldstdinfiles())
        else:
            pool.map(poolinserter, sys.argv[1:])