addindex: process ods and minor improvements

This commit is contained in:
Albert S. 2018-12-29 20:19:54 +01:00
parent 9c30fa70a1
commit 5081f559df

View File

@ -73,6 +73,9 @@ def process_odt(path):
fd.close() fd.close()
return singlepagelist(striptags(content)) return singlepagelist(striptags(content))
def process_ods(path):
    """Extract the indexable content of the .ods file at *path*.

    This is a thin wrapper: the work is handed to process_odt and its
    result is returned unchanged.
    """
    pages = process_odt(path)
    return pages
def readtext(path): def readtext(path):
fd = open(path, "rb") fd = open(path, "rb")
content = fd.read() content = fd.read()
@ -89,7 +92,7 @@ def readtext(path):
else: else:
result = str(content.decode(encoding)) result = str(content.decode(encoding))
except: except:
print("FAILE DECODING: " + path) print("FAILED DECODING: " + path)
result = "" result = ""
return result return result
@ -139,17 +142,10 @@ def insert(path):
cursor.execute("INSERT INTO content(fileid, page, content) VALUES(?, ?, ?)", (fileid, pagedata.page, pagedata.content)) cursor.execute("INSERT INTO content(fileid, page, content) VALUES(?, ?, ?)", (fileid, pagedata.page, pagedata.content))
cursor.execute("COMMIT TRANSACTION") cursor.execute("COMMIT TRANSACTION")
preprocess={".pdf":process_pdf, ".odt":process_odt, ".html":process_striptags, ".xml":process_nothing, ".txt":process_text, preprocess={".pdf":process_pdf, ".odt":process_odt, ".ods":process_ods, ".html":process_striptags, ".xml":process_nothing, ".txt":process_text,
".sql":process_text, ".c":process_text, ".cpp":process_text, ".js":process_text, ".java":process_text, ".sql":process_text, ".c":process_text, ".cpp":process_text, ".js":process_text, ".java":process_text,
".py":process_text, '.md':process_text} ".py":process_text, '.md':process_text}
def yieldstdinfiles():
    """Yield every line read from standard input with newlines removed.

    Lines are produced lazily, in input order; empty lines are yielded as
    empty strings rather than skipped.
    """
    yield from (raw.replace("\n", "") for raw in sys.stdin)
def init(): def init():
global dbcon global dbcon
dbcon = sqlite3.connect(config.DBPATH, isolation_level=None) dbcon = sqlite3.connect(config.DBPATH, isolation_level=None)
@ -160,7 +156,7 @@ if __name__ == '__main__':
with Pool(processes=4,initializer=init) as pool: with Pool(processes=4,initializer=init) as pool:
if len(sys.argv) < 2: if len(sys.argv) < 2:
pool.map(insert, (l for l in yieldstdinfiles())) pool.map(insert, (l.replace("\n", "") for l in sys.stdin))
else: else:
pool.map(insert, sys.argv[1:]) pool.map(insert, sys.argv[1:])