PK XHa3 3 edocuments/process.py# -*- coding: utf-8 -*-
import os
import re
from tempfile import NamedTemporaryFile
from subprocess import check_call
from shutil import copyfile
from PyQt5.QtCore import QObject, pyqtSignal
import edocuments
class Process(QObject):
    """Run pipelines of shell commands declared under ``cmds`` in the config.

    Each command definition is either a plain string (the shell command) or a
    dict with the optional keys ``cmd``, ``out_ext``, ``inplace``, ``type``
    (``'rename'``), ``from``, ``to`` and ``format``.
    """

    # Emitted just before a command runs:
    # (step index, command name, formatted shell command, command definition).
    progress = pyqtSignal(int, str, str, dict)
    # Set to True from outside to abort the pipeline before its next command.
    cancel = False

    @staticmethod
    def _quote(value):
        """Return *value* single-quoted for inclusion in a shell command."""
        return "'%s'" % value.replace("'", "'\"'\"'")

    @staticmethod
    def _with_extension(filename, extension):
        """Return *filename* with its 2-5 character extension replaced."""
        return "%s.%s" % (
            re.sub(r"\.[a-z0-9A-Z]{2,5}$", "", filename), extension)

    @staticmethod
    def _lookup(cmds, name):
        """Return the definition of command *name*.

        Raises:
            ValueError: if *name* is not declared in *cmds*.
        """
        cmd = cmds.get(name)
        if cmd is None:
            # The original raised a bare string, which is itself a TypeError
            # in Python 3 and hid the real message.
            raise ValueError("Missing command '%s' in `cmds`" % name)
        return cmd

    def process(
            self, names, filename=None, destination_filename=None,
            in_extention=None, get_content=False):
        """Run the commands *names* in order, chaining outputs to inputs.

        Args:
            names: command names to look up in the ``cmds`` config section.
            filename: input file of the first command, if any.
            destination_filename: where the final result is stored; defaults
                to *filename*.
            in_extention: extension of the input, used as the output
                extension until a command declares ``out_ext``.
            get_content: when true, return the text content of the final
                file instead of moving it to the destination.

        Returns:
            ``(content, out_ext)`` when *get_content* is true, otherwise
            ``(destination_filename, out_ext)``. ``(None, None)`` when the
            run was cancelled.

        Raises:
            ValueError: if a command name is not declared.
            subprocess.CalledProcessError: if a command exits with an error.
        """
        cmds = edocuments.config.get("cmds", {})
        out_ext = in_extention
        original_filename = filename
        if destination_filename is None:
            destination_filename = filename

        for no, name in enumerate(names):
            cmd = self._lookup(cmds, name)
            if isinstance(cmd, str):
                cmd = dict(cmd=cmd)

            if cmd.get('type') == 'rename':
                destination_filename = self._rename(cmd, destination_filename)
                continue

            if 'out_ext' in cmd:
                out_ext = cmd['out_ext']
            inplace = cmd.get('inplace', False)
            cmd_cmd = cmd.get('cmd')

            if inplace:
                out_name = filename
            elif out_ext is None:
                # Only the generated name is kept; the command itself is
                # expected to create the file.
                out_name = NamedTemporaryFile(mode='w+b').name
            else:
                out_name = NamedTemporaryFile(
                    mode='w+b',
                    suffix='.' + out_ext
                ).name

            params = {}
            if filename is not None:
                params["in"] = self._quote(filename)
            if not inplace:
                params["out"] = self._quote(out_name)

            try:
                cmd_cmd = cmd_cmd.format(**params)
            except Exception:
                print("Error in {name}: {cmd}, with {params}".format(
                    name=name, cmd=cmd_cmd, params=params))
                raise

            if self.cancel is True:
                # Consume the request, otherwise every later run on this
                # instance would be aborted at its first command.
                self.cancel = False
                return None, None

            print("{name}: {cmd}".format(name=name, cmd=cmd_cmd))
            self.progress.emit(no, name, cmd_cmd, cmd)
            check_call(cmd_cmd, shell=True)
            filename = out_name

        if get_content:
            content = None
            if os.path.exists(filename):
                with open(filename) as f:
                    content = f.read()
                # Never delete the caller's input, only intermediate files.
                if original_filename is None or original_filename != filename:
                    os.unlink(filename)
            return content, out_ext

        if original_filename is not None and original_filename != filename:
            os.unlink(original_filename)
        if out_ext is not None:
            destination_filename = self._with_extension(
                destination_filename, out_ext)
        if filename != destination_filename:
            directory = os.path.dirname(destination_filename)
            # A bare file name has no directory part; makedirs('') would fail.
            if directory:
                os.makedirs(directory, exist_ok=True)
            copyfile(filename, destination_filename)
            os.unlink(filename)
        return destination_filename, out_ext

    def _rename(self, cmd, destination_filename):
        """Apply the ``rename`` command *cmd* to *destination_filename*.

        ``from`` is a regular expression; the match is replaced by ``to``, or
        upper/lower-cased when ``format`` is ``'upper'`` or ``'lower'``.
        """
        from_re = cmd.get('from')
        to_re = cmd.get('to')
        if cmd.get('format') in ['upper', 'lower']:
            def format_term(term):
                if cmd.get('format') == 'upper':
                    return term.upper()
                else:
                    return term.lower()
            to_re = lambda m: format_term(m.group(0))  # noqa: E731
        return re.sub(from_re, to_re, destination_filename)

    def destination_filename(self, names, filename, extension=None):
        """Compute the file name ``process`` would produce, without running it.

        Returns:
            ``(filename, extension)`` after applying the rename commands and
            the last declared ``out_ext`` of *names*.

        Raises:
            ValueError: if a command name is not declared.
        """
        cmds = edocuments.config.get("cmds", {})
        for name in names:
            cmd = self._lookup(cmds, name)
            if isinstance(cmd, str):
                # A plain shell command neither renames nor sets an extension.
                cmd = {}
            if cmd.get('type') == 'rename':
                filename = self._rename(cmd, filename)
            elif 'out_ext' in cmd:
                extension = cmd['out_ext']
        if extension is not None:
            filename = self._with_extension(filename, extension)
        return filename, extension
PK >HO edocuments/label_dialog.py# -*- coding: utf-8 -*-
from PyQt5.QtWidgets import QDialog
from PyQt5.QtGui import QPixmap
from edocuments.ui.label_dialog import Ui_Dialog
class Dialog(QDialog):
    """Dialog showing a preview of an image inside a label."""

    def __init__(self):
        super().__init__()
        self.ui = Ui_Dialog()
        self.ui.setupUi(self)

    def set_image(self, image_filename):
        """Load *image_filename* and display it, at most 800 px on its long side."""
        max_side = 800
        picture = QPixmap(image_filename)
        landscape = picture.width() > picture.height()
        # Only the longest side is constrained, and only when it is too big.
        if landscape and picture.width() > max_side:
            picture = picture.scaledToWidth(max_side)
        elif not landscape and picture.height() > max_side:
            picture = picture.scaledToHeight(max_side)
        label = self.ui.label
        label.setPixmap(picture)
        label.setMask(picture.mask())
        label.show()
PK MJH` edocuments/colorize.py# -*- coding: utf-8 -*-
# Colour indexes accepted by colorize(); each one is inserted as the last
# digit of the "3x" code in the escape sequence it builds.
BLACK = 0
RED = 1
GREEN = 2
YELLOW = 3
BLUE = 4
MAGENTA = 5
CYAN = 6
WHITE = 7
def colorize(text, color):
    """Wrap *text* in the escape sequence selecting the bold colour *color*."""
    start = "\x1b[01;3%im" % (color,)
    reset = "\x1b[0m"
    return start + "%s" % (text,) + reset
PK ZHtF
edocuments/index.py# -*- coding: utf-8 -*-
import os
from whoosh.index import create_in, open_dir, exists_in
from whoosh.fields import Schema, ID, TEXT, STORED
from whoosh.qparser import QueryParser
from whoosh.query import Term
from whoosh.scoring import BM25F
from whoosh import writing
import edocuments
# Names of the index fields, used as keys of the Schema built in Index and
# of the documents written to it.
PATH = 'path_id'
CONTENT = 'content'
DATE = 'date'
DIRECTORY = 'directory'
MD5 = 'md5'
class Index:
    """Whoosh full-text index stored in ``<root_folder>/.index``."""

    def __init__(self):
        """Open the index, creating or migrating it when needed."""
        self.directory = os.path.join(edocuments.root_folder, '.index')
        self.dirty = False
        schema = Schema(**{
            PATH: ID(stored=True, unique=True),
            CONTENT: TEXT(stored=True),
            DATE: STORED,
            DIRECTORY: STORED,
            MD5: TEXT(stored=True),
        })
        self.parser_path = QueryParser("path_id", schema)
        self.parser_content = QueryParser("content", schema)

        if not exists_in(self.directory):
            # The directory may already exist without containing an index.
            os.makedirs(self.directory, exist_ok=True)
            self.index = create_in(self.directory, schema)
        else:
            self.index = open_dir(self.directory)

        # Migrate indexes created with an older schema.
        if 'path' in self.index.schema.names():
            with self.index.writer() as writer:
                writer.remove_field('path')
        if 'directory' not in self.index.schema.names():
            with self.index.writer() as writer:
                writer.add_field('directory', STORED)
        if 'md5' not in self.index.schema.names():
            with self.index.writer() as writer:
                writer.add_field('md5', TEXT(stored=True))

        print(
            'Field length:\npath: %i\ncontent: %i\nmd5: %i' % (
                self.index.field_length("path_id"),
                self.index.field_length("content"),
                self.index.field_length("md5"),
            )
        )

    def get(self, filename):
        """Return the stored fields of *filename* as a dict, or None."""
        filename = edocuments.short_path(filename)
        with self.index.searcher() as searcher:
            results = searcher.search(Term("path_id", filename))
            if len(results) == 0:
                return None
            assert(len(results) == 1)
            hit = results[0]
            # The original used the undefined name `filed` here (NameError).
            return {
                field: hit.get(field)
                for field in self.index.schema.names()
            }

    def add(self, filename, text, date=None, md5=None):
        """Add or replace the document *filename*.

        Args:
            filename: path of the document, stored relative to the root.
            text: content to index.
            date: modification time of the file, if known.
            md5: hex digest of the file content, if known.
        """
        filename = edocuments.short_path(filename)
        with self.index.writer() as writer:
            writer.update_document(**{
                PATH: filename,
                CONTENT: text,
                DATE: date,
                DIRECTORY: False,
                # Previously accepted but never written to the index.
                MD5: md5,
            })

    def optimize(self):
        """Merge the index segments."""
        self.index.optimize()

    def clear(self):
        """Remove every document from the index."""
        with self.index.writer() as writer:
            writer.mergetype = writing.CLEAR

    def search(self, text):
        """Return up to 1000 documents whose content matches the query *text*.

        Each result is a dict with the keys ``path``, ``content``,
        ``directory`` and ``highlight``.
        """
        with self.index.searcher(weighting=BM25F(B=0, K1=1.2)) as searcher:
            query = self.parser_content.parse(text)
            results = searcher.search(
                query, terms=True, limit=1000,
            )
            found = []
            for r in results:
                # matched_terms() yields (fieldname, term) pairs, so the field
                # names have to be extracted before testing for PATH.
                matched_fields = {term[0] for term in r.matched_terms()}
                found.append({
                    'path': r.get(PATH),
                    'content': r.get(CONTENT),
                    'directory': r.get(DIRECTORY),
                    'highlight': r.highlights(
                        PATH if PATH in matched_fields else CONTENT
                    ),
                })
            return found
# Lazily created, process-wide Index instance.
_index = None


def index():
    """Return the shared Index, creating it on first use."""
    global _index
    if _index is not None:
        return _index
    _index = Index()
    return _index
PK ZHkEi edocuments/__init__.py# -*- coding: utf-8 -*-
import os
import sys
import re
import shutil
import subprocess
from pathlib import Path
from yaml import load
from argparse import ArgumentParser
from bottle import mako_template
from PyQt5.QtCore import QSettings
from PyQt5.QtWidgets import QApplication
from edocuments.main_widget import MainWindow
# Name of the configuration file, looked up in the first of these locations
# that is defined: $APPDATA, $XDG_CONFIG_HOME, then $HOME/.config.
CONFIG_FILENAME = "edocuments.yaml"
if 'APPDATA' in os.environ:
    CONFIG_PATH = os.path.join(os.environ['APPDATA'], CONFIG_FILENAME)
elif 'XDG_CONFIG_HOME' in os.environ:
    CONFIG_PATH = os.path.join(os.environ['XDG_CONFIG_HOME'], CONFIG_FILENAME)
else:
    CONFIG_PATH = os.path.join(os.environ['HOME'], '.config', CONFIG_FILENAME)
# Module-level state, filled in by gui_main().
config = {}
root_folder = None
settings = None
main_window = None
def short_path(filename):
    """Return *filename* as a string, without the leading root folder if any."""
    global root_folder
    name = str(filename)
    prefix_length = len(root_folder)
    if name[:prefix_length] != root_folder:
        return name
    return name[prefix_length:]
def long_path(filename):
    """Return *filename* prefixed by the root folder unless it starts with '/'."""
    global root_folder
    starts_at_root = filename[:1] == '/'
    if starts_at_root:
        return filename
    return os.path.join(root_folder, filename)
def gui_main():
    """Load the configuration and run the graphical application.

    The window geometry and state are restored on start and saved when the
    event loop ends.
    """
    global config, root_folder, settings, main_window
    # Local import: safe_load builds plain Python objects only, and unlike
    # load() it does not need an explicit Loader on recent PyYAML versions.
    from yaml import safe_load

    with open(CONFIG_PATH) as f:
        # An empty file is parsed as None.
        config = safe_load(f) or {}

    # An empty value would also break the trailing-slash handling below.
    if not config.get("root_folder"):
        sys.exit("Missing `root_folder` in %s" % CONFIG_PATH)
    root_folder = os.path.expanduser(config.get("root_folder"))
    # short_path() relies on the root folder ending with a separator.
    if not root_folder.endswith('/'):
        root_folder += '/'

    settings = QSettings("org", "edocuments")
    app = QApplication(sys.argv)
    main_window = MainWindow()
    if settings.value("geometry") is not None:
        main_window.restoreGeometry(settings.value("geometry"))
    if settings.value("state") is not None:
        main_window.restoreState(settings.value("state"))
    main_window.show()
    app.exec()

    settings.setValue("geometry", main_window.saveGeometry())
    settings.setValue("state", main_window.saveState())
    settings.sync()
def cmd_main():
    """Command line entry point: list the OCR languages or install the app.

    ``--list-available-lang3`` prints the tesseract language packages found
    by ``apt-cache``. ``--install`` interactively installs the desktop entry,
    a default configuration and the required Debian packages.
    """
    parser = ArgumentParser(
        description='eDocuments - a simple and productive personal documents '
        'library.',
        prog=sys.argv[0]
    )
    parser.add_argument(
        '--install', action='store_true',
        help='Install the application icon, the required packages, '
        'and default config file',
    )
    parser.add_argument(
        '--lang3', default='eng', metavar='LANG',
        help='the language used by the OCR',
    )
    parser.add_argument(
        '--list-available-lang3', action='store_true',
        help='List the available language used by the OCR.',
    )
    options = parser.parse_args()

    if options.list_available_lang3:
        if Path('/usr/bin/apt-cache').exists():
            result = subprocess.check_output([
                '/usr/bin/apt-cache', 'search', 'tesseract-ocr-'
            ]).decode('utf-8', 'replace')
            # Drop the meta package line; keep one newline so that its
            # neighbours are not merged into a single line.
            result = re.sub(
                '\ntesseract-ocr-all - [^\n]* packages\n',
                '\n', result, flags=re.MULTILINE)
            result = re.sub(r'tesseract-ocr-', '', result)
            result = re.sub(r' - tesseract-ocr language files ', ' ', result)
            print(result)
        else:
            sys.exit('Works only on Debian base OS')

    if options.install:
        # Needed by both the desktop file and the configuration steps, so it
        # must not depend on the answer to the first question.
        ressource_dir = os.path.join(os.path.dirname(
            os.path.abspath(__file__)), 'ressources')

        if input(
            'Create desktop and icon files (edocuments.desktop and '
            'edocuments.png in ~/.local/share/applications)?\n'
        ) in ['y', 'Y']:
            applications_dir = os.path.expanduser(
                '~/.local/share/applications')
            os.makedirs(applications_dir, exist_ok=True)
            for name in ('edocuments.desktop', 'edocuments.png'):
                shutil.copyfile(
                    os.path.join(ressource_dir, name),
                    os.path.join(applications_dir, name)
                )

        if input(
            'Create the basic configuration '
            '(~/.config/edocuments.yaml)?\n'
        ) in ['y', 'Y']:
            config_content = mako_template(
                os.path.join(ressource_dir, 'config.yaml'),
                lang=options.lang3
            )
            config_path = os.path.expanduser('~/.config/edocuments.yaml')
            os.makedirs(os.path.dirname(config_path), exist_ok=True)
            with open(config_path, 'w') as file_open:
                file_open.write(config_content)

        if Path('/usr/bin/apt-get').exists():
            dpkg_output = subprocess.check_output(
                ['dpkg', '-l']).decode('utf-8', 'replace')
            # Lines of installed packages start with the "ii" status.
            installed_packages = {
                re.split(r' +', line)[1]
                for line in dpkg_output.splitlines()
                if line.startswith('ii ')
            }
            packages = [p for p in [
                'python3-pyqt5', 'sane-utils', 'imagemagick',
                'tesseract-ocr', 'tesseract-ocr-' + options.lang3,
                'optipng', 'poppler-utils', 'odt2txt',
                'docx2txt',
            ] if p not in installed_packages]
            print(packages)
            if len(packages) != 0:
                if input(
                    'Install the requires packages (%s)?\n' %
                    ', '.join(packages)
                ) in ['y', 'Y']:
                    subprocess.check_call([
                        'sudo', 'apt-get', 'install',
                    ] + packages)
        else:
            print(
                'WARNING: the package installation works only on Debian '
                'base OS')
PK ZH*h h edocuments/backend.py# -*- coding: utf-8 -*-
import sys
import hashlib
import traceback
from pathlib import Path
from threading import Lock
from concurrent.futures import ThreadPoolExecutor, as_completed
from PyQt5.QtCore import QObject, pyqtSignal
import edocuments
from edocuments.process import Process
from edocuments.index import index, PATH, CONTENT, DATE, DIRECTORY, MD5
class Backend(QObject):
    """Background jobs of the GUI: scanning and updating the library index."""

    # (percentage, label of the progress dialog, status bar message)
    update_library_progress = pyqtSignal(int, str, str)
    # Path of the file produced by the scan commands.
    scan_end = pyqtSignal(str)
    # Human readable error message.
    scan_error = pyqtSignal(str)
    # Shared pipeline runner used for scanning.
    process = Process()
    # Protects the counters and the index while files are parsed in threads.
    lock = Lock()

    @staticmethod
    def _file_md5(filename):
        """Return the hex MD5 digest of the content of *filename*."""
        digest = hashlib.md5()
        with open(str(filename), "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                digest.update(chunk)
        return digest.hexdigest()

    @staticmethod
    def _is_ignored(path):
        """Tell whether *path* matches one of the configured ``ignore`` globs."""
        return any(
            path.match(pattern)
            for pattern in edocuments.config.get('ignore', [])
        )

    def do_scan(self, filename, cmds, postprocess):
        """Scan to *filename*, post-process the result and index its text.

        ``scan_end`` is emitted once the scan commands are done and
        ``scan_error`` when a step fails; the exception is then re-raised.
        """
        try:
            filename, extension = self.process.process(
                cmds, destination_filename=filename,
            )
        except Exception as error:
            traceback.print_exc()
            self.scan_error.emit(str(error))
            raise

        if filename is None:
            # The scan was cancelled.
            return

        self.scan_end.emit(filename)

        # A single handler: the former nested one reported each error twice.
        try:
            filename, extension = self.process.process(
                postprocess, filename=filename,
                in_extention=extension,
            )
            if filename is None:
                return

            conv = [
                c for c in edocuments.config.get('to_txt', [])
                if c['extension'] == extension
            ]
            if len(conv) >= 1:
                text, extension = self.process.process(
                    conv[0].get("cmds"), filename=filename, get_content=True,
                )
                if text is None:
                    text = ''
                # Same document layout, date and checksum as to_txt(), so
                # that the next library update recognises the file.
                index().add(
                    filename,
                    "%s\n%s" % (filename, text),
                    Path(filename).stat().st_mtime,
                    self._file_md5(filename),
                )
        except Exception as error:
            traceback.print_exc()
            self.scan_error.emit(str(error))
            raise

    def do_update_library(self):
        """Synchronise the index with the files of the root folder.

        Removes the documents whose file is gone, indexes the directories,
        then extracts the text of the new or modified files in a thread
        pool. Progress is reported through ``update_library_progress``.
        """
        docs_to_rm = []
        docs_date = {}
        with index().index.reader() as reader:
            for num, doc in reader.iter_docs():
                if (
                    doc[PATH] in docs_date or
                    not Path(edocuments.long_path(doc[PATH])).exists() or
                    doc[PATH] != edocuments.short_path(doc[PATH])
                ):
                    print("Delete document: " + doc[PATH])
                    docs_to_rm.append(num)
                else:
                    docs_date[doc[PATH]] = (doc.get(DATE), doc.get(MD5))

        self.update_library_progress.emit(
            0, 'Adding the directories...', '')

        index_folder = '.index'
        index_prefix = index_folder + '/'

        # Paths are compared in their short (root relative) form; comparing
        # the Path object with a string never matched.
        new_directories = []
        for directory in Path(edocuments.root_folder).rglob('*'):
            dir_ = edocuments.short_path(directory)
            if (
                dir_ not in docs_date and
                directory.is_dir() and
                dir_ != index_folder and
                not dir_.startswith(index_prefix) and
                not self._is_ignored(directory)
            ):
                new_directories.append((dir_, directory.stat().st_mtime))

        if new_directories:
            # One writer, hence one commit, for all the directories.
            with index().index.writer() as writer:
                for dir_, date in new_directories:
                    writer.update_document(**{
                        PATH: dir_,
                        CONTENT: dir_,
                        DATE: date,
                        DIRECTORY: True,
                    })

        self.update_library_progress.emit(
            0, 'Browsing the files (0)...', '')

        todo = []
        seen = set()
        for conv in edocuments.config.get('to_txt', []):
            cmds = conv.get("cmds")
            for filename in Path(edocuments.root_folder).rglob(
                    "*." + conv.get('extension')):
                short_name = edocuments.short_path(filename)
                if (
                    short_name in seen or
                    self._is_ignored(filename) or
                    not filename.exists() or
                    short_name.startswith(index_prefix)
                ):
                    continue
                # An extension may be configured more than once.
                seen.add(short_name)

                current_date, md5 = docs_date.get(short_name, (None, None))
                new_date = filename.stat().st_mtime
                if current_date is not None and new_date <= current_date:
                    continue

                # Hash only the candidates, not every file of the library.
                new_md5 = self._file_md5(filename)
                doc = None
                if current_date is not None and (
                        md5 is None or md5 == new_md5):
                    doc = index().get(filename)
                if doc is not None:
                    # Same content: refresh the date and checksum only.
                    index().add(
                        filename,
                        doc[CONTENT],
                        max(new_date, current_date),
                        new_md5
                    )
                else:
                    print("Add document: " + short_name)
                    todo.append((str(filename), cmds, new_date, new_md5))
                    self.update_library_progress.emit(
                        0, 'Browsing the files (%i)...' % len(todo),
                        short_name)

        self.nb = len(todo)
        self.nb_error = 0
        self.no = 0

        print('Removes %i old documents.' % len(docs_to_rm))

        with index().index.writer() as writer:
            for num in docs_to_rm:
                writer.delete_document(num)

        self.update_library_progress.emit(
            0, 'Parsing the files %i/%i.' % (self.no, self.nb), '',
        )

        print('Process %i documents.' % len(todo))

        with ThreadPoolExecutor(
            max_workers=edocuments.config.get('nb_process', 8)
        ) as executor:
            future_results = {
                executor.submit(self.to_txt, t):
                t for t in todo
            }
            for feature in as_completed(future_results):
                pass

        self.update_library_progress.emit(
            0, 'Optimise the index...', '',
        )
        index().optimize()

        # Always reach 100 so that the progress dialog can close, even when
        # some documents failed.
        self.update_library_progress.emit(
            100, 'Finish', '',
        )
        if self.nb_error != 0:
            self.scan_error.emit("Finished with %i errors" % self.nb_error)

    def to_txt(self, job):
        """Extract the text of one file and add it to the index.

        Args:
            job: ``(filename, cmds, date, md5)`` tuple.

        Returns:
            ``(filename, False)`` on failure, otherwise None.
        """
        filename, cmds, date, md5 = job
        failed = False
        text = ''
        try:
            text, extension = Process().process(
                cmds, filename=str(filename), get_content=True,
            )
            if text is None:
                text = ''
        except Exception:
            traceback.print_exc()
            failed = True

        # The context manager releases the lock even if indexing fails;
        # a lock left acquired would block all the other workers.
        with self.lock:
            self.no += 1
            self.update_library_progress.emit(
                # Integer division: the signal carries an int.
                self.no * 100 // self.nb,
                'Parsing the files %i/%i.' % (self.no, self.nb),
                edocuments.short_path(filename),
            )
            print("%i/%i" % (self.no, self.nb))

            if not failed:
                try:
                    index().add(
                        filename,
                        "%s\n%s" % (filename, text),
                        date, md5
                    )
                except Exception:
                    traceback.print_exc()
                    failed = True

            if failed:
                print("Error with document: " + filename)
                self.nb_error += 1

        if failed:
            return filename, False

    def optimize_library(self):
        """Merge the segments of the index."""
        index().optimize()
PK ZHO* edocuments/main_widget.py# -*- coding: utf-8 -*-
import os
import re
import pathlib
from threading import Thread
from subprocess import call
from PyQt5.Qt import Qt
from PyQt5.QtWidgets import QMainWindow, QFileDialog, \
QErrorMessage, QMessageBox, QProgressDialog, QListWidgetItem
import edocuments
from edocuments.backend import Backend
from edocuments.index import index
from edocuments.ui.main import Ui_MainWindow
from edocuments.label_dialog import Dialog
class MainWindow(QMainWindow):
    """Main window: search in the library and scan new documents."""

    def __init__(self):
        super().__init__()

        # Progress dialogs are created on demand by _scan() and
        # update_library(); define them so that slots can test for None.
        self.progress = None
        self.update_library_progress = None

        self.ui = Ui_MainWindow()
        self.ui.setupUi(self)

        self.backend = Backend()

        self.ui.scan_comments.setText(edocuments.config.get("scan_comments"))

        default_index = 0
        for s in edocuments.config.get("scans", []):
            if s.get("default") is True:
                default_index = self.ui.scan_type.count()
            self.ui.scan_type.addItem(s.get("name"), s)
        self.ui.scan_type.setCurrentIndex(default_index)

        self.ui.scan_browse.clicked.connect(self.scan_browse)
        self.ui.scan_to.returnPressed.connect(self.scan_start)
        self.ui.scan_start.clicked.connect(self.scan_start)
        self.ui.open.clicked.connect(self.open_selected)
        self.ui.open_folder.clicked.connect(self.open_folder)

        self.image_dialog = Dialog()

        self.backend.scan_end.connect(self.end_scan)
        self.backend.scan_error.connect(self.on_scan_error)
        self.backend.update_library_progress.connect(
            self.on_update_update_library_progress)

        self.ui.search_text.textChanged.connect(self.search)
        self.ui.search_result_list.itemSelectionChanged.connect(
            self.selection_change)

        self.ui.library_update.triggered.connect(self.update_library)
        self.ui.library_optimize.triggered.connect(
            self.backend.optimize_library)
        self.ui.library_reset.triggered.connect(self.reset_library)

        self.backend.process.progress.connect(self.on_progress)

    def _open(self, path):
        """Open *path* with the ``open_cmd`` command of the configuration."""
        cmd = edocuments.config.get('open_cmd').split(' ')
        cmd.append(path)
        call(cmd)

    def open_selected(self):
        """Open the document selected in the result list."""
        item = self.ui.search_result_list.currentItem()
        if item is not None:
            self._open(edocuments.long_path(item.result.get('path')))

    def open_folder(self):
        """Open the folder containing the selected document."""
        item = self.ui.search_result_list.currentItem()
        if item is not None:
            self._open(os.path.dirname(
                edocuments.long_path(item.result.get('path'))))

    def selection_change(self):
        """Show the highlighted extract of the selected result."""
        item = self.ui.search_result_list.currentItem()
        if item is not None:
            self.ui.search_result_text.document().setHtml(
                item.result.get('highlight'))
        else:
            self.ui.search_result_text.document().setHtml('')

    def reset_library(self):
        """Empty the index after confirmation."""
        msg = QMessageBox(self)
        msg.setWindowTitle("Reset the library...")
        msg.setInformativeText("Are you sure to reset all you index?")
        msg.setStandardButtons(QMessageBox.Ok | QMessageBox.Cancel)
        if msg.exec() == QMessageBox.Ok:
            index().clear()

    def search(self, text):
        """Run the query of the search field and fill the result list.

        Documents located in a directory that matches too are not listed;
        they are counted on the entry of that directory instead.
        """
        model = self.ui.search_result_list.model()
        model.removeRows(0, model.rowCount())

        raw_results = index().search(self.ui.search_text.text())

        # Matching directories, with the number of hidden documents - 1.
        dirs = {
            r.get('path'): -1
            for r in raw_results if r.get('directory')
        }

        results = {}
        for index_, result in enumerate(raw_results):
            path_ = result.get('path')
            dir_ = os.path.dirname(path_)
            if dir_ not in dirs:
                results[path_] = [result, float(index_) / len(raw_results)]
            else:
                dirs[dir_] += 1

        for dir_, count in dirs.items():
            if dir_ in results:
                results[dir_][1] += count

        # NOTE(review): the weight grows with the rank returned by the index,
        # so the best ranked hits end up last - confirm this is intended.
        results = sorted(results.values(), key=lambda x: -x[1])

        for result, count in results:
            postfix = ' (%i)' % (count + 1) if result.get('directory') else ''
            item = QListWidgetItem(
                result['path'] + postfix,
                self.ui.search_result_list
            )
            item.result = result

    def scan_browse(self, event):
        """Let the user choose the destination of the scan."""
        filename = QFileDialog.getSaveFileName(
            self, "Scan to", directory=self.filename()
        )[0]
        if not filename:
            # Dialog cancelled: keep the current destination.
            return
        filename = re.sub(r"\.[a-z0-9A-Z]{2,5}$", "", filename)
        filename = edocuments.short_path(filename)
        self.ui.scan_to.setText(filename)

    def update_library(self):
        """Update the index in a background thread, showing the progress."""
        self.update_library_progress = QProgressDialog(
            "Scanning...", None, 0, 100, self)
        self.update_library_progress.setWindowTitle('Updating the library...')
        self.update_library_progress.setLabelText('Browsing the files...')
        self.update_library_progress.setWindowModality(Qt.WindowModal)
        self.update_library_progress.show()

        t = Thread(target=self.backend.do_update_library)
        t.start()

    def on_update_update_library_progress(self, pos, text, status):
        """Slot of ``Backend.update_library_progress``."""
        self.update_library_progress.setValue(pos)
        self.update_library_progress.setLabelText(text)
        self.statusBar().showMessage(status)

    def filename(self):
        """Return the absolute path of the scan destination."""
        return edocuments.long_path(self.ui.scan_to.text())

    def scan_start(self, event=None):
        """Check the destination, then start the scan."""
        if pathlib.Path(self.filename()).is_dir():
            err = QErrorMessage(self)
            err.setWindowTitle("eDocuments - Error")
            err.showMessage("The destination is a directory!")
            return

        destination, extension = self.backend.process.destination_filename(
            self.ui.scan_type.currentData().get("cmds"),
            self.filename()
        )

        if pathlib.Path(destination).is_file():
            msg = QMessageBox(self)
            msg.setWindowTitle("Scanning...")
            msg.setText("The destination file already exists")
            msg.setInformativeText("Do you want to overwrite it?")
            msg.setStandardButtons(
                QMessageBox.Ok | QMessageBox.Cancel | QMessageBox.Open)
            ret = msg.exec()
            if ret == QMessageBox.Ok:
                self._scan()
            elif ret == QMessageBox.Open:
                self._open(destination)
        else:
            self._scan()

    def _scan(self):
        """Run the selected scan type in a background thread."""
        cmds = self.ui.scan_type.currentData().get("cmds")

        # A cancellation of a previous scan must not abort this one.
        self.backend.process.cancel = False

        self.progress = QProgressDialog(
            "Scanning...", "Cancel", 0, len(cmds), self)
        self.progress.setWindowTitle("Scanning...")
        self.progress.setWindowModality(Qt.WindowModal)
        self.progress.setLabelText('Scanning...')
        self.progress.show()

        t = Thread(
            target=self.backend.do_scan,
            args=[
                self.filename(),
                self.ui.scan_type.currentData().get("cmds"),
                self.ui.scan_type.currentData().get("postprocess", []),
            ],
        )
        t.start()

    def on_progress(self, no, name, cmd_cmd, cmd):
        """Slot of ``Process.progress``: show the step, forward a cancel."""
        if self.progress is not None:
            self.progress.setValue(no)
            self.progress.setLabelText(cmd.get('display', ''))

            if self.progress.wasCanceled() is True:
                print("Cancel")
                self.backend.process.cancel = True
        self.statusBar().showMessage(cmd_cmd)

    def end_scan(self, filename):
        """Slot of ``Backend.scan_end``: preview the scan, prepare the next.

        A trailing page number in the destination is incremented.
        """
        self.progress.hide()

        self.image_dialog.set_image(filename)
        self.image_dialog.exec()

        self.ui.scan_to.setText(re.sub(
            ' ([0-9]{1,3})$',
            lambda m: ' ' + str(int(m.group(1)) + 1),
            self.ui.scan_to.text()
        ))

        self.statusBar().showMessage('')

    def on_scan_error(self, error):
        """Slot of ``Backend.scan_error``: report *error* to the user."""
        print('Error: %s' % error)

        # The failed scan will not report any further progress.
        if self.progress is not None:
            self.progress.hide()

        err = QErrorMessage(self)
        err.setWindowTitle("eDocuments - scan error")
        err.showMessage(error)
PK >H". . edocuments/ui/label_dialog.py# -*- coding: utf-8 -*-
# Form implementation generated from reading ui file 'edocuments/ui/label_dialog.ui'
#
# Created by: PyQt5 UI code generator 5.4.2
#
# WARNING! All changes made in this file will be lost!
from PyQt5 import QtCore, QtGui, QtWidgets
class Ui_Dialog(object):
    """Generated layout of the label dialog: a label above an Ok button box.

    Generated from 'edocuments/ui/label_dialog.ui'; manual changes are lost
    when the file is regenerated.
    """

    def setupUi(self, Dialog):
        """Create the widgets of *Dialog* and connect the button box."""
        Dialog.setObjectName("Dialog")
        Dialog.resize(194, 70)
        self.verticalLayout = QtWidgets.QVBoxLayout(Dialog)
        self.verticalLayout.setObjectName("verticalLayout")
        # Empty label; its content is set by the code using the dialog.
        self.label = QtWidgets.QLabel(Dialog)
        self.label.setText("")
        self.label.setObjectName("label")
        self.verticalLayout.addWidget(self.label)
        self.buttonBox = QtWidgets.QDialogButtonBox(Dialog)
        self.buttonBox.setOrientation(QtCore.Qt.Horizontal)
        self.buttonBox.setStandardButtons(QtWidgets.QDialogButtonBox.Ok)
        self.buttonBox.setObjectName("buttonBox")
        self.verticalLayout.addWidget(self.buttonBox)

        self.retranslateUi(Dialog)
        self.buttonBox.accepted.connect(Dialog.accept)
        self.buttonBox.rejected.connect(Dialog.reject)
        QtCore.QMetaObject.connectSlotsByName(Dialog)

    def retranslateUi(self, Dialog):
        """Set the translatable texts of *Dialog*."""
        _translate = QtCore.QCoreApplication.translate
        Dialog.setWindowTitle(_translate("Dialog", "Dialog"))
PK XH$a edocuments/ui/main.py# -*- coding: utf-8 -*-
# Form implementation generated from reading ui file 'edocuments/ui/main.ui'
#
# Created by: PyQt5 UI code generator 5.4.2
#
# WARNING! All changes made in this file will be lost!
from PyQt5 import QtCore, QtGui, QtWidgets
class Ui_MainWindow(object):
    """Generated layout of the main window: "Search" and "Scan" tabs, a
    "Library" menu and a status bar.

    Generated from 'edocuments/ui/main.ui'; manual changes are lost when the
    file is regenerated.
    """

    def setupUi(self, MainWindow):
        """Create the widgets, layouts, menu and actions of *MainWindow*."""
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(583, 525)
        icon = QtGui.QIcon()
        icon.addPixmap(QtGui.QPixmap("../../ressources/edocuments.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
        MainWindow.setWindowIcon(icon)
        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName("centralwidget")
        self.verticalLayout = QtWidgets.QVBoxLayout(self.centralwidget)
        self.verticalLayout.setObjectName("verticalLayout")
        self.tabWidget = QtWidgets.QTabWidget(self.centralwidget)
        self.tabWidget.setEnabled(True)
        self.tabWidget.setObjectName("tabWidget")
        # "Search" tab: query field, result list, result text and buttons.
        self.search = QtWidgets.QWidget()
        self.search.setObjectName("search")
        self.verticalLayout_4 = QtWidgets.QVBoxLayout(self.search)
        self.verticalLayout_4.setObjectName("verticalLayout_4")
        self.search_text = QtWidgets.QLineEdit(self.search)
        self.search_text.setObjectName("search_text")
        self.verticalLayout_4.addWidget(self.search_text)
        self.horizontalLayout_2 = QtWidgets.QHBoxLayout()
        self.horizontalLayout_2.setSizeConstraint(QtWidgets.QLayout.SetDefaultConstraint)
        self.horizontalLayout_2.setObjectName("horizontalLayout_2")
        self.search_result_list = QtWidgets.QListWidget(self.search)
        self.search_result_list.setObjectName("search_result_list")
        self.horizontalLayout_2.addWidget(self.search_result_list)
        self.verticalLayout_5 = QtWidgets.QVBoxLayout()
        self.verticalLayout_5.setObjectName("verticalLayout_5")
        self.search_result_text = QtWidgets.QTextBrowser(self.search)
        self.search_result_text.setObjectName("search_result_text")
        self.verticalLayout_5.addWidget(self.search_result_text)
        self.horizontalWidget = QtWidgets.QWidget(self.search)
        self.horizontalWidget.setObjectName("horizontalWidget")
        self.horizontalLayout = QtWidgets.QHBoxLayout(self.horizontalWidget)
        self.horizontalLayout.setObjectName("horizontalLayout")
        spacerItem = QtWidgets.QSpacerItem(40, 20, QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Minimum)
        self.horizontalLayout.addItem(spacerItem)
        self.open_folder = QtWidgets.QPushButton(self.horizontalWidget)
        self.open_folder.setObjectName("open_folder")
        self.horizontalLayout.addWidget(self.open_folder)
        self.open = QtWidgets.QPushButton(self.horizontalWidget)
        self.open.setObjectName("open")
        self.horizontalLayout.addWidget(self.open)
        self.verticalLayout_5.addWidget(self.horizontalWidget)
        self.horizontalLayout_2.addLayout(self.verticalLayout_5)
        self.horizontalLayout_2.setStretch(0, 1)
        self.verticalLayout_4.addLayout(self.horizontalLayout_2)
        self.tabWidget.addTab(self.search, "")
        # "Scan" tab: scan type, destination with its buttons, comments.
        self.scan = QtWidgets.QWidget()
        self.scan.setObjectName("scan")
        self.verticalLayout_2 = QtWidgets.QVBoxLayout(self.scan)
        self.verticalLayout_2.setObjectName("verticalLayout_2")
        self.horizontalLayout_3 = QtWidgets.QHBoxLayout()
        self.horizontalLayout_3.setObjectName("horizontalLayout_3")
        self.label = QtWidgets.QLabel(self.scan)
        self.label.setObjectName("label")
        self.horizontalLayout_3.addWidget(self.label)
        self.scan_type = QtWidgets.QComboBox(self.scan)
        self.scan_type.setObjectName("scan_type")
        self.horizontalLayout_3.addWidget(self.scan_type)
        self.verticalLayout_2.addLayout(self.horizontalLayout_3)
        self.direct = QtWidgets.QGroupBox(self.scan)
        self.direct.setObjectName("direct")
        self.horizontalLayout_5 = QtWidgets.QHBoxLayout(self.direct)
        self.horizontalLayout_5.setObjectName("horizontalLayout_5")
        self.scan_to = QtWidgets.QLineEdit(self.direct)
        self.scan_to.setObjectName("scan_to")
        self.horizontalLayout_5.addWidget(self.scan_to)
        self.scan_browse = QtWidgets.QPushButton(self.direct)
        self.scan_browse.setObjectName("scan_browse")
        self.horizontalLayout_5.addWidget(self.scan_browse)
        self.scan_start = QtWidgets.QPushButton(self.direct)
        self.scan_start.setObjectName("scan_start")
        self.horizontalLayout_5.addWidget(self.scan_start)
        self.verticalLayout_2.addWidget(self.direct)
        self.groupBox = QtWidgets.QGroupBox(self.scan)
        self.groupBox.setObjectName("groupBox")
        self.verticalLayout_6 = QtWidgets.QVBoxLayout(self.groupBox)
        self.verticalLayout_6.setObjectName("verticalLayout_6")
        self.scan_comments = QtWidgets.QLabel(self.groupBox)
        self.scan_comments.setText("")
        self.scan_comments.setObjectName("scan_comments")
        self.verticalLayout_6.addWidget(self.scan_comments)
        self.verticalLayout_2.addWidget(self.groupBox)
        spacerItem1 = QtWidgets.QSpacerItem(20, 40, QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Expanding)
        self.verticalLayout_2.addItem(spacerItem1)
        self.tabWidget.addTab(self.scan, "")
        self.verticalLayout.addWidget(self.tabWidget)
        MainWindow.setCentralWidget(self.centralwidget)
        # Menu bar, status bar and the three actions of the library menu.
        self.menubar = QtWidgets.QMenuBar(MainWindow)
        self.menubar.setGeometry(QtCore.QRect(0, 0, 583, 25))
        self.menubar.setObjectName("menubar")
        self.menuUpdate_library = QtWidgets.QMenu(self.menubar)
        self.menuUpdate_library.setObjectName("menuUpdate_library")
        MainWindow.setMenuBar(self.menubar)
        self.statusbar = QtWidgets.QStatusBar(MainWindow)
        self.statusbar.setObjectName("statusbar")
        MainWindow.setStatusBar(self.statusbar)
        self.library_update = QtWidgets.QAction(MainWindow)
        self.library_update.setObjectName("library_update")
        self.library_reset = QtWidgets.QAction(MainWindow)
        self.library_reset.setObjectName("library_reset")
        self.library_optimize = QtWidgets.QAction(MainWindow)
        self.library_optimize.setObjectName("library_optimize")
        self.menuUpdate_library.addAction(self.library_update)
        self.menuUpdate_library.addAction(self.library_optimize)
        self.menuUpdate_library.addAction(self.library_reset)
        self.menubar.addAction(self.menuUpdate_library.menuAction())

        self.retranslateUi(MainWindow)
        self.tabWidget.setCurrentIndex(0)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)

    def retranslateUi(self, MainWindow):
        """Set the translatable texts of the widgets, tabs and actions."""
        _translate = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "eDocuments - a Simple and Productive Personal Documents Library"))
        self.open_folder.setText(_translate("MainWindow", "Open folder"))
        self.open.setText(_translate("MainWindow", "Open"))
        self.tabWidget.setTabText(self.tabWidget.indexOf(self.search), _translate("MainWindow", "Search"))
        self.label.setText(_translate("MainWindow", "Scan type"))
        self.direct.setTitle(_translate("MainWindow", "Scan"))
        self.scan_browse.setText(_translate("MainWindow", "Browse..."))
        self.scan_start.setText(_translate("MainWindow", "Scan"))
        self.groupBox.setTitle(_translate("MainWindow", "Comments"))
        self.tabWidget.setTabText(self.tabWidget.indexOf(self.scan), _translate("MainWindow", "Scan"))
        self.menuUpdate_library.setTitle(_translate("MainWindow", "&Library"))
        self.library_update.setText(_translate("MainWindow", "&Update"))
        self.library_reset.setText(_translate("MainWindow", "Reset"))
        self.library_optimize.setText(_translate("MainWindow", "Optimise"))
PK =H edocuments/ui/__init__.pyPK >HJ
A A $ edocuments/ressources/edocuments.svg
PK J[HO ! edocuments/ressources/config.yaml#lang:
root_folder: "~/Documents"
nb_process: 8
ignore:
- '*_fichiers/*'
- '*_files/*'
scan_comments: |
Recommended file names:
* - -
scans:
- name: Color
cmds:
- scanc
- crop
- 2png
- auto-rotate
postprocess:
- optipng
- name: Black & White
default: true
cmds:
- scan
- crop
- cleanup
- 2png
- auto-rotate
postprocess:
- optipng
to_txt:
- extension: png
cmds:
- ocr
- extension: jpeg
cmds:
- ocr
- extension: pdf
cmds:
- pdf2txt
- extension: txt
cmds:
- cp
- extension: csv
cmds:
- cp
- extension: odt
cmds:
- odt2txt
- extension: ods
cmds:
- ods2txt
- extension: ods
cmds:
- ods2txt
- extension: docx
cmds:
- docx2txt
open_cmd: gnome-open
task:
- name: Optimise images
on_ext: png
cmds:
- optipng
- name: Fix files names
cmds:
- fixextensions_case
- fixextensions_jpeg
cmds:
scan:
display: Scanning...
cmd: "scanimage --format tiff --resolution 300 --mode Gray --gamma 1 -l 0 -t 0 -x 216.069 -y 297.011 > {out}"
out_ext: tiff
scanc:
display: Scanning...
cmd: "scanimage --format tiff --resolution 300 --gamma 1 -l 0 -t 0 -x 216.069 -y 297.011 > {out}"
out_ext: tiff
crop:
display: Cropping.
cmd: "convert {in} -crop `convert {in} -crop 2502x3458+25+25 +repage -level 20%,80%,4 -virtual-pixel edge -blur 0x5 -fuzz 4% -trim -format '%[fx:w+50]x%[fx:h+50]+%[fx:page.x]+%[fx:page.y]' info:` +repage -normalize {out}"
cleanup:
display: Cleanup the picture.
cmd: "convert {in} -background white +matte -fuzz 10% -fill white -level 10%,80%,1 +matte -format tiff {out}"
2png:
display: Convert to PNG.
cmd: "convert {in} -format png {out}"
out_ext: png
auto-rotate:
display: Automatic rotate.
cmd: "convert {in} -rotate `(tesseract -psm 0 -l ${lang} {in} text 2>&1 || echo 'Orientation in degrees 0') | grep 'Orientation in degrees' | awk '{{print $4}}'` {out}"
ocr:
display: Optical character recognition.
cmd: "tesseract -l ${lang} {in} stdout > {out}"
out_ext: txt
pdf2txt:
display: Convert PDF to text.
cmd: "pdftotext {in} {out}"
out_ext: txt
odt2txt:
display: Convert OpenDocument to text.
cmd: "odt2txt {in} --output={out}"
out_ext: txt
ods2txt:
display: Convert OpenDocument to text.
cmd: "ods2txt {in} --output={out}"
out_ext: txt
docx2txt:
display: Convert Docx to text.
cmd: "docx2txt {in} {out}"
out_ext: txt
cp:
display: Copy.
cmd: "cp {in} {out}"
optipng:
display: Compress the picture.
cmd: "optipng -o7 {in}"
inplace: true
fixextensions_case:
display: Fix extension type case.
type: rename
from: '\.([a-zA-Z]+)$'
format: lower
fixextensions_jpeg:
display: Fix the extensions (.jpg -> .jpeg).
type: rename
from: '\.jpg$'
to: '.jpeg'
PK Ճ>HT\, , $ edocuments/ressources/edocuments.pngPNG
IHDR \rf sBIT|d pHYs
D tEXtSoftware www.inkscape.org< IDATx$u&=82άh @BHq@QɑIiF3&45蜽5flG+EiFFP! Dj q]wUVUfFȨ|@vWx}Sԧ>O}Sԧ>O}Sԧ>O}Sԧ>O}Sԧ>O}Sԧ>O}Sԧ>O}Sԧ>O}Sԧ>O}Sԧ>O}Sԧ>O}Sԧ>O}Sԧ>O}Sԧ>O}Sԧ>O}Sԧ>O}Sԧ>O}Sԧ>O}z_3N3Ӎnn~G=I2g0Bb? A
u!į63>)N60Y!QUzX3nnSv+7+
I`H
sxDw35>ZzNHLH`(qݬ +;188Ohh$E}8P]"K˘e@*çٶ=MH}
O;Je1x+@50:at0HMbX X1 ?S1W
_̶