PK ! dikicli/__init__.pyPK ! ڠ'NP P dikicli/cli.pyimport argparse
import logging
import logging.config
import sys
from .core import CACHE_DIR
from .core import DEBUG
from .core import Config
from .core import WordNotFound
from .core import __version__
from .core import display_index
from .core import translate
from .core import wrap_text
# The log file is kept inside the cache directory.
LOG_FILE = CACHE_DIR.joinpath("diki.log")

# The file handler configured below needs the directory to exist.
# ``exist_ok=True`` replaces the former exists()-then-mkdir() sequence, which
# could fail if another process created the directory in between.
CACHE_DIR.mkdir(parents=True, exist_ok=True)

# Logging layout for the "dikicli" logger:
#   * console handler - bare messages, WARNING and above
#   * file handler    - verbose records, rotated at 1 MiB with 5 backups
# The logger itself lets INFO through (DEBUG when DEBUG is truthy).
logging.config.dictConfig(
    {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {
            "verbose": {
                "format": "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
            },
            "simple": {"format": "%(message)s"},
        },
        "handlers": {
            "console": {
                "level": logging.WARNING,
                "class": "logging.StreamHandler",
                "formatter": "simple",
            },
            "file": {
                "class": "logging.handlers.RotatingFileHandler",
                "filename": LOG_FILE,
                "maxBytes": 1048576,
                "backupCount": 5,
                "formatter": "verbose",
            },
        },
        "loggers": {
            "dikicli": {
                "handlers": ["file", "console"],
                "level": logging.DEBUG if DEBUG else logging.INFO,
            }
        },
    }
)

logger = logging.getLogger(__name__)
def get_parser():
    """Build the argument parser of the ``diki`` command.

    Returns
    -------
    argparse.ArgumentParser
        Parser with the general options (``--version``, ``--create-config``,
        ``-r/--refresh``) and two option groups: "translation" and "html".
    """
    version_text = "dikicli {version}".format(version=__version__)

    cli = argparse.ArgumentParser(
        description="Commandline interface for diki.pl", prog="diki"
    )

    # General options.
    cli.add_argument("--version", version=version_text, action="version")
    cli.add_argument(
        "--create-config",
        help="create default configuration file",
        action="store_true",
    )
    cli.add_argument("-r", "--refresh", help="ignore cache", action="store_true")

    # Options describing what to translate and how to print it.
    translation_group = cli.add_argument_group("translation")
    translation_group.add_argument("word", help="word to translate", nargs="?")
    translation_group.add_argument(
        "-p",
        "--pol-eng",
        help="translate polish word to english",
        action="store_true",
    )
    translation_group.add_argument(
        "-w",
        "--linewrap",
        help=("wrap lines longer than WIDTH; set to 0 to disable wrapping"),
        metavar="WIDTH",
    )

    # Options related to the generated html index.
    html_group = cli.add_argument_group("html")
    html_group.add_argument(
        "-i",
        "--display-index",
        help="open index file in web browser",
        action="store_true",
    )

    return cli
def main():
    """Entry point of the ``diki`` command.

    Parses the command line, loads the configuration and then runs the first
    requested action, in this order: create the default config file,
    translate a word, open the index file in a web browser.

    Every action ends the process through ``sys.exit``: status 0 on success,
    status 1 when the word was not found or the index file is missing.
    Running without any argument prints the usage and exits with status 1.
    An invalid ``-w/--linewrap`` value is reported through ``parser.error``.
    If no action was requested the function simply returns.
    """
    parser = get_parser()
    args = parser.parse_args()

    # if ran with no arguments print usage and exit
    if len(sys.argv) == 1:
        # TODO: make usage more informative
        parser.print_usage()
        sys.exit(1)

    config = Config()
    config.read_config()

    # WIDTH given on the command line overrides the configured one. Validate
    # it here so that a bad value produces a regular usage error instead of
    # an uncaught ValueError; negative widths are rejected just like
    # Config.read_config rejects them for the config file.
    if args.linewrap:
        try:
            width = int(args.linewrap)
        except ValueError:
            width = -1
        if width < 0:
            parser.error(
                "argument -w/--linewrap: WIDTH must be a non-negative integer"
            )
        config["linewrap"] = str(width)
    linewrap = int(config["linewrap"])

    # create configuration file
    if args.create_config:
        config_file = config.create_default_config()
        print("New config file created: {}".format(config_file))
        sys.exit(0)

    # handle word translation
    if args.word:
        use_cache = not args.refresh
        to_eng = args.pol_eng
        try:
            translation = translate(args.word, config, use_cache, to_eng)
            wrapped_text = wrap_text(translation, linewrap)
            print(wrapped_text)
            sys.exit(0)
        except WordNotFound:
            sys.exit(1)

    # open index file in browser
    if args.display_index:
        try:
            display_index(config)
            sys.exit(0)
        except FileNotFoundError:
            sys.exit(1)
PK ! ]}8J J dikicli/core.pyimport configparser
import html
import logging
import os
import re
import shutil
import sys
import textwrap
import urllib.parse
import urllib.request
import webbrowser
from collections import namedtuple
from itertools import zip_longest
from pathlib import Path
from bs4 import BeautifulSoup
from .templates import CONFIG_TEMPLATE
from .templates import HTML_TEMPLATE
__version__ = "0.3"

# Records describing a parsed translation:
#   Translation  -> word plus its parts of speech
#   PartOfSpeech -> part name plus its meanings
#   Meaning      -> meaning plus its examples
Meaning = namedtuple("Meaning", ["meaning", "examples"])
PartOfSpeech = namedtuple("PartOfSpeech", ["part", "meanings"])
Translation = namedtuple("Translation", ["word", "parts_of_speech"])

# XDG base directories. The XDG Base Directory specification requires a
# variable that is set but empty to be treated like an unset one; a plain
# ``environ.get(name, default)`` would return "" and the paths below would
# silently become relative to the current working directory.
XDG_DATA_HOME = os.environ.get("XDG_DATA_HOME") or "~/.local/share"
XDG_CACHE_HOME = os.environ.get("XDG_CACHE_HOME") or "~/.cache"
XDG_CONFIG_HOME = os.environ.get("XDG_CONFIG_HOME") or "~/.config"

# Application locations; each one can be overridden with its own DIKI_*
# environment variable.
DATA_DIR = Path(
    os.environ.get("DIKI_DATA_DIR", os.path.join(XDG_DATA_HOME, "dikicli"))
).expanduser()
CACHE_DIR = Path(
    os.environ.get("DIKI_CACHE_DIR", os.path.join(XDG_CACHE_HOME, "dikicli"))
).expanduser()
CONFIG_FILE = Path(
    os.environ.get(
        "DIKI_CONFIG_FILE", os.path.join(XDG_CONFIG_HOME, "dikicli", "diki.conf")
    )
).expanduser()

# Raw value of DIKI_DEBUG (None when unset); any non-empty string is truthy.
DEBUG = os.environ.get("DIKI_DEBUG")

logger = logging.getLogger(__name__)
class WordNotFound(Exception):
    """Raised when no translation can be found for the requested word."""
class Config:
    """Application settings backed by an INI-style configuration file.

    All options live in one ``configparser`` section named "dikicli", which
    is also the parser's default section, so built-in defaults are used for
    every option missing from the file. Options are read and written with
    item access: ``config["linewrap"]``.
    """

    def __init__(self):
        self.config_file = CONFIG_FILE
        self.default_config = {
            "data dir": DATA_DIR.as_posix(),
            "linewrap": "78",
            "colors": "yes",
            "web browser": "default",
        }
        self.config = configparser.ConfigParser(
            defaults=self.default_config, default_section="dikicli"
        )

    def __getitem__(self, key):
        """Return the value of option ``key`` as a string."""
        return self.config["dikicli"][key]

    def __setitem__(self, key, value):
        """Set option ``key``.

        configparser stores strings only and raises TypeError for anything
        else, so the value is converted with ``str`` first.
        """
        self.config["dikicli"][key] = str(value)

    def read_config(self):
        """
        Read config from a file.

        Invalid config values will be discarded and defaults used
        in their place. A missing config file is not an error: the
        defaults stay in effect.
        """
        _config = self.config["dikicli"]
        # TODO: what if file doesn't exist?
        if self.config_file.is_file():
            logger.debug("Reading config file: %s", self.config_file.as_posix())
            with open(self.config_file, mode="r") as f:
                self.config.read_file(f)

        # DIKI_DATA_DIR should always take precedence if it's set
        if "DIKI_DATA_DIR" in os.environ:
            _config["data dir"] = DATA_DIR.as_posix()

        # linewrap has to be a non-negative integer
        w = _config.get("linewrap")
        try:
            w = int(w)
            if w < 0:
                raise ValueError()
        except ValueError:
            logger.warning("Config: Invalid linewrap value. Using default.")
            _config["linewrap"] = self.default_config["linewrap"]

        # colors has to be one of the accepted yes/no spellings
        c = _config.get("colors")
        if c.lower() not in ["yes", "no", "true", "false"]:
            logger.warning("Config: Invalid colors value. Using default.")
            _config["colors"] = self.default_config["colors"]

    def create_default_config(self):
        """Write default config file to disk.

        Backs up existing configuration file (as ``<name>.old``).

        Returns
        -------
        filename : string
            Path to config file.
        """
        filename = self.config_file.as_posix()
        logger.info("Creating default config file: %s", filename)

        # exist_ok avoids failing when the directory appears between a
        # separate existence check and the mkdir call.
        self.config_file.parent.mkdir(parents=True, exist_ok=True)

        if self.config_file.is_file():
            backup = filename + ".old"
            logger.info("Saving config file backup at: %s", backup)
            shutil.copy(filename, backup)

        # Render the template before opening the file: opening with "w"
        # truncates it, so a formatting error must not leave an empty config.
        config_string = CONFIG_TEMPLATE.format(
            data_dir=self.default_config["data dir"],
            linewrap=self.default_config["linewrap"],
            colors=self.default_config["colors"],
            browser=self.default_config["web browser"],
        )
        with open(self.config_file, mode="w") as f:
            f.write(config_string)
        return filename
def _parse_html(html_dump, native=False):
    """Parse html string.

    Parameters
    ----------
    html_dump : str
        HTML content.
    native : bool, optional
        Whether to translate from native to foreign language.

    Returns
    -------
    translations : list
        List of ``Translation`` tuples, one per dictionary entity that
        contains meanings for the requested direction.

    Raises
    ------
    WordNotFound
        If word can't be found. The message is the text of the suggestions
        box when the page has one, a generic message otherwise.
    """
    # pylint: disable=too-many-locals
    soup = BeautifulSoup(html_dump, "html.parser")

    # The list holding the meanings differs between translation directions.
    if native:
        meanings_selector = "ol.nativeToForeignEntrySlices"
    else:
        meanings_selector = "ol.foreignToNativeMeanings"

    # Example text is split into its pieces on runs of 3+ whitespace
    # characters (presumably the sentence and its translation).
    # Compiled once instead of once per meaning.
    example_separator = re.compile(r"\s{3,}")

    translations = []
    for entity in soup.select(
        "div.diki-results-left-column > div > div.dictionaryEntity"
    ):
        meaning_lists = entity.select(meanings_selector)
        if not meaning_lists:
            # this can happen when word exists in both polish and english, e.g. 'pet'
            continue

        word = tuple(e.get_text().strip() for e in entity.select("div.hws h1 span.hw"))
        parts = [p.get_text().strip() for p in entity.select("span.partOfSpeech")]

        parts_list = []
        # zip_longest pads the shorter side with None: a meaning list without
        # a label is kept with part=None, while a label without a meaning
        # list has nothing to parse and is skipped.
        for part, meaning_list in zip_longest(parts, meaning_lists):
            if meaning_list is None:
                continue

            part_meanings = []
            for elem in meaning_list.find_all("li", recursive=False):
                examples = []
                if not native:
                    meaning = [hw.get_text().strip() for hw in elem.select("span.hw")]
                    for sentence in elem.find_all("div", class_="exampleSentence"):
                        examples.append(
                            example_separator.split(sentence.get_text().strip())
                        )
                else:
                    label = elem.find("span", recursive=False)
                    # An item without a direct <span> child yields no meaning
                    # text instead of raising AttributeError.
                    meaning = [label.get_text().strip()] if label is not None else []
                    # When translating to polish 'examples' are just synonyms of translation
                    synonyms = ", ".join(
                        sorted(
                            {
                                x.get_text().strip()
                                for x in elem.select("ul > li > span.hw")
                            }
                        )
                    )
                    if synonyms:
                        examples.append([synonyms, None])
                part_meanings.append(Meaning(meaning, examples))
            parts_list.append(PartOfSpeech(part, part_meanings))
        translations.append(Translation(word, parts_list))

    if translations:
        return translations

    # if translation wasn't found check if there are any suggestions
    suggestions = soup.find("div", class_="dictionarySuggestions")
    if suggestions:
        raise WordNotFound(suggestions.get_text().strip())
    raise WordNotFound("Nie znaleziono tłumaczenia wpisanej frazy")
def _parse_cached(html_dump):
    """Rebuild translations from the html of a cached translation file.

    Parameters
    ----------
    html_dump : string
        HTML content

    Returns
    -------
    translations : list
        List of ``Translation`` tuples, one per ``div.translation`` element.
    """
    document = BeautifulSoup(html_dump, "html.parser")
    result = []
    for block in document.find_all("div", class_="translation"):
        headword = tuple(h.get_text() for h in block.select("div.word > h2"))

        parts_of_speech = []
        for section in block.find_all("div", class_="part-of-speech"):
            # The part name is optional; it stays None when the paragraph
            # is missing. Surrounding square brackets are stripped.
            part_name = section.find("p", class_="part-name")
            if part_name:
                part_name = part_name.get_text().strip("[]")

            entries = []
            for item in section.find_all("div", class_="meaning"):
                texts = [span.get_text() for span in item.select("li > span")]
                # Every <p> is one example; its <span>s are the example parts.
                examples = [
                    [span.get_text() for span in paragraph.find_all("span")]
                    for paragraph in item.find_all("p")
                ]
                entries.append(Meaning(texts, examples))

            parts_of_speech.append(PartOfSpeech(part_name, entries))

        result.append(Translation(headword, parts_of_speech))
    return result
def _cache_lookup(word, data_dir, native=False):
    """Look a word up in the cache of saved translations.

    Parameters
    ----------
    word : str
        Word to check in cache.
    data_dir : pathlib.Path
        Cache directory location.
    native : bool, optional
        Whether to look in the "translations_native" subdirectory instead
        of "translations".

    Returns
    -------
    translation : list or None
        Result of parsing the cached file ``<word>.html``, or None when
        there is no such file.
    """
    subdir = "translations" + ("_native" if native else "")
    logger.debug("Cache lookup: %s", word)

    cached_file = data_dir.joinpath(subdir, "{}.html".format(word))
    if not cached_file.is_file():
        logger.debug("Cache miss: %s", word)
        return None

    with open(cached_file, mode="r") as handle:
        logger.debug("Cache found: %s", word)
        # TODO: not sure if we should parse data here
        return _parse_cached(handle.read())
def _get_words(data_dir):
"""Get list of words from history file.
Parameters
----------
data_dir : pathlib.Path
Directory where data is saved.
Returns
-------
word_list : list of str
List of words.
"""
words_file = data_dir.joinpath("words.txt")
word_list = []
if not words_file.is_file():
return word_list
with open(words_file, mode="r") as f:
for l in f:
line = l.rstrip()
word_list.append(line)
return word_list
def _save_to_history(word, data_dir):
    """Write word to history file.

    The word is appended to ``words.txt`` only if it is not there yet.

    Parameters
    ----------
    word : str
        Word to save to history.
    data_dir : pathlib.Path
        Directory where history file should be saved.
        data_dir and its parent directories will be created if needed.
    """
    if not data_dir.exists():
        logger.debug("Creating DATA DIR: %s", data_dir.as_posix())
        # exist_ok: do not fail if the directory was created by someone else
        # between the check above and this call.
        data_dir.mkdir(parents=True, exist_ok=True)

    if word not in _get_words(data_dir):
        # Plain append mode is enough; the file is never read through this handle.
        with open(data_dir.joinpath("words.txt"), mode="a") as f:
            logger.debug("Adding to history: %s", word)
            f.write(word + "\n")
def _create_html_file_content(translations):
"""Create html string out of translation dict.
Parameters
----------
tralnslations : dict
Dictionary of word translations.
Returns
-------
str:
html string of translation
"""
content = []
for i1, t in enumerate(translations):
if i1 > 0:
content.append("
")
content.append('
[{part}]
'.format(part=t2.part)) content.append("{ex}
{tr}