PK!dikicli/__init__.pyPK!ڠ'NPPdikicli/cli.pyimport argparse import logging import logging.config import sys from .core import CACHE_DIR from .core import DEBUG from .core import Config from .core import WordNotFound from .core import __version__ from .core import display_index from .core import translate from .core import wrap_text LOG_FILE = CACHE_DIR.joinpath("diki.log") if not CACHE_DIR.exists(): CACHE_DIR.mkdir(parents=True) logging.config.dictConfig( { "version": 1, "disable_existing_loggers": False, "formatters": { "verbose": { "format": "%(asctime)s - %(levelname)s - %(name)s - %(message)s" }, "simple": {"format": "%(message)s"}, }, "handlers": { "console": { "level": logging.WARNING, "class": "logging.StreamHandler", "formatter": "simple", }, "file": { "class": "logging.handlers.RotatingFileHandler", "filename": LOG_FILE, "maxBytes": 1048576, "backupCount": 5, "formatter": "verbose", }, }, "loggers": { "dikicli": { "handlers": ["file", "console"], "level": logging.DEBUG if DEBUG else logging.INFO, } }, } ) logger = logging.getLogger(__name__) def get_parser(): parser = argparse.ArgumentParser( prog="diki", description="Commandline interface for diki.pl" ) parser.add_argument( "--version", action="version", version="dikicli {version}".format(version=__version__), ) parser.add_argument( "--create-config", action="store_true", help="create default configuration file" ) parser.add_argument("-r", "--refresh", action="store_true", help="ignore cache") translation = parser.add_argument_group("translation") translation.add_argument("word", nargs="?", help="word to translate") translation.add_argument( "-p", "--pol-eng", action="store_true", help="translate polish word to english" ) translation.add_argument( "-w", "--linewrap", metavar="WIDTH", help=("wrap lines longer than WIDTH; set to 0 to disable wrapping"), ) html = parser.add_argument_group("html") html.add_argument( "-i", "--display-index", action="store_true", help="open index file in web browser", ) return parser def main(): parser = get_parser() args = parser.parse_args() # if ran with no arguments print usage and exit if len(sys.argv) == 1: # TODO: make usage more informative parser.print_usage() sys.exit(1) config = Config() config.read_config() if args.linewrap: config["linewrap"] = args.linewrap linewrap = int(config["linewrap"]) # create configuration file if args.create_config: config_file = config.create_default_config() print("New config file created: {}".format(config_file)) sys.exit(0) # handle word translation if args.word: use_cache = not args.refresh to_eng = args.pol_eng try: translation = translate(args.word, config, use_cache, to_eng) wrapped_text = wrap_text(translation, linewrap) print(wrapped_text) sys.exit(0) except WordNotFound: sys.exit(1) # open index file in browser if args.display_index: try: display_index(config) sys.exit(0) except FileNotFoundError: sys.exit(1) PK!]}8JJdikicli/core.pyimport configparser import html import logging import os import re import shutil import sys import textwrap import urllib.parse import urllib.request import webbrowser from collections import namedtuple from itertools import zip_longest from pathlib import Path from bs4 import BeautifulSoup from .templates import CONFIG_TEMPLATE from .templates import HTML_TEMPLATE __version__ = "0.3" Meaning = namedtuple("Meaning", ["meaning", "examples"]) PartOfSpeech = namedtuple("PartOfSpeech", ["part", "meanings"]) Translation = namedtuple("Translation", ["word", "parts_of_speech"]) XDG_DATA_HOME = os.environ.get("XDG_DATA_HOME", "~/.local/share") XDG_CACHE_HOME = os.environ.get("XDG_CACHE_HOME", "~/.cache") XDG_CONFIG_HOME = os.environ.get("XDG_CONFIG_HOME", "~/.config") DATA_DIR = Path( os.environ.get("DIKI_DATA_DIR", os.path.join(XDG_DATA_HOME, "dikicli")) ).expanduser() CACHE_DIR = Path( os.environ.get("DIKI_CACHE_DIR", os.path.join(XDG_CACHE_HOME, "dikicli")) ).expanduser() CONFIG_FILE = Path( os.environ.get( "DIKI_CONFIG_FILE", os.path.join(XDG_CONFIG_HOME, "dikicli", "diki.conf") ) ).expanduser() DEBUG = os.environ.get("DIKI_DEBUG") logger = logging.getLogger(__name__) class WordNotFound(Exception): pass class Config: def __init__(self): self.config_file = CONFIG_FILE self.default_config = { "data dir": DATA_DIR.as_posix(), "linewrap": "78", "colors": "yes", "web browser": "default", } self.config = configparser.ConfigParser( defaults=self.default_config, default_section="dikicli" ) def __getitem__(self, key): return self.config["dikicli"][key] def __setitem__(self, key, value): self.config["dikicli"][key] = value def read_config(self): """ Read config from a file. Invalid config values will be discarded and defaults used in their place. """ _config = self.config["dikicli"] # TODO: what if file doesn't exist? if self.config_file.is_file(): logger.debug("Reading config file: %s", self.config_file.as_posix()) with open(self.config_file, mode="r") as f: self.config.read_file(f) # DIKI_DATA_DIR should always take precedence if it's set if "DIKI_DATA_DIR" in os.environ: _config["data dir"] = DATA_DIR.as_posix() w = _config.get("linewrap") try: w = int(w) if w < 0: raise ValueError() except ValueError: logger.warning("Config: Invalid linewrap value. Using default.") _config["linewrap"] = self.default_config["linewrap"] c = _config.get("colors") if c.lower() not in ["yes", "no", "true", "false"]: logger.warning("Config: Invalid colors value. Using default.") _config["colors"] = self.default_config["colors"] def create_default_config(self): """Write default config file to disk. Backs up existing configuration file. Returns ------- filename : string Path to config file. """ filename = self.config_file.as_posix() logger.info("Creating default config file: %s", filename) config_dir = self.config_file.parent if not config_dir.exists(): config_dir.mkdir(parents=True) if self.config_file.is_file(): backup = filename + ".old" logger.info("Saving config file backup at: %s", backup) shutil.copy(filename, backup) with open(self.config_file, mode="w") as f: config_string = CONFIG_TEMPLATE.format( data_dir=self.default_config["data dir"], linewrap=self.default_config["linewrap"], colors=self.default_config["colors"], browser=self.default_config["web browser"], ) f.write(config_string) return filename def _parse_html(html_dump, native=False): """Parse html string. Parameters ---------- html_dump : str HTML content. native : bool, optional Whether to translate from native to foreign language. Returns ------- translations : list Translations list. Raises ------ WordNotFound If word can't be found. """ # pylint: disable=too-many-locals soup = BeautifulSoup(html_dump, "html.parser") translations = [] for entity in soup.select( "div.diki-results-left-column > div > div.dictionaryEntity" ): if not native: meanings = entity.select("ol.foreignToNativeMeanings") else: meanings = entity.select("ol.nativeToForeignEntrySlices") if not meanings: # this can happen when word exists in both polish and english, e.g. 'pet' continue word = tuple(e.get_text().strip() for e in entity.select("div.hws h1 span.hw")) parts = [p.get_text().strip() for p in entity.select("span.partOfSpeech")] parts_list = [] for part, m in zip_longest(parts, meanings): meanings = [] for elem in m.find_all("li", recursive=False): examples = [] if not native: meaning = [m.get_text().strip() for m in elem.select("span.hw")] pattern = re.compile(r"\s{3,}") for e in elem.find_all("div", class_="exampleSentence"): example = re.split(pattern, e.get_text().strip()) examples.append(example) else: meaning = [elem.find("span", recursive=False).get_text().strip()] # When translating to polish 'examples' are just synonyms of translation synonyms = ", ".join( sorted( set( x.get_text().strip() for x in elem.select("ul > li > span.hw") ) ) ) if synonyms: examples.append([synonyms, None]) meanings.append(Meaning(meaning, examples)) parts_list.append(PartOfSpeech(part, meanings)) translations.append(Translation(word, parts_list)) if translations: return translations # if translation wasn't found check if there are any suggestions suggestions = soup.find("div", class_="dictionarySuggestions") if suggestions: raise WordNotFound(suggestions.get_text().strip()) raise WordNotFound("Nie znaleziono tłumaczenia wpisanej frazy") def _parse_cached(html_dump): """Parse html string from cached html files. Parameters ---------- html_dump : string HTML content Returns ------- translations : list Translations list. """ soup = BeautifulSoup(html_dump, "html.parser") translations = [] for trans in soup.find_all("div", class_="translation"): word = tuple(t.get_text() for t in trans.select("div.word > h2")) trans_list = [] for part in trans.find_all("div", class_="part-of-speech"): pn = part.find("p", class_="part-name") if pn: pn = pn.get_text().strip("[]") meanings = [] for meaning in part.find_all("div", class_="meaning"): m = [mn.get_text() for mn in meaning.select("li > span")] examples = [] for e in meaning.find_all("p"): examples.append([ex.get_text() for ex in e.find_all("span")]) meanings.append(Meaning(m, examples)) trans_list.append(PartOfSpeech(pn, meanings)) translations.append(Translation(word, trans_list)) return translations def _cache_lookup(word, data_dir, native=False): """Checks if word is in cache. Parameters ---------- word : str Word to check in cache. data_dir : pathlib.Path Cache directory location. Returns ------- translation : str or None Translation of given word. """ trans_dir = "translations" if native: trans_dir += "_native" logger.debug("Cache lookup: %s", word) filename = data_dir.joinpath(trans_dir, "{}.html".format(word)) if filename.is_file(): with open(filename, mode="r") as f: logger.debug("Cache found: %s", word) # TODO: not sure if we should parse data here translation = _parse_cached(f.read()) return translation logger.debug("Cache miss: %s", word) return None def _get_words(data_dir): """Get list of words from history file. Parameters ---------- data_dir : pathlib.Path Directory where data is saved. Returns ------- word_list : list of str List of words. """ words_file = data_dir.joinpath("words.txt") word_list = [] if not words_file.is_file(): return word_list with open(words_file, mode="r") as f: for l in f: line = l.rstrip() word_list.append(line) return word_list def _save_to_history(word, data_dir): """Write word to history file. Parameters ---------- word : str Word to save to history. data_dir : pathlib.Path Directory where history file should be saved. data_dir and it's parent directories will be created if needed. """ if not data_dir.exists(): logger.debug("Creating DATA DIR: %s", data_dir.as_posix()) data_dir.mkdir(parents=True) if word not in _get_words(data_dir): with open(data_dir.joinpath("words.txt"), mode="a+") as f: logger.debug("Adding to history: %s", word) f.write(word + "\n") def _create_html_file_content(translations): """Create html string out of translation dict. Parameters ---------- tralnslations : dict Dictionary of word translations. Returns ------- str: html string of translation """ content = [] for i1, t in enumerate(translations): if i1 > 0: content.append("
") content.append('
') content.append('
') for w in t.word: content.append("

{word}

".format(word=w)) content.append("
") # end `word` for i2, t2 in enumerate(t.parts_of_speech): if i2 > 0: content.append("
") content.append('
') if t2.part is not None: content.append('

[{part}]

'.format(part=t2.part)) content.append("
    ") for m in t2.meanings: content.append('
    ') mng = ["
  1. "] for i3, mn in enumerate(m.meaning): if i3 > 0: mng.append(", ") mng.append("{meaning}".format(meaning=mn)) mng.append("
  2. ") content.append("".join(mng)) content.append('
    ') for e in m.examples: content.append( "

    {ex}
    {tr}

    " "".format(ex=e[0], tr=e[1]) ) content.append("
    ") # end `examples` content.append("
    ") # end `meaning` content.append("
") content.append("
") # end `part-of-speech` content.append("
") # end `translation` return "\n".join(content) def _write_html_file(word, translations, data_dir, native=False): """Create html file of word translations. Parameters ---------- word : str Word that was translated. tralnslations : dict Dictionary of word translations. data_dir : pathlib.Path Location where html files are saved. """ content_str = _create_html_file_content(translations) html_string = HTML_TEMPLATE.replace("{% word %}", word) html_string = html_string.replace("{% content %}", content_str) trans_dir = "translations" if native: trans_dir += "_native" translations_dir = data_dir.joinpath(trans_dir) fname = translations_dir.joinpath("{word}.html".format(word=word)) save_file(fname, html_string, mk_parents=True) def _create_index_content(words): """Create html string of index file. Parameters ---------- words : list of str List of cached words. Returns ------- str html string. """ content = ["

Index

", "") if not words: content.append("Nothing to see here ...yet!") return "\n".join(content) def _write_index_file(data_dir): """Create index file of cached translations. Parameters ---------- data_dir : pathlib.Path Cache directory location. """ cached_words = [ w for w in _get_words(data_dir) if data_dir.joinpath("translations/{}.html".format(w)).is_file() ] content_str = _create_index_content(cached_words) html_string = HTML_TEMPLATE.replace("{% word %}", "Index") html_string = html_string.replace("{% content %}", content_str) filename = data_dir.joinpath("index.html") save_file(filename, html_string, mk_parents=True) def save_file(filename, data, mk_parents=True): """Save file to disk. Paramaters ---------- filename : pathlib.Path Path to the file. data : str File contents. mk_parents : bool, optional If to create parent directories. """ parent = filename.parent if not parent.exists() and mk_parents: logger.debug("Creating directory: %s", parent.as_posix()) parent.mkdir(parents=True) with open(filename, mode="w") as f: logger.debug("Saving file: %s", filename.as_posix()) f.write(data) def _lookup_online(word): """Look up word on diki.pl. Parameters ---------- word : str Word too look up. Returns ------- str website HTML content. """ URL = "https://www.diki.pl/{word}" HEADERS = { "User-Agent": ( "Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; " "Trident/7.0; rv:11.0) like Gecko" ) } logger.debug("Looking up online: %s", word) quoted_word = urllib.parse.quote(word) req = urllib.request.Request(URL.format(word=quoted_word), headers=HEADERS) with urllib.request.urlopen(req) as response: html_string = response.read().decode() return html.unescape(html_string) def translate(word, config, use_cache=True, to_eng=False): """Translate a word. Parameters ---------- word : str Word to translate. config : Config Configuration settings. use_cache : bool, optional Wheter to use cache. to_eng : bool, optional Translate from Polish to English. Returns ------- translation Translation of a word. Raises ------ WordNotFound If word can't be found. """ translation = None data_dir = Path(config["data dir"]) if use_cache: logger.debug("Checking cache: %s", word) translation = _cache_lookup(word, data_dir, native=to_eng) if translation: return translation html_dump = _lookup_online(word) try: translation = _parse_html(html_dump, native=to_eng) except WordNotFound as exn: logger.error(str(exn)) raise exn _write_html_file(word, translation, data_dir, native=to_eng) if not to_eng: _save_to_history(word, data_dir) _write_index_file(data_dir) return translation def display_index(config): """Open index in web browser. Parameters ---------- config : Config Configuration settings. Raises ------ FileNotFoundError If index file doesn't exist. """ browser = config["web browser"].lower() data_dir = Path(config["data dir"]) if browser in webbrowser._browsers: b = webbrowser.get(browser) else: if browser != "default": logger.warning( "Couldn't find '%s' browser. Falling back to default.", browser ) b = webbrowser.get() index_file = data_dir.joinpath("index.html") if not index_file.exists(): logger.error("File doesn't exist: %s", index_file.as_posix()) raise FileNotFoundError("Index file doesn't exist") else: logger.info("Opening %s in '%s'", index_file.as_posix(), b.name) b.open(index_file.as_uri()) def wrap_text(translations, linewrap=0): """Pretty print translations. If linewrap is set to 0 disble line wrapping. Parameters ---------- translations : list List of word translations. linewrap : int Maximum line length before wrapping. """ # pylint: disable=too-many-locals def wrap(text, width=linewrap, findent=0, sindent=0, bold=False): if width == 0: text = " " * findent + text else: text = textwrap.fill( text, width=width, initial_indent=" " * findent, subsequent_indent=" " * sindent, ) # don't use bold when stdout is pipe or redirect if bold and sys.stdout.isatty(): text = "\033[0;1m" + text + "\033[0m" return text indent = 5 result = [] for i1, trans in enumerate(translations): if i1 > 0: result.append("\n") for w in trans.word: result.append(wrap(w, bold=True)) for i2, t in enumerate(trans.parts_of_speech): if i2 > 0: result.append("") if t.part: result.append("[{part}]".format(part=t.part)) for i3, m in enumerate(t.meanings, 1): if i3 > 1: result.append("") meaning = "{index:>3}. {meanings}".format( index=i3, meanings=", ".join(m.meaning) ) result.append(wrap(meaning, sindent=indent, bold=True)) eindent = indent + 1 for e in m.examples: result.append("") result.append(wrap(e[0], findent=eindent, sindent=eindent)) if e[1]: result.append(wrap(e[1], findent=eindent, sindent=eindent + 1)) return "\n".join(result) PK!FFdikicli/templates.pyHTML_TEMPLATE = """\ {% word %} {% content %} """ CONFIG_TEMPLATE = """\ [dikicli] # Location where html and history files are stored # default: {data_dir} data dir = {data_dir} # Lines longer that linewrap value will be wrapped in terminal # set to 0 to disable line wrapping # default: {linewrap} linewrap = {linewrap} # Whether to use colors/styles in terminal # valid options: yes no true false # default: {colors} colors = {colors} # Web browser used to display index file # default: {browser} web browser = {browser} """ PK!H %&&dikicli-0.3.dist-info/entry_points.txtN+I/N.,()Jδz9Vy\\PK!%w ++dikicli-0.3.dist-info/LICENSEMIT License Copyright (c) 2018 Dawid Zych Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. PK!HnHTUdikicli-0.3.dist-info/WHEEL A н#Z;/"d&F[xzw@Zpy3Fv]\fi4WZ^EgM_-]#0(q7PK!H dikicli-0.3.dist-info/METADATASMs0WLSH0dIm-*KBZ55U|}_c7 Q;[U;]BL]a0Vtrі@[bMVzlr]T@N>ViOeiԆl= ӣF_^l"qB+u*h@mv$iEՆ~&(WfS>NŗoNf=+[ĺM&J4~Pf]]i+u>/ItQeLdikicli-0.3.dist-info/RECORDu;@| aEDP ¯?ֺ*N3p(ZbN~ ˕osP1yfТaE%n 0GHBuNwF]tN<e e4#$iH%X zk#ZQ RS F%7PF +-A}Zeܧ݋dyC]-Rjir Oؔ3*KDɼhf/4gZd}^D%<ɰ3^BhrҵfJry]쩷Ƅ=MHGaCgK̡-YǶ ۝ˈ*TQ,3?504m4wS>}}B!7Wu;4xA|#ښVO4CSNUiX1Ʉ&s!& z7?PK!dikicli/__init__.pyPK!ڠ'NPP1dikicli/cli.pyPK!]}8JJdikicli/core.pyPK!FFYdikicli/templates.pyPK!H %&&1]dikicli-0.3.dist-info/entry_points.txtPK!%w ++]dikicli-0.3.dist-info/LICENSEPK!HnHTUbdikicli-0.3.dist-info/WHEELPK!H bdikicli-0.3.dist-info/METADATAPK!H>ddikicli-0.3.dist-info/RECORDPK zf