PK!vpyproject.toml[build-system] requires = ["poetry>=0.12"] build-backend = "poetry.masonry.api" [tool.poetry] name = "reading-helper" description = "" version = "0.1.7" authors = ["Nathan Hunt "] license = "MIT" readme = "README.md" repository = "https://github.com/neighthan/reading-helper" include = ["pyproject.toml", "template.md"] [tool.poetry.scripts] make_paper_page = "reading_helper:scripts_api.make_paper_page" search_tags = "reading_helper:scripts_api.search_tags" [tool.poetry.dependencies] python = "^3.7" toml = "^0.10.0" [tool.poetry.dev-dependencies] pre-commit = "^1.14" pytest = "^4.0" pytest-cov = "^2.6" twine = "^1.13" invoke = {git = "https://github.com/neighthan/invoke.git"} ipykernel = "^5.1" PK!`reading_helper/__init__.pyimport toml from pathlib import Path pyproject = toml.load(str(Path(__file__).parents[1] / "pyproject.toml")) __version__ = pyproject["tool"]["poetry"]["version"] del pyproject PK!2kt!reading_helper/make_paper_page.pyimport requests from datetime import datetime from bs4 import BeautifulSoup from argparse import ArgumentParser from pathlib import Path from typing import Dict, Optional def parse_args(): parser = ArgumentParser() parser.add_argument("short_title", help="Short title for paper; used to name file.") parser.add_argument("-u", "--url", default="") parser.add_argument( "-j", "--journal", help="Journal published in [default = arXiv if 'arxiv' in url else '']", default="", ) parser.add_argument( "-d", "--papers_dir", help="Directory where papers should be saved (default = current directory).", default=".", ) return parser.parse_args() def extract_metadata(short_title: str, url: str, journal: str = "") -> Dict[str, str]: journal = "arXiv" if "arxiv" in url and not journal else journal paper_data = { "url": url, "added_date": str(datetime.now().date()), "short_title": short_title, "journal": journal, "tags": "[]", "title": "", "authors": "[]", "year": "", "arxiv_id": "", } if "arxiv" in url: if "pdf" in url: url = url.replace(".pdf", "").replace("pdf", "abs") paper_data.update(extract_arxiv_data(url)) paper_data["authors"] = ( "[" + ", ".join(f'"{author}"' for author in paper_data["authors"]) + "]" ) elif url: response = requests.get(url) html = BeautifulSoup(response.content, "lxml") try: paper_data.update({"title": str(html.select_one("h1").string)}) except AttributeError: pass return paper_data def make_paper_page(short_title: str, url: str, journal: str, papers_dir: str = "."): paper_data = extract_metadata(short_title, url, journal) try: template = (Path(papers_dir) / "template.md").read_text() except FileNotFoundError: template = (Path(__file__).parents[1] / "template.md").read_text() template = template.format(**paper_data) new_paper_fname = short_title.lower().replace(" ", "_").replace("-", "_") + ".md" new_paper_path = ( Path(papers_dir) / new_paper_fname if papers_dir else Path(new_paper_fname) ) if new_paper_path.exists(): overwrite = input(f"Overwrite existing file {new_paper_fname} (y/n)? ") if overwrite != "y": return new_paper_path.write_text(template) def extract_arxiv_data(url: str) -> Dict[str, str]: paper_data = {} response = requests.get(url) html = BeautifulSoup(response.content, "lxml") meta_tags = html.select("meta") def filter_by_name(tags, name: str): return filter(lambda tag: tag.has_attr("name") and tag["name"] == name, tags) try: paper_data["title"] = next(filter_by_name(meta_tags, "citation_title"))[ "content" ] except StopIteration: pass paper_data["authors"] = [ tag["content"] for tag in filter_by_name(meta_tags, "citation_author") ] # format is YYYY/MM/DD (always?) # there's both a "citation_date" and a "citation_online_date"; I use "citation_date" try: paper_data["year"] = next(filter_by_name(meta_tags, "citation_date"))[ "content" ].split("/")[0] except StopIteration: pass try: paper_data["arxiv_id"] = next(filter_by_name(meta_tags, "citation_arxiv_id"))[ "content" ] except StopIteration: pass return paper_data def main() -> None: args = parse_args() make_paper_page(args.short_title, args.url, args.journal, args.papers_dir) if __name__ == "__main__": main() PK!R)ereading_helper/scripts_api.pyfrom .make_paper_page import main as _make_paper_page from .search_tags import main as _search_tags def make_paper_page(): _make_paper_page() def search_tags(): _search_tags() PK!7Mreading_helper/search_tags.pyimport re from argparse import ArgumentParser from pathlib import Path from typing import Sequence import pandas as pd def parse_args(): parser = ArgumentParser() parser.add_argument( "query", help="Boolean expression involving tags like `rl & (safe | symbolic)`. There must be spaces between tag names.", ) parser.add_argument( "-d", "--dir", default=".", help="Papers directoy (default = '.')." ) parser.add_argument("-p", "--print_path", action="store_true") args = parser.parse_args() return args def search_tags(query: str, papers_dir: str = ".", print_path: bool = False) -> None: papers_dir = Path(papers_dir) papers = list(papers_dir.rglob("*.md")) remove_chars = "()|&~" query_tags = query for char in remove_chars: query_tags = query_tags.replace(char, "") # remove empty strings query_tags = list(filter(lambda tag: tag, query_tags.split(" "))) # {synonym: canonical form} tag_synonyms = {"safety": "safe", "human in the loop": "human-loop"} pattern = re.compile("tags: \[(.*?)\]", re.DOTALL) all_paper_tags = [] for paper in papers: text = paper.read_text() match = pattern.search(text) if not match: print(f"{paper.stem} has no tags.") continue paper_tags = match.group(1).split(",") paper_tags = map( lambda t: t.lower().strip().replace('"', "").replace("'", ""), paper_tags ) paper_tags = [tag_synonyms.get(tag, tag) for tag in paper_tags] tag_matches = [tag in paper_tags for tag in query_tags] all_paper_tags.append([paper] + tag_matches) tags_df = pd.DataFrame(all_paper_tags, columns=["paper"] + query_tags) for paper in tags_df.query(query).paper: if print_path: print(paper) else: print(paper.stem) def main() -> None: args = parse_args() search_tags(args.query, args.dir, args.print_path) if __name__ == "__main__": main() PK!i{{ template.md--- title: "{title}" short_title: "{short_title}" authors: {authors} journal: "{journal}" year: {year} arxiv_id: "{arxiv_id}" url: "{url}" added_date: {added_date} tags: {tags} reading_depth: 0 --- # ADD TAGS AND READING DEPTH # [{title} ({short_title})][{short_title}] ## TLDR ## Aim ## Methods ## Results ## Pros ## Cons ## Notes ## Questions [{short_title}]: {url} PK!H5 L/reading_helper-0.1.7.dist-info/entry_points.txtN+I/N.,()MN/H,H-驶E)y9@1+ĂL=4\ũE%!)PK!D00&reading_helper-0.1.7.dist-info/LICENSEThe MIT License Copyright (c) 2019 Nathan Hunt Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. PK!HڽTU$reading_helper-0.1.7.dist-info/WHEEL A н#Z;/"d&F[xzw@Zpy3Fv]\fi4WZ^EgM_-]#0(q7PK!HB/'reading_helper-0.1.7.dist-info/METADATAQMO1W4 .bb@ʿw6"8y.5j 1}K*QC "}yv:gsoA]Hʲ`ݾ́jhhسIu&j(~55 -|&Bx4ٴ)WD}J݂OBMF6WQ[KROv_6Oɝ LBۆ_Gk.ސξoAiT| '>zj~~c%?gIŖȺM (CxYT_e"֦CWrpQ6#JjO?RAq>VB.ɐ1I 2J.gK ҆@Kr(7&{c|>gƅSJ 'HFtBLa%5-2 Ff jdcU- pG3JoV!^OH}3&Wj9Ƴ +X}s0i3hq= tejw_kԂ_,O:.] ڸZU`010n5ѾLm=3mr4:=P}- =96OROye,&EPK!vpyproject.tomlPK!`reading_helper/__init__.pyPK!2kt!reading_helper/make_paper_page.pyPK!R)ereading_helper/scripts_api.pyPK!7Mreading_helper/search_tags.pyPK!i{{ template.mdPK!H5 L/reading_helper-0.1.7.dist-info/entry_points.txtPK!D00&Dreading_helper-0.1.7.dist-info/LICENSEPK!HڽTU$"reading_helper-0.1.7.dist-info/WHEELPK!HB/'N#reading_helper-0.1.7.dist-info/METADATAPK!HG,%$reading_helper-0.1.7.dist-info/RECORDPK M1'