PK! qtwilog_web_archiver/__init__.py__version__ = '0.1.0' PK!% twilog_web_archiver/main.pyfrom typing import List from bs4 import BeautifulSoup import click import requests import savepagenow def get_month_links(screen_name: str) -> List[str]: """Get month link for the screen_name.""" url = f'https://twilog.org/{screen_name}/archives' s = get_soup(url) return list( filter(lambda x: 'month-' in x, map(lambda x: x['href'], s.select('section.main-list-box1 .side-list li a')))) def get_soup(url: str) -> BeautifulSoup: """Get BeautifulSoup object for the url.""" r = requests.get(url) return BeautifulSoup(r.text, 'lxml') def parse_month(month_link: str) -> None: """Archive all the pages of the month list.""" archive_url, _ = savepagenow.capture_or_cache(month_link) print('archived:', archive_url) s = get_soup(month_link) next_links = s.select('.nav-next a') if len(next_links) != 0: next_link = next_links[0]['href'] parse_month(next_link) @click.command() @click.argument('screen_name') def archive_user_page(screen_name): """Archvie all month list pages for the specified screen_name into Wayback Machine.""" month_links = get_month_links(screen_name) for month_link in month_links: parse_month(month_link) def main(): archive_user_page() if __name__ == '__main__': main() PK!H29E:twilog_web_archiver-0.1.0.post2.dist-info/entry_points.txtN+I/N.,()*)O-OMM,J,K-abzyV PK!HnHTU/twilog_web_archiver-0.1.0.post2.dist-info/WHEEL A н#Z;/"d&F[xzw@Zpy3Fv]\fi4WZ^EgM_-]#0(q7PK!HM@2twilog_web_archiver-0.1.0.post2.dist-info/METADATAr0z"L-a0M:`! 0!m#Vb[$syJ!ƋRmdY9EmaWZȂBtЂ圂ًL&x#TWC|RJmh]9SG k¤ md jWD#TZ >O2RcJMDH,NY&EsB[rcpƌݹǽT[g'c7Iޅp=z󜉌B}-*ˣm6\{(cZ'ZP 3Rߺd /b,( W6]ۥby8gERټ9'5+'[ܼB{p5/:^MLl}q W C9GPvɵ꒮=*hmEOܟEuO:A ylf5ⶏ]}\J < =`Arq -*@} rc]Y 2t& G(qt JQ84> hVa=ZNj~oPK!Hl?0twilog_web_archiver-0.1.0.post2.dist-info/RECORDѹ0~% r[d1MF#G![9[W%Kq񉓌b>˜T`,7h61>WyugGم~ LJrX\|дZhn6..W̝>{y2:fUbUڮ;bjJʊܰVhBuFq-Fk.`fk:RL'MNaP=B둅$^dEhT!бow=KZz2p"o|dߵ5ȊkB>Ð;VGcGzY܋ǙiByP3$SȀ/C|PK! qtwilog_web_archiver/__init__.pyPK!% Stwilog_web_archiver/main.pyPK!H29E:twilog_web_archiver-0.1.0.post2.dist-info/entry_points.txtPK!HnHTU/9twilog_web_archiver-0.1.0.post2.dist-info/WHEELPK!HM@2twilog_web_archiver-0.1.0.post2.dist-info/METADATAPK!Hl?0E twilog_web_archiver-0.1.0.post2.dist-info/RECORDPK