PK! qtwilog_web_archiver/__init__.py__version__ = '0.1.0' PK!% twilog_web_archiver/main.pyfrom typing import List from bs4 import BeautifulSoup import click import requests import savepagenow def get_month_links(screen_name: str) -> List[str]: """Get month link for the screen_name.""" url = f'https://twilog.org/{screen_name}/archives' s = get_soup(url) return list( filter(lambda x: 'month-' in x, map(lambda x: x['href'], s.select('section.main-list-box1 .side-list li a')))) def get_soup(url: str) -> BeautifulSoup: """Get BeautifulSoup object for the url.""" r = requests.get(url) return BeautifulSoup(r.text, 'lxml') def parse_month(month_link: str) -> None: """Archive all the pages of the month list.""" archive_url, _ = savepagenow.capture_or_cache(month_link) print('archived:', archive_url) s = get_soup(month_link) next_links = s.select('.nav-next a') if len(next_links) != 0: next_link = next_links[0]['href'] parse_month(next_link) @click.command() @click.argument('screen_name') def archive_user_page(screen_name): """Archvie all month list pages for the specified screen_name into Wayback Machine.""" month_links = get_month_links(screen_name) for month_link in month_links: parse_month(month_link) def main(): archive_user_page() if __name__ == '__main__': main() PK!H29E:twilog_web_archiver-0.1.0.post3.dist-info/entry_points.txtN+I/N.,()*)O-OMM,J,K-abzyV PK!HnHTU/twilog_web_archiver-0.1.0.post3.dist-info/WHEEL A н#Z;/"d&F[xzw@Zpy3Fv]\fi4WZ^EgM_-]#0(q7PK!Hۥi2twilog_web_archiver-0.1.0.post3.dist-info/METADATATMS0WZrSxL3f(G";-WJLMfZ,{[a[Nm*B6\`W2S ]rr)4ZZTdV9u3¦Ic0PL*#uRkK~"mZE,VoҪzHƢ0w5р4-7^)0J<,Ɨ **f0̆0[?ȹBh^CW%0tR#랰9ϸ1I p0kVKp=\Bh2o8BUeqċ¼6+(~l ,v9~}nQO:c-K9 GaqSVtTՖTgê,bK泥XZkw!Leiq&96(,K7=/TK!"U -B(Zpsr]˳lC(SOqMJ,AnȽA"r8^9}!pT)7 &Ih]~P<yxu%6|E箬Lu0`#'OO LPK! qtwilog_web_archiver/__init__.pyPK!% Stwilog_web_archiver/main.pyPK!H29E:twilog_web_archiver-0.1.0.post3.dist-info/entry_points.txtPK!HnHTU/9twilog_web_archiver-0.1.0.post3.dist-info/WHEELPK!Hۥi2twilog_web_archiver-0.1.0.post3.dist-info/METADATAPK!H8<0 twilog_web_archiver-0.1.0.post3.dist-info/RECORDPKB