PK! qtwilog_web_archiver/__init__.py__version__ = '0.1.0' PK!% twilog_web_archiver/main.pyfrom typing import List from bs4 import BeautifulSoup import click import requests import savepagenow def get_month_links(screen_name: str) -> List[str]: """Get month link for the screen_name.""" url = f'https://twilog.org/{screen_name}/archives' s = get_soup(url) return list( filter(lambda x: 'month-' in x, map(lambda x: x['href'], s.select('section.main-list-box1 .side-list li a')))) def get_soup(url: str) -> BeautifulSoup: """Get BeautifulSoup object for the url.""" r = requests.get(url) return BeautifulSoup(r.text, 'lxml') def parse_month(month_link: str) -> None: """Archive all the pages of the month list.""" archive_url, _ = savepagenow.capture_or_cache(month_link) print('archived:', archive_url) s = get_soup(month_link) next_links = s.select('.nav-next a') if len(next_links) != 0: next_link = next_links[0]['href'] parse_month(next_link) @click.command() @click.argument('screen_name') def archive_user_page(screen_name): """Archvie all month list pages for the specified screen_name into Wayback Machine.""" month_links = get_month_links(screen_name) for month_link in month_links: parse_month(month_link) def main(): archive_user_page() if __name__ == '__main__': main() PK!H29E:twilog_web_archiver-0.1.0.post1.dist-info/entry_points.txtN+I/N.,()*)O-OMM,J,K-abzyV PK!HnHTU/twilog_web_archiver-0.1.0.post1.dist-info/WHEEL A н#Z;/"d&F[xzw@Zpy3Fv]\fi4WZ^EgM_-]#0(q7PK!HdY2twilog_web_archiver-0.1.0.post1.dist-info/METADATAr0z"L-a0M:`! 0!m#Vb[$syJ!ƋRmdY9EmaWZȂBtЂ圂ًL&x#TWC|RJm:h]9SG k¤ md jWD#TZ >O2RcJMDH,NY&EsB[rcpƌݹǽT[g'c7Iޅp=z󜉌B}-*ˣm6\{(cZ'ZP 3Rߺd /b,( W6]ۥby8gERټ9'5+'[ܼB{p5/:^MLl}q W C9GPvɵ꒮=*hmEOܟEuO:A ylf5ⶏ]}\J < =`Arq -*@} rc]Y 2t& G(qt JQ84> hVa=ZNj~oPK!Hv?0twilog_web_archiver-0.1.0.post1.dist-info/RECORDѻv0`!2tx  !Os:W kN81 Id"@5q@ $\4lqFY C/ɱbr$ =O ePNrK:?|:z)j#Ƌ %;V ^ZWzB f% >F(BS7j`o;Sұ}^upu-NJz;ok"1o(  EZ Gy»:0H ʴ-@^^ڐ PK! qtwilog_web_archiver/__init__.pyPK!% Stwilog_web_archiver/main.pyPK!H29E:twilog_web_archiver-0.1.0.post1.dist-info/entry_points.txtPK!HnHTU/9twilog_web_archiver-0.1.0.post1.dist-info/WHEELPK!HdY2twilog_web_archiver-0.1.0.post1.dist-info/METADATAPK!Hv?0E twilog_web_archiver-0.1.0.post1.dist-info/RECORDPK