PK!]|tweakers/__init__.py"""A Python wrapper for https://tweakers.net""" from . import gathering, topic from .user import User from .utils import login __version__ = "0.3.2"PK! tweakers/__version__.py__version__ = "0.3.2"PK!O@aatweakers/comment.py""" Tweakers.net comments. """ from .user import User class Comment: """ A comment on a gathering.tweakers.net topic """ def __init__(self, **kwargs) -> None: for k, v in kwargs.items(): if k == "username": setattr(self, "user", User(name=v)) else: setattr(self, k, v) PK!/\tweakers/gathering.py""" gathering.tweakers.net (forums). """ from typing import List from requests_html import HTMLResponse from .utils import fetch from .topic import Topic from . import parsers url = "https://gathering.tweakers.net" def active_topics() -> List[Topic]: response: HTMLResponse = fetch(url=f"{url}/forum/list_activetopics") topics: List = [Topic(**d) for d in parsers.active_topics(response.html)] return topics def search(query: str) -> List[Topic]: """Search for topics given a query. :param query: Search query. """ response: HTMLResponse = fetch(url=f"{url}/forum/find?keyword={query}") topics: List = [Topic(**d) for d in parsers.search_topics(response.html)] return topics def bookmarks() -> List[Topic]: """Get a list of bookmarks :return: Bookmarks """ response: HTMLResponse = fetch(url=f"{url}/forum/list_bookmarks") topics: List = [Topic(**d) for d in parsers.bookmark_topics(response.html)] return topics PK!M'D D tweakers/parsers.py""" HTML parsers. """ from typing import Dict, Generator, Union from requests_html import HTML import dateparser def get_comment_count(html: HTML) -> int: comment_count: int try: comment_count = int(html.find(".commentCount", first=True).text) except AttributeError: # Also count the opening post comment_count = 1 return comment_count def active_topics(html: HTML) -> Generator[dict, None, None]: for tr in html.find(".listing tr")[1:]: topic: Dict = { "title": tr.find(".topic a", first=True).text, "url": tr.find(".topic a", first=True).attrs["href"], "poster": tr.find(".poster", first=True).text, "last_reply": dateparser.parse( tr.find(".time a", first=True).text, languages=["nl"] ), "comment_count": get_comment_count(tr), } yield topic def search_topics(html: HTML) -> Generator[dict, None, None]: for tr in html.find(".forumlisting tr")[2::2]: topic: Dict = { "title": tr.find(".title a", first=True).text, "url": tr.find(".title a", first=True).attrs["href"], "last_reply": dateparser.parse( tr.find(".time a", first=True).text, languages=["nl"] ), } yield topic def bookmark_topics(html: HTML) -> Generator[dict, None, None]: for tr in html.find(".listing tr.alt")[1:]: topic: Dict = { "title": tr.find(".topic a")[1].text, "url": tr.find(".topic a")[1].attrs["href"], "last_reply": dateparser.parse( tr.find(".time a", first=True).text, languages=["nl"] ), } yield topic def get_rating(div): rating: int try: rating = int(div.find("span.ratingcount", first=True).text.replace("+", "")) except AttributeError: # post too old for rating rating = 0 return rating def topic_comments(html: Union[HTML, str]) -> Generator[dict, None, None]: if isinstance(html, str): html = HTML(html=html) for div in html.find(".message"): message: Dict = { "id": int(div.attrs["data-message-id"]), "username": div.find("a.user", first=True).text, "date": dateparser.parse( div.find("div.date", first=True).text, languages=["nl"] ), "url": div.find(".date p a", first=True).attrs["href"], "rating": get_rating(div), "text": div.find(".messagecontent", first=True).text, "html": div.html, } yield message PK!_ tweakers/topic.py""" gathering.tweakers.net topics """ import time from typing import Union, List, Generator from requests_html import HTMLResponse from .utils import id_from_url, fetch from .comment import Comment from . import parsers class Topic: """ A topic on gathering.tweakers.net """ def __init__(self, url, **kwargs) -> None: self.__dict__.update(kwargs) self.url: str = url self.id: int = id_from_url(self.url) def comments(self, page: Union[int, str]) -> List[Comment]: """ Get comments for a specific Topic page :param page: Page number (zero indexed) or 'last' for last page. :return: A list of Comment objects. """ response: HTMLResponse = fetch(url=f"{self.url}/{page}") return [Comment(**d) for d in parsers.topic_comments(response.html)] def comment_stream( self, refresh: int = 15, last: int = 3 ) -> Generator[Comment, None, None]: """ Generator of new comments. :param refresh: Refresh timer in seconds (default 15). :param last: Number of already posted comments to return (default 15). :return: A generator of Comment objects. """ # get the last posted comment id, required for getting new comments comments: List = self.comments("last") last_message_id: int = comments[-1].id # yield last n comments for comment in comments[-last:]: yield comment timer: int = refresh while True: epoch_time: int = int(time.time()) # required for getting new comments ajax_url: str = f"https://gathering.tweakers.net/ajax/list_new_messages/{self.id}/{last_message_id}?output=json\ &nocache={epoch_time}" comments = self.get_new_comments(ajax_url) if comments: last_message_id = comments[-1].id for comment in comments: yield comment # decrease timer each second while timer > 0: # pragma: no cover time.sleep(1) timer -= 1 else: # pragma: no cover timer = refresh def get_new_comments(self, ajax_url: str) -> List[Comment]: # pragma: no cover """ Get new comments for a given ajax_url :param ajax_url: Ajax url to query for new comments. :return: A list of comment objects. """ json = fetch(ajax_url).json() new_comments: List try: html: str = "".join(json["data"]["messages"]) new_comments = [Comment(**d) for d in parsers.topic_comments(html)] except KeyError: # no new messages new_comments = [] return new_comments PK!=Htweakers/user.py""" A tweakers.net user. """ class User: """A Tweakers.net user""" def __init__(self, **kwargs) -> None: required = ["id", "name"] assert any( x in required for x in kwargs ), f"Missing one of required keywords: {required}" for k, v in kwargs.items(): if k == "id": k = "_id" setattr(self, k, v) PK! >u u tweakers/utils.py""" Utilities. """ from requests_html import HTMLSession, HTMLResponse session = HTMLSession() def fetch(url: str) -> HTMLResponse: """ :param url: Url to fetch. :return: HTMLResponse. """ _require_cookies() response = session.get(url) if not 200 >= response.status_code < 300: raise Exception(f"Url {url} returned a {response.status_code}") return response def _require_cookies() -> None: """ Get cookies if not already accepted """ if len(session.cookies) > 2: return url = "https://tweakers.net" response = session.get(url) token = response.html.find("input[name=tweakers_token]")[0].attrs["value"] data = {"decision": "accept", "tweakers_token": token} session.post(url="https://tweakers.net/my.tnet/cookies", data=data) def id_from_url(url: str) -> int: """ Parse the id from the URL :param url: url to get id from. :raises NotImplementedError: If getting id from this url is not supported. :return: integer id. """ parts = url.split("/") if "aanbod" in parts: id = parts[parts.index("aanbod") + 1] elif "list_messages" in parts: id = parts[parts.index("list_messages") + 1] elif "pricewatch" in parts: id = parts[parts.index("pricewatch") + 1] else: raise NotImplementedError( "Getting the id for url ({url}) is not yet implemented" ) return int(id) def login(username: str, password: str) -> None: # pragma: no cover url = "https://tweakers.net/my.tnet/login/" response = session.get(url) try: token = response.html.find("input[name=tweakers_login_form\[_token\]]")[0].attrs["value"] except IndexError: # Already logged in return data = {"tweakers_login_form[_token]": token, "tweakers_login_form[user]": username, "tweakers_login_form[password]": password } login_response = session.post(url=url, data=data) login_error_text = "De combinatie van gebruikersnaam of e-mailadres en wachtwoord is onjuist." if login_error_text in login_response.text: raise ValueError("Invalid username or password.") captcha_error_text = "Om te bewijzen dat je geen robot bent, moet een captcha worden ingevuld." if captcha_error_text in login_response.text: raise Exception("Captcha warning triggered, unable to login!") PK!Y'%% tweakers-0.3.2.dist-info/LICENSEMIT License Copyright (c) 2018 Timo Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. PK!H_zTTtweakers-0.3.2.dist-info/WHEEL A н#Z;/"d&F[xzw@Zpy3Fv]n0H*J>mlcAPK!H\[Ŏ=!tweakers-0.3.2.dist-info/METADATAUn6}WL퇕l7-" ;]AҬF$eWߡn[~09s93ߡ)w++T9\b nn.y0䦎a 媄ZlYD$5[qeՂ%JFNH^m[`i {w3 61Zhy>Y$V}5w9{̷!JV$$hm+ɼ.'NiA㏔%|2^}4]1'\m]"7I~s@7065:U(٦MƄA҆nXmvtAŢ^$G6};< 7)~W5XEp3oTWxAiVܿ:\/U([6v~{xz|y~|}AkOWǽZW]SEU%_|yFU|v6>i|J͹K PK!H{6tweakers-0.3.2.dist-info/RECORD}9@AL("Pl" BȯW255s<]E%*q-_bq^3ΣMys_xJEv+(-nřP_c=ާl.5)u[NKu*I6mļB6u#<09<M.2E>Na֙ ibŬ>TKޕahn`]+6>QeNxq׿کm6sZ$FHfiu+DXit&eAQћ{S\9;/Rr^5 F'?4"zFt{;ם\RrLwH|WLh]3 b%VpYͻ]z[YM:=#{1gK,p?uqn rZeVxQs+<֌lB~=f~ u u tweakers/utils.pyPK!Y'%% 'tweakers-0.3.2.dist-info/LICENSEPK!H_zTT+tweakers-0.3.2.dist-info/WHEELPK!H\[Ŏ=!|,tweakers-0.3.2.dist-info/METADATAPK!H{6/tweakers-0.3.2.dist-info/RECORDPK >k2