PKBSN@sensebook/__init__.py"""Making sense of Facebooks undocumented API.""" from ._utils import ( default_user_agent, parse_form, build_url, strip_json_cruft, load_json, time_from_millis, random_hex, safe_status_code, ) from ._abc import State, Request from ._login import LoginError from ._pull_handler import ProtocolError, Backoff, PullRequest, PullHandler __version__ = "0.2.0" __all__ = () PKBSNumRsensebook/_abc.pyimport abc import attr from typing import Dict, Any, Optional from ._utils import build_url @attr.s(slots=True, kw_only=True) class State(metaclass=abc.ABCMeta): """Core state storing, and methods for logging in/out of Facebook.""" revision = attr.ib(type=str) fb_dtsg = attr.ib(type=str) @property @abc.abstractmethod def cookies(self) -> Dict[str, str]: raise NotImplementedError @attr.s(slots=True, kw_only=True, frozen=True) class Request(metaclass=abc.ABCMeta): """Defines a generic way of specifying HTTP requests.""" @property @abc.abstractmethod def method(self) -> str: raise NotImplementedError @property @abc.abstractmethod def host(self) -> str: raise NotImplementedError @property @abc.abstractmethod def target(self) -> str: raise NotImplementedError @property @abc.abstractmethod def params(self) -> Dict[str, Any]: raise NotImplementedError @property def read_timeout(self) -> Optional[float]: return None @property def connect_timeout(self) -> Optional[float]: return None @property def url(self) -> str: return build_url(host=self.host, target=self.target, params=self.params) PKBSNm33sensebook/_login.pyimport attr import bs4 import re from typing import Dict, Tuple from . import _utils, State REVISION_RE = re.compile(r'"client_revision":(.*?),') FB_DTSG_RE = re.compile(r'name="fb_dtsg" value="(.*?)"') LOGOUT_H_RE = re.compile(r'name=\\"h\\" value=\\"(.*?)\\"') LOGIN_URL = "https://m.facebook.com/login" HOME_URL = "https://facebook.com/home" class LoginError(Exception): pass def get_revision(html: str) -> str: match = REVISION_RE.search(html) if not match: raise LoginError("Could not find `revision`!") return match.group(1) def get_fb_dtsg(html: str) -> str: match = FB_DTSG_RE.search(html) if not match: raise LoginError("Could not find `fb_dtsg`!") return match.group(1) def get_logout_h(html: str) -> str: match = LOGOUT_H_RE.search(html) if not match: raise LoginError("Could not find `logout_h`!") return match.group(1) def get_form_data( html: str, email: str, password: str ) -> Tuple[str, str, Dict[str, str]]: try: method, url, data = _utils.parse_form(html) except ValueError as e: raise LoginError from e # TODO: Better error message data["email"] = email data["pass"] = password if "sign_up" in data: del data["sign_up"] data["login"] = "Log In" return method, url, data def check(state: State, url: str) -> None: if "c_user" not in state.cookies: raise LoginError("Could not login, failed on: {}".format(url)) def get_logout_form_params(html: str) -> Dict[str, str]: return {"ref": "mb", "h": get_logout_h(html)} PKBSN})sensebook/_pull_handler.pyimport attr import random import logging from typing import Optional, Dict, Iterable, Any, List from . import _utils, _abc log = logging.getLogger(__name__) class ProtocolError(Exception): """Raised if some assumption we made about Facebook's protocol is incorrect.""" def __init__(self, msg, data=None): self.data = data if isinstance(data, dict): self.type = data.get("t") else: self.type = None super().__init__(msg) class Backoff(Exception): """Raised to signal the client to wait for the time specified by `delay`.""" max_time = 320 factor = 5 def __init__(self, message, *, delay): super().__init__(message) self.delay = delay @classmethod def from_tries(cls, message, *, tries): if tries > 0: # Exponential backoff delay = min(cls.factor * 2 ** max(0, tries - 1), cls.max_time) # Jitter delay = delay * random.uniform(1.0, 1.5) else: delay = 0 return cls(message, delay=delay) @attr.s(slots=True, kw_only=True, frozen=True) class PullRequest(_abc.Request): """Handles polling for events.""" params = attr.ib(type=Dict[str, Any]) method = "GET" host = "0-edge-chat.facebook.com" target = "/pull" #: The server holds the request open for 50 seconds read_timeout = 60 #: Slighty over a multiple of 3, see `TCP packet retransmission window` connect_timeout = 10 # TODO: Might be a bit too high def parse_body(body: bytes) -> Dict[str, Any]: try: decoded = body.decode("utf-8") except UnicodeDecodeError as e: raise ProtocolError("Invalid unicode data", body) from e try: return _utils.load_json(_utils.strip_json_cruft(decoded)) except ValueError as e: raise ProtocolError("Invalid JSON data", decoded) from e @attr.s(slots=True, kw_only=True) class PullHandler: # _state = attr.ib(type=_abc.State) mark_alive = attr.ib(False, type=bool) _backoff_tries = attr.ib(0, type=int) _clientid = attr.ib(type=str) _sticky_token = attr.ib(None, type=str) _sticky_pool = attr.ib(None, type=str) _seq = attr.ib(0, type=int) @_clientid.default def _default_client_id(self): return _utils.random_hex(31) def _parse_seq(self, data: Any) -> int: # Extract a new `seq` from pull data, or return the old # The JS code handles "sequence regressions", and sends a `msgs_recv` parameter # back, but we won't bother, since their detection is broken (they don't reset # `msgs_recv` when `seq` resets) # `s` takes precedence over `seq` if "s" in data: return int(data["s"]) if "seq" in data: return int(data["seq"]) return self._seq def _handle_status(self, status_code, body): if status_code == 503: # In Facebook's JS code, this delay is set by their servers on every call to # `/ajax/presence/reconnect.php`, as `proxy_down_delay_millis`, but we'll # just set a sensible default raise Backoff("Server is unavailable", delay=60) else: raise ProtocolError( "Unknown server error response: {}".format(status_code), body ) # Type handlers def _handle_type_backoff(self, data): self._backoff_tries += 1 raise Backoff.from_tries( "Server told us to back off", tries=self._backoff_tries ) def _handle_type_batched(self, data): for item in data["batches"]: yield from self.handle_data(item) def _handle_type_continue(self, data): self._backoff_tries = 0 raise ProtocolError("Unused protocol message `continue`", data) def _handle_type_fullReload(self, data): # Not yet sure what consequence this has. # But I know that if this is sent, then some messages/events may not have been # sent to us, so we should query for them with a graphqlbatch-something. self._backoff_tries = 0 if "ms" in data: return data["ms"] def _handle_type_heartbeat(self, data): # Request refresh, no need to do anything log.debug("Heartbeat") def _handle_type_lb(self, data): lb_info = data["lb_info"] self._sticky_token = lb_info["sticky"] if "pool" in lb_info: self._sticky_pool = lb_info["pool"] def _handle_type_msg(self, data): self._backoff_tries = 0 return data["ms"] def _handle_type_refresh(self, data): # We won't perform the call, it's quite complicated, and perhaps unnecessary? raise ProtocolError( "The server told us to call `/ajax/presence/reconnect.php`." "This might mean our data representation is wrong!", data, ) _handle_type_refreshDelay = _handle_type_refresh def _handle_type_test_streaming(self, data): raise ProtocolError("Unused protocol message `test_streaming`", data) # Public methods def handle_data(self, data: Dict[str, Any]) -> Iterable[Any]: # Don't worry if you've never seen a lot of these types, this is implemented # based on reading the JS source for Facebook's `ChannelManager` self._seq = self._parse_seq(data) type_ = data.get("t") method = getattr(self, "_handle_type_{}".format(type_), None) if method: return method(data) or () else: raise ProtocolError("Unknown protocol message", data) def next_request(self) -> _abc.Request: return PullRequest( params={ "clientid": self._clientid, "sticky_token": self._sticky_token, "sticky_pool": self._sticky_pool, "msgs_recv": 0, "seq": self._seq, "state": "active" if self.mark_alive else "offline", } ) def handle_connection_error(self) -> None: self._backoff_tries += 1 raise Backoff.from_tries("Could not pull", tries=self._backoff_tries) # Unsure def handle_connect_timeout(self) -> None: # Keep trying every minute raise Backoff("Connection lost", delay=60) def handle_read_timeout(self) -> None: log.debug("Read timeout") # The server might not send data for a while, so we just try again def handle(self, status_code: int, body: bytes) -> Iterable[Any]: """Handle pull protocol body, and yield data frames ready for further parsing. Raise: `ProtocolError` if some assumption we made about Facebook's protocol was wrong. """ if not _utils.safe_status_code(status_code): self._handle_status(status_code, body) return data = parse_body(body) yield from self.handle_data(data) # class StreamingListener(Listener): # """Handles listening for events, using a streaming pull request""" # def _get_pull_params(self): # rtn = super()._get_pull_params() # rtn["mode"] = "stream" # rtn["format"] = "json" # return rtn # async def pull(self): # try: # r = await self._pull(stream=True) # return list(r.iter_json()) # except (requests.ConnectionError, requests.Timeout): # # If we lost our connection, keep trying every minute # await trio.sleep(60) # return None PKBSNᓺ.hhsensebook/_utils.pyimport bs4 import datetime import json import random import urllib.parse from typing import Dict, Any, Tuple def default_user_agent() -> str: from . import __version__ return "{}/{}".format(__name__.split(".")[0], __version__) def parse_form(html: str) -> Tuple[str, str, Dict[str, str]]: soup = bs4.BeautifulSoup(html, "html.parser") form = soup.form if form is None or not form.has_attr("action"): raise ValueError("Could not find `form` element!") data = { elem["name"]: elem["value"] for elem in form.find_all("input") if elem.has_attr("value") and elem.has_attr("name") } return form.get("method", "GET"), form["action"], data def build_url( *, host: str, target: str, params: Dict[str, Any], secure: bool = True ) -> str: scheme = "https" if secure else "http" query = urllib.parse.urlencode(params) return urllib.parse.urlunsplit((scheme, host, target, query, "")) def strip_json_cruft(text: str) -> str: """Removes `for(;;);` (and other cruft) that preceeds JSON responses""" try: return text[text.index("{") :] except ValueError: raise ValueError("No JSON object found: {!r}".format(text)) def load_json(text: str) -> Any: return json.loads(text) def time_from_millis(timestamp_in_milliseconds: int) -> datetime.datetime: return datetime.datetime.utcfromtimestamp(int(timestamp_in_milliseconds) / 1000) def random_hex(n): return "{:x}".format(random.randint(0, 2 ** n)) def safe_status_code(status_code): return 200 <= status_code < 300 # @decorator.decorator # def raises(func, exception_cls: BaseException = None, *args, **kwargs): # try: # return func(*args, **kwargs) # except Exception as e: # if exception_cls is not None and isinstance(e, exception_cls): # raise # raise InternalError from e PKBSN5%sensebook-0.2.0.dist-info/LICENSE.txtBSD 3-Clause License Copyright (c) 2018, Mads Marquart All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PK!HPOsensebook-0.2.0.dist-info/WHEEL HM K-*ϳR03rOK-J,/RH,szd&Y)r$[)T&UrPK!H"sensebook-0.2.0.dist-info/METADATAW]S8}mg28;ޥm5M}`vbˎ,~d1 tѽWWGܱ96+Jh7~'d|Et;Ѵ.Kf؅PE ${KtZ\9I}%VsMFBE=S]Jҕ+P#w}z9KV[Û6Mwt-$ nP |NmU8t"ulea,Aq&鐩Fx*°ױ%oPCJ6_?ZGKHIYI~]4XzN>E3]4XeY+h(۱(?}N FJ@Z]xsb*9I#I5tVK(j>ºsƾ{{76N䵴^mJ^AJ=M'ήl `8aذ}a ۇY ;̵6E_yꆟ>&W M3x`Mmyr6(zv>檌Ξu 痞BpXQTb$ؾF\9ѳp2>8ێM{ҥaweTxSYwݭy.07u.wrcmQ:lP2T~(7luɺF hβR  0 ?w+QnGl!,u1>Yr.I^H.̠o#P#8̡bM2mL3`@SÎ%xve<TF\Ň R>09wJ6#VkmM h^RN`kC!f䰨b+TY<#AC!]& a]xNc4Tc[ZWMߥdtM'lбwfpT!M)1__ 90l#~!Fz?_ijyA eDh#mY$F@3$ J-eMZy o͔gΨmcP DdjXs!E j iL7phy>|fLCQحdC"u~жGI+s+Q!ՀVr^p`&#!SϖZCnzƈb9[@>8vijcpcU}6EjSKCa)դjtqze]Q`̓^JLQuA JP\~aYH<ofew9E_鬕OiӔٓ*Z,WRpN>.Zщz4,mȶSYF_ޒ!$|}8=4O'[@%ݺWe G7yk}`%aK.O҅tq4:KHijtQ PKBSN@sensebook/__init__.pyPKBSNumRsensebook/_abc.pyPKBSNm33sensebook/_login.pyPKBSN})T sensebook/_pull_handler.pyPKBSNᓺ.hh"+sensebook/_utils.pyPKBSN5%2sensebook-0.2.0.dist-info/LICENSE.txtPK!HPO8sensebook-0.2.0.dist-info/WHEELPK!H"t9sensebook-0.2.0.dist-info/METADATAPK!H  a?sensebook-0.2.0.dist-info/RECORDPK UA