# ===== sensebook/__init__.py =====

"""Making sense of Facebook's undocumented API."""

__title__ = "SenseBook"
__version__ = "0.1.2"
__all__ = ()

from .sansio import State, LoginError, ProtocolError, Listener


# ===== sensebook/sansio/__init__.py =====

from ._utils import (
    default_user_agent,
    parse_form,
    build_url,
    strip_json_cruft,
    load_json,
    time_from_millis,
    random_hex,
)
from ._abc import State, ABCRequest
from ._login import LoginError
from ._listen import ProtocolError, PullRequest, Listener


# ===== sensebook/sansio/_abc.py =====

import abc
import attr
from typing import Dict, Any, Optional

from ._utils import build_url


@attr.s(slots=True, kw_only=True)
class State(metaclass=abc.ABCMeta):
    """Core state storage, and methods for logging in/out of Facebook."""

    revision = attr.ib(type=str)
    fb_dtsg = attr.ib(type=str)

    @property
    @abc.abstractmethod
    def cookies(self) -> Dict[str, str]:
        raise NotImplementedError


class ABCRequest(metaclass=abc.ABCMeta):
    __slots__ = ()

    @property
    @abc.abstractmethod
    def method(self) -> str:
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def host(self) -> str:
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def target(self) -> str:
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def params(self) -> Dict[str, Any]:
        raise NotImplementedError

    @property
    def read_timeout(self) -> Optional[float]:
        return None

    @property
    def connect_timeout(self) -> Optional[float]:
        return None

    @property
    def url(self) -> str:
        return build_url(host=self.host, target=self.target, params=self.params)


# ===== sensebook/sansio/_backoff.py =====

import attr
import random
from typing import Optional, ClassVar, Callable


@attr.s(slots=True, kw_only=True)
class Backoff:
    func = attr.ib(type=Callable[[float], float])
    jitter = attr.ib(type=Callable[[float], float])
    _tries = attr.ib(0, type=int)
    _delay_override = attr.ib(None, type=float)

    @classmethod
    def expo(cls, *, max_time, factor, **kwargs) -> "Backoff":
        def func(tries: float) -> float:
            return min(factor * 2 ** max(0, tries - 1), max_time)

        return cls(func=func, **kwargs)

    @property
    def tries(self):
        return self._tries

    def do(self) -> None:
        self._tries += 1

    def reset(self) -> None:
        self._tries = 0

    def override(self, value: float) -> None:
        self._delay_override = value

    def reset_override(self) -> None:
        self._delay_override = None

    def get_delay(self) -> Optional[float]:
        if self._delay_override:
            return self._delay_override
        if self.tries > 0:
            return self.func(self.tries)
        return None

    def get_randomized_delay(self) -> Optional[float]:
        delay = self.get_delay()
        if delay is None:
            return None
        return self.jitter(delay)
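
# --- usage sketch (editor's addition, not part of the package) ---
# A minimal illustration of how `Backoff.expo` computes delays, using the same
# parameters as the Listener in `_listen.py` below (max_time=320, factor=5).
# The jitter function is only an example.

import random

from sensebook.sansio._backoff import Backoff

backoff = Backoff.expo(
    max_time=320, factor=5, jitter=lambda delay: delay * random.uniform(1.0, 1.5)
)
for _ in range(5):
    backoff.do()  # register a failed attempt
    # Base delays grow as 5, 10, 20, 40, 80, ... seconds, capped at 320
    print(backoff.tries, backoff.get_delay(), backoff.get_randomized_delay())
backoff.reset()
assert backoff.get_delay() is None  # no pending failures, so no delay
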
# ===== sensebook/sansio/_listen.py =====

import attr
import random
import logging
from typing import Optional, Dict, Iterable, Any, List

from . import _utils, _abc, _backoff as backoff

log = logging.getLogger(__name__)


class ProtocolError(Exception):
    """Raised if some assumption we made about Facebook's protocol is incorrect."""

    def __init__(self, msg, data=None):
        self.data = data
        if isinstance(data, dict):
            self.type = data.get("t")
        else:
            self.type = None
        super().__init__(msg)


@attr.s(slots=True, kw_only=True)
class PullRequest(_abc.ABCRequest):
    """Handles polling for events."""

    params = attr.ib(type=Dict[str, Any])
    method = "GET"
    host = "0-edge-chat.facebook.com"
    target = "/pull"
    #: The server holds the request open for 50 seconds
    read_timeout = 60
    #: Slightly over a multiple of 3, see `TCP packet retransmission window`
    connect_timeout = 10  # TODO: Might be a bit too high


@attr.s(slots=True, kw_only=True)
class Listener:
    mark_alive = attr.ib(False, type=bool)
    _backoff = attr.ib(type=backoff.Backoff)
    _clientid = attr.ib(type=str)
    _sticky_token = attr.ib(None, type=str)
    _sticky_pool = attr.ib(None, type=str)
    _seq = attr.ib(0, type=int)

    @_backoff.default
    def _default_backoff(self):
        def jitter(value):
            return value * random.uniform(1.0, 1.5)

        return backoff.Backoff.expo(max_time=320, factor=5, jitter=jitter)

    @_clientid.default
    def _default_client_id(self):
        return _utils.random_hex(31)

    def _parse_seq(self, data: Any) -> int:
        # Extract a new `seq` from pull data, or return the old one.
        # The JS code handles "sequence regressions", and sends a `msgs_recv`
        # parameter back, but we won't bother, since their detection is broken
        # (they don't reset `msgs_recv` when `seq` resets).
        # `s` takes precedence over `seq`
        if "s" in data:
            return int(data["s"])
        if "seq" in data:
            return int(data["seq"])
        return self._seq

    @staticmethod
    def _safe_status_code(status_code):
        return 200 <= status_code < 300

    def _handle_status(self, status_code, body):
        if status_code == 503:
            # In Facebook's JS code, this delay is set by their servers on every
            # call to `/ajax/presence/reconnect.php`, as `proxy_down_delay_millis`,
            # but we'll just set a sensible default
            self._backoff.override(60)
            log.error("Server is unavailable")
        else:
            raise ProtocolError(
                "Unknown server error response: {}".format(status_code), body
            )

    def _parse_body(self, body: bytes) -> Dict[str, Any]:
        try:
            decoded = body.decode("utf-8")
        except UnicodeDecodeError as e:
            raise ProtocolError("Invalid unicode data", body) from e
        try:
            return _utils.load_json(_utils.strip_json_cruft(decoded))
        except ValueError as e:
            raise ProtocolError("Invalid JSON data", body) from e

    def _handle_data(self, data: Dict[str, Any]) -> Iterable[Any]:
        # Don't worry if you've never seen a lot of these types; this is implemented
        # based on reading the JS source for Facebook's `ChannelManager`
        self._seq = self._parse_seq(data)
        type_ = data.get("t")
        method = getattr(self, "_handle_type_{}".format(type_), None)
        if method:
            return method(data) or ()
        else:
            raise ProtocolError("Unknown protocol message", data)

    # Type handlers

    def _handle_type_backoff(self, data):
        log.warning("Server told us to back off")
        self._backoff.do()

    def _handle_type_batched(self, data):
        for item in data["batches"]:
            yield from self._handle_data(item)

    def _handle_type_continue(self, data):
        self._backoff.reset()

    def _handle_type_fullReload(self, data):
        # Not yet sure what consequence this has.
        # But I know that if this is sent, then some messages/events may not have
        # been sent to us, so we should query for them with a graphqlbatch-something.
        self._backoff.reset()
        if "ms" in data:
            return data["ms"]

    def _handle_type_heartbeat(self, data):
        # Request refresh, no need to do anything
        log.debug("Heartbeat")

    def _handle_type_lb(self, data):
        lb_info = data["lb_info"]
        self._sticky_token = lb_info["sticky"]
        if "pool" in lb_info:
            self._sticky_pool = lb_info["pool"]

    def _handle_type_msg(self, data):
        self._backoff.reset()
        return data["ms"]

    def _handle_type_refresh(self, data):
        # We don't perform the call, it's quite complicated, and perhaps unnecessary?
        raise ProtocolError(
            "The server told us to call `/ajax/presence/reconnect.php`. "
            "This might mean our data representation is wrong!",
            data,
        )

    _handle_type_refreshDelay = _handle_type_refresh

    def _handle_type_test_streaming(self, data):
        raise ProtocolError("Unused protocol message `test_streaming`", data)

    # Public methods

    def get_delay(self) -> Optional[float]:
        return self._backoff.get_randomized_delay()

    def next_request(self) -> PullRequest:
        # TODO: Not sure if putting this here is correct
        self._backoff.reset_override()
        return PullRequest(
            params={
                "clientid": self._clientid,
                "sticky_token": self._sticky_token,
                "sticky_pool": self._sticky_pool,
                "msgs_recv": 0,
                "seq": self._seq,
                "state": "active" if self.mark_alive else "offline",
            }
        )

    def handle_connection_error(self) -> None:
        log.exception("Could not pull")
        self._backoff.do()  # Unsure

    def handle_connect_timeout(self) -> None:
        log.exception("Connection lost")
        # Keep trying every minute
        self._backoff.override(60)

    def handle_read_timeout(self) -> None:
        log.debug("Read timeout")
        # The server might not send data for a while, so we just try again

    def handle(self, status_code: int, body: bytes) -> Iterable[Any]:
        """Handle a pull protocol body, and yield data frames ready for further parsing."""
        if not self._safe_status_code(status_code):
            self._handle_status(status_code, body)
            return
        data = self._parse_body(body)
        yield from self._handle_data(data)


# class StreamingListener(Listener):
#     """Handles listening for events, using a streaming pull request"""
#
#     def _get_pull_params(self):
#         rtn = super()._get_pull_params()
#         rtn["mode"] = "stream"
#         rtn["format"] = "json"
#         return rtn
#
#     async def pull(self):
#         try:
#             r = await self._pull(stream=True)
#             return list(r.iter_json())
#         except (requests.ConnectionError, requests.Timeout):
#             # If we lost our connection, keep trying every minute
#             await trio.sleep(60)
#             return None
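
# --- usage sketch (editor's addition, not part of the package) ---
# The Listener above is sans-io: the caller performs the HTTP request described
# by `next_request()` and feeds the raw response into `handle()`, as the sync
# layer further down does. The pull body below is a fabricated example of the
# shape the handlers expect: `for (;;);` cruft, a `t` field selecting the
# handler, and `seq`/`ms` payload fields.

from sensebook.sansio import Listener

listener = Listener()

body = b'for (;;); {"t": "msg", "seq": 7, "ms": [{"type": "delta"}]}'
frames = list(listener.handle(200, body))
print(frames)  # [{'type': 'delta'}]

request = listener.next_request()
print(request.method, request.url)  # GET https://0-edge-chat.facebook.com/pull?...&seq=7&...
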
# ===== sensebook/sansio/_login.py =====

import attr
import bs4
import re
from typing import Dict, Tuple

from . import _utils, State

REVISION_RE = re.compile(r'"client_revision":(.*?),')
FB_DTSG_RE = re.compile(r'name="fb_dtsg" value="(.*?)"')
LOGOUT_H_RE = re.compile(r'name=\\"h\\" value=\\"(.*?)\\"')

LOGIN_URL = "https://m.facebook.com/login"
HOME_URL = "https://facebook.com/home"


class LoginError(Exception):
    pass


def get_revision(html: str) -> str:
    match = REVISION_RE.search(html)
    if not match:
        raise LoginError("Could not find `revision`!")
    return match.group(1)


def get_fb_dtsg(html: str) -> str:
    match = FB_DTSG_RE.search(html)
    if not match:
        raise LoginError("Could not find `fb_dtsg`!")
    return match.group(1)


def get_logout_h(html: str) -> str:
    match = LOGOUT_H_RE.search(html)
    if not match:
        raise LoginError("Could not find `logout_h`!")
    return match.group(1)


def get_form_data(
    html: str, email: str, password: str
) -> Tuple[str, str, Dict[str, str]]:
    try:
        method, url, data = _utils.parse_form(html)
    except ValueError as e:
        raise LoginError from e  # TODO: Better error message
    data["email"] = email
    data["pass"] = password
    if "sign_up" in data:
        del data["sign_up"]
    data["login"] = "Log In"
    return method, url, data


def check(state: State, url: str) -> None:
    if "c_user" not in state.cookies:
        raise LoginError("Could not login, failed on: {}".format(url))


def get_logout_form_params(html: str) -> Dict[str, str]:
    return {"ref": "mb", "h": get_logout_h(html)}
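
# --- usage sketch (editor's addition, not part of the package) ---
# How `get_form_data` prepares the login POST. The HTML below is a made-up
# stand-in for the m.facebook.com login page; the real page carries many more
# hidden inputs, all of which `parse_form` copies into the form data.

from sensebook.sansio import _login

html = """
<form method="POST" action="https://m.facebook.com/login/">
    <input name="lsd" value="some-token" />
    <input name="email" value="" />
    <input name="pass" value="" />
</form>
"""

method, url, data = _login.get_form_data(html, "me@example.com", "hunter2")
print(method, url)  # POST https://m.facebook.com/login/
print(data)  # {'lsd': 'some-token', 'email': 'me@example.com', 'pass': 'hunter2', 'login': 'Log In'}
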
# ===== sensebook/sansio/_utils.py =====

import bs4
import datetime
import json
import random
import urllib.parse
from typing import Dict, Any, Tuple

from .. import __version__


def default_user_agent() -> str:
    return "{}/{}".format(__name__.split(".")[0], __version__)


def parse_form(html: str) -> Tuple[str, str, Dict[str, str]]:
    soup = bs4.BeautifulSoup(html, "html.parser")
    form = soup.form
    if form is None or not form.has_attr("action"):
        raise ValueError("Could not find `form` element!")
    data = {
        elem["name"]: elem["value"]
        for elem in form.find_all("input")
        if elem.has_attr("value") and elem.has_attr("name")
    }
    return form.get("method", "GET"), form["action"], data


def build_url(
    *, host: str, target: str, params: Dict[str, Any], secure: bool = True
) -> str:
    scheme = "https" if secure else "http"
    query = urllib.parse.urlencode(params)
    return urllib.parse.urlunsplit((scheme, host, target, query, ""))


def strip_json_cruft(text: str) -> str:
    """Removes `for(;;);` (and other cruft) that precedes JSON responses."""
    try:
        return text[text.index("{"):]
    except ValueError:
        raise ValueError("No JSON object found: {!r}".format(text))


def load_json(text: str) -> Any:
    return json.loads(text)


def time_from_millis(timestamp_in_milliseconds: int) -> datetime.datetime:
    return datetime.datetime.utcfromtimestamp(int(timestamp_in_milliseconds) / 1000)


def random_hex(n):
    return "{:x}".format(random.randint(0, 2 ** n))


# @decorator.decorator
# def raises(func, exception_cls: BaseException = None, *args, **kwargs):
#     try:
#         return func(*args, **kwargs)
#     except Exception as e:
#         if exception_cls is not None and isinstance(e, exception_cls):
#             raise
#         raise InternalError from e
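
# --- usage sketch (editor's addition, not part of the package) ---
# The helpers above are plain functions; a few illustrative calls with
# arbitrary parameter values:

from sensebook.sansio import build_url, load_json, strip_json_cruft, time_from_millis

url = build_url(
    host="0-edge-chat.facebook.com",
    target="/pull",
    params={"clientid": "deadbeef", "seq": 0},
)
print(url)  # https://0-edge-chat.facebook.com/pull?clientid=deadbeef&seq=0

raw = 'for (;;); {"t": "heartbeat", "time": 1546300800000}'
data = load_json(strip_json_cruft(raw))
print(time_from_millis(data["time"]))  # 2019-01-01 00:00:00
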
# ===== sensebook/sync/__init__.py =====

from ._core import State
from ._listen import Listener


def login(email: str, password: str) -> State:
    return State.login(email, password)


# ===== sensebook/sync/_core.py =====

import attr
import requests
from typing import Dict, TypeVar, Type

from .. import sansio
from ..sansio import _login

T = TypeVar("T", bound="State")


@attr.s(slots=True, kw_only=True)
class State(sansio.State):
    _session = attr.ib(type=requests.Session)

    @_session.default
    @staticmethod
    def _default_session():
        session = requests.Session()
        session.headers["User-Agent"] = sansio.default_user_agent()
        return session

    @property
    def cookies(self):
        return self._session.cookies

    def request(self, method, url, **kwargs):
        if url.startswith("/"):
            url = "https://facebook.com" + url
        return self._session.request(method, url, **kwargs)

    def get(self, url, **kwargs):
        # Go through `request`, so that relative URLs (e.g. "/logout.php") work
        return self.request("GET", url, **kwargs)

    def post(self, url, **kwargs):
        return self.request("POST", url, **kwargs)

    @classmethod
    def login(cls: Type[T], email: str, password: str) -> T:
        session = cls._default_session()
        r = session.get(_login.LOGIN_URL)
        method, url, data = _login.get_form_data(r.text, email, password)
        r = session.request(method, url, data=data)
        # The session quacks like a `State` here, since it exposes `.cookies`
        _login.check(session, r.url)
        r = session.get(_login.HOME_URL)
        return cls(
            fb_dtsg=_login.get_fb_dtsg(r.text),
            revision=_login.get_revision(r.text),
            session=session,
        )

    def logout(self) -> None:
        """Properly log out and invalidate the session."""
        r = self.post("/bluebar/modern_settings_menu/", data={"pmid": "4"})
        params = _login.get_logout_form_params(r.text)
        self.get("/logout.php", params=params)

    def is_logged_in(self) -> bool:
        """Check the login status.

        Return:
            Whether the session is still logged in
        """
        # Call the login url, and see if we're redirected to the home page
        r = self.get(_login.LOGIN_URL, allow_redirects=False)
        return "Location" in r.headers and "home" in r.headers["Location"]


# ===== sensebook/sync/_listen.py =====

import attr
import requests
import time
from typing import Any, Iterable, Optional

from .. import sansio
from . import State


@attr.s(slots=True, kw_only=True)
class Listener:
    _state = attr.ib(type=State)
    _listener = attr.ib(factory=sansio.Listener, type=sansio.Listener)

    def _sleep(self) -> None:
        delay = self._listener.get_delay()
        if delay is not None:
            print("Sleeping for {} seconds.".format(delay))
            time.sleep(delay)

    def _step(self) -> Iterable[Any]:
        request = self._listener.next_request()
        try:
            r = self._state.request(
                request.method,
                request.url,
                timeout=(request.connect_timeout, request.read_timeout),
            )
        except requests.exceptions.ConnectTimeout:
            # Must be caught before ConnectionError, which it subclasses
            self._listener.handle_connect_timeout()
        except requests.exceptions.ReadTimeout:
            self._listener.handle_read_timeout()
        except requests.exceptions.ConnectionError:
            self._listener.handle_connection_error()
        else:
            yield from self._listener.handle(r.status_code, r.content)

    def pull(self) -> Iterable[Any]:
        while True:
            self._sleep()
            yield from self._step()
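
# --- usage sketch (editor's addition, not part of the package) ---
# Putting the sync layer together: log in, poll for event frames, log out.
# The credentials are placeholders, and running this performs real requests
# against Facebook's servers.

import sensebook.sync

state = sensebook.sync.login("me@example.com", "hunter2")
try:
    listener = sensebook.sync.Listener(state=state)
    for frame in listener.pull():
        print(frame)  # raw event frames, ready for further parsing
finally:
    state.logout()
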
# ===== sensebook-0.1.2.dist-info/LICENSE.txt =====

BSD 3-Clause License

Copyright (c) 2018, Mads Marquart
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

* Neither the name of the copyright holder nor the names of its contributors
  may be used to endorse or promote products derived from this software
  without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.