PK!mastools/__init__.pyPK!Jmastools/models/__init__.py"""Standard imports for mastools.models.""" from .accounts import Accounts from .base import session_for __all__ = ["session_for", "Accounts"] PK!Ly^^mastools/models/accounts.py"""Model the Accounts table.""" from sqlalchemy import Column, DateTime, Integer, JSON, String, Text from .base import Base class Accounts(Base): """The Accounts table.""" __tablename__ = "accounts" id = Column(Integer, primary_key=True) username = Column(String) domain = Column(String) created_at = Column(DateTime) note = Column(Text) fields = Column(JSON) suspended_at = Column(DateTime) # Note: columns are added as-needed. If we get overly ambitious and add everything at once, this is # more likely to drift out of date with the upstream Mastodon table definitions. # As of 2019-09-24, upstream defines these columns: # *username | character varying | | not null | ''::character varying # *domain | character varying | | | # secret | character varying | | not null | ''::character varying # private_key | text | | | # public_key | text | | not null | ''::text # remote_url | character varying | | not null | ''::character varying # salmon_url | character varying | | not null | ''::character varying # hub_url | character varying | | not null | ''::character varying # *created_at | timestamp without time zone | | not null | # updated_at | timestamp without time zone | | not null | # *note | text | | not null | ''::text # display_name | character varying | | not null | ''::character varying # uri | character varying | | not null | ''::character varying # url | character varying | | | # avatar_file_name | character varying | | | # avatar_content_type | character varying | | | # avatar_file_size | integer | | | # avatar_updated_at | timestamp without time zone | | | # header_file_name | character varying | | | # header_content_type | character varying | | | # header_file_size | integer | | | # header_updated_at | timestamp without time zone | | | # avatar_remote_url | character varying | | | # subscription_expires_at | timestamp without time zone | | | # locked | boolean | | not null | false # header_remote_url | character varying | | not null | ''::character varying # last_webfingered_at | timestamp without time zone | | | # inbox_url | character varying | | not null | ''::character varying # outbox_url | character varying | | not null | ''::character varying # shared_inbox_url | character varying | | not null | ''::character varying # followers_url | character varying | | not null | ''::character varying # protocol | integer | | not null | 0 # id | bigint | | not null | nextval('accounts_id_seq'::regclass) # memorial | boolean | | not null | false # moved_to_account_id | bigint | | | # featured_collection_url | character varying | | | # *fields | jsonb | | | # actor_type | character varying | | | # discoverable | boolean | | | # also_known_as | character varying[] | | | # silenced_at | timestamp without time zone | | | # *suspended_at | timestamp without time zone | | | PK!Qmastools/models/base.py"""Common database things used everywhere.""" from functools import lru_cache from psycopg2 import connect from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker from sqlalchemy.ext.declarative import declarative_base Base = declarative_base() @lru_cache() def session_for(*, host, database, user, password, port=5432): """Return a (possibly cached) session for the connection details.""" def pg_connect(): """Return a connection to the Mastodon database.""" return connect(host=host, database=database, user=user, password=password, port=port) engine = create_engine(f"postgresql+psycopg2://", creator=pg_connect) Session = sessionmaker(bind=engine) session = Session() return session PK!R??mastools/scripts/common.py"""Common things used by all scripts.""" import json from pathlib import Path MASTOOLS_DIR = Path("~/.mastools").expanduser() CONFIG_FILE = MASTOOLS_DIR / "config.json" def get_config(): """Return the parsed contents of the config file.""" return json.loads(CONFIG_FILE.read_text()) def cache_file(cache_key): """Return the Path of the cache file for the key.""" return MASTOOLS_DIR / f"{cache_key}_cache.json" def load_cache(cache_key, version): """Return the contents of the cache for the key, if its version is correct.""" # Try to get the results of the last run, but fall back to an empty dict if that's not # available. That's most likely to happen on the first run. try: cache = json.loads(cache_file(cache_key).read_text()) except FileNotFoundError: return {} if cache["version"] != version: raise ValueError( f"Unknown {cache_key} version number: expected {version}, got {cache['version']}" ) return cache[cache_key] def save_cache(cache_key, version, data): """Write the data to the cache for the key.""" # Save these results for the next run. Include the version information and nest the user # information inside a "users" key from the start, because experience says if we don't do this # then the next release will add a feature that requires a change in the data layout, and then # we'll have to write a data migration or something. cache_data = {cache_key: data, "version": version} cache_file(cache_key).write_text(json.dumps(cache_data, indent=2)) PK!7/%mastools/scripts/show_user_changes.py#!/usr/bin/env python """ Show any new, changed, or deleted accounts that mention URLs in their account info. This is super common for spammers, who like to stuff their crummy website's info into every single field possible. Suppose you run this hourly and email yourself the results (which will usually be empty unless your instance is *very* busy). Now you can catch those "https://support-foo-corp/" spammers before they have a chance to post! """ import argparse from operator import itemgetter from mastools.models import session_for, Accounts from mastools.scripts import common CACHE_KEY = "users" CACHE_VERSION = 1 def has_url(account: Accounts) -> bool: """Return True if the account's note or fields seem to contain a URL.""" if account.note and "http" in account.note.lower(): return True if "http" in str(account.fields).lower(): return True return False def users_with_urls(session): """Return a dictionary of usernames to their account info when they mention URLs.""" query = ( session.query(Accounts) .filter(Accounts.domain == None) # pylint: disable=singleton-comparison .filter(Accounts.suspended_at == None) # pylint: disable=singleton-comparison .order_by(Accounts.created_at) ) return { account.username: {"fields": account.fields, "note": account.note} for account in query if has_url(account) } def render_field_changes(old_fields, new_fields): """Pretty-print changes in a user's bio fields.""" if not (old_fields or new_fields): yield " " return if sorted(old_fields, key=itemgetter("name", "value")) == sorted( new_fields, key=itemgetter("name", "value") ): yield " " return old_set = {(field["name"], field["value"]) for field in old_fields} new_set = {(field["name"], field["value"]) for field in new_fields} for field in sorted(old_set - new_set): yield f" - {field[0]!r}: {field[1]!r}" for field in sorted(old_set & new_set): yield f" {field[0]!r}: {field[1]!r}" for field in sorted(new_set - old_set): yield f" + {field[0]!r}: {field[1]!r}" def render_note_changes(old_note, new_note): """Pretty-print changes in a user's bio note.""" if not (old_note or new_note): yield " " return if old_note == new_note: yield " " return # Returning the repr (`!r`) protects from email header injection by crafty users. See # https://www.thesitewizard.com/php/protect-script-from-email-injection.shtml for an # explanation. if old_note: yield f" - {old_note!r}" if new_note: yield f" + {new_note!r}" def render_new_user(username, data): """Pretty-print information about a new user.""" yield f"New user: {username}" yield " fields:" yield from render_field_changes({}, data["fields"]) yield " note:" yield from render_note_changes("", data["note"]) def render_changed_user(username, old_data, new_data): """Pretty-print information about a changed user.""" yield f"Changed user: {username}" yield " fields:" yield from render_field_changes(old_data["fields"], new_data["fields"]) yield " note:" yield from render_note_changes(old_data["note"], new_data["note"]) def render_deleted_user(username, data): """Pretty-print information about a deleted user.""" yield f"Deleted user: {username}" yield " fields:" yield from render_field_changes(data["fields"], {}) yield " note:" yield from render_note_changes(data["note"], "") def show_output(gen): """Print each line of output to stdout, then a blank line. Building the output this way is a little unusual, but it's much easier to test. Also, adopting this convention means that we don't have to build up the output inside each rendering function, so they can be as simple as possible and not have to track their own state. """ for line in gen: print(line) print() def handle_command_line(): """Fetch all changed current users with URLs in their account info and show any changes.""" parser = argparse.ArgumentParser(description=handle_command_line.__doc__) parser.parse_args() session = session_for(**common.get_config()) old_users = common.load_cache(CACHE_KEY, CACHE_VERSION) new_users = users_with_urls(session) for username, new_data in new_users.items(): try: old_data = old_users.pop(username) except KeyError: # If the username isn't in the old data, then they're new. Report than and move on to # the next account. show_output(render_new_user(username, new_data)) continue if old_data != new_data: # Something's changed since the last time we saw this user. Report that. show_output(render_changed_user(username, old_data, new_data)) # Report any leftover old accounts that aren't in the new accounts. They were probably # suspended. for username, old_data in old_users.items(): show_output(render_deleted_user(username, old_data)) common.save_cache(CACHE_KEY, CACHE_VERSION, new_users) PK!Hes.L\)mastools-0.1.3.dist-info/entry_points.txtN+I/N.,()*/--N-MHKO-M,.)փ))*R)@Css̼T..PK!HڽTUmastools-0.1.3.dist-info/WHEEL A н#Z;/"d&F[xzw@Zpy3Fv]\fi4WZ^EgM_-]#0(q7PK!HF,] !mastools-0.1.3.dist-info/METADATAVko6_q|pYtjŌ8E%i+ib"*IUP'm:&@E򌝌lń=q.sP.:b71\zBW~mH*V+Ñj(3&EK>RZ/uA)WH:Wh[ #HfA]I<:Ug'WbZT ͝eӭKM{n'\\0 ?; cqIkUQT4Ы M5\.^$XU8o3|}=~=LY7֑.WA8~,J9z> C2x%ån☪ƭ7 {#lZ maSw sEERj˳x룆dQ8XN*:7@"Qt :7&*c*'1-=QXV OX,[%?;ngB;7v;{~^bn@?,%s#Y!AJԥpĆ/gӮ>1EwOW_+K6՛C Xc 8, 3v"].<]m*W~mUsxI%=Mz*XW%I)2qL/r~|HąXZL)zb0 8yU^z6Iue0ʞɍ]gLx^AX zIU5@ʚ T[8n%LP>s$ 9DN?z3.H"mζȅVr΋QS ?ʼOn͑A} J^%ӣ(6oQb=Zbt! dKZ BjRklĸZjy/7`L`LhpC JݰaWl tԀe_]+\څ;o7U !VEBxu.XDu} t]GiN>3D/.gxuyEdz@Fkgšk8oXJ"/0?>1 `N.h:]4 l`wF6 BQBM#ոGsY6 @2l4J0L 9' VV'n# 7hݼw4? jqՈMsx?ҕ++;u|<9nhW >++Hl֥̀CB ټPK!H"mastools-0.1.3.dist-info/RECORDuɒ@{ ˡ "6ȅ,-(b+~b̬u?8GyNjP&(} Z?6?nyJ7+*ѱ3<< 3|TO`RÅ KG*8Bn(XAE&YV/d-ˆ{'deu^xl};)Ԗٝo GǨA~i݃O#S\mNp "L3,p8ǟY]U5z{ULVM;ct4Sa_RIzTD&1x3;xoH#q=+bq범[2sntn'@}G3aƄ!I"jyS>`d\X> r`#up:(ΏyYcY 0et"rsA){}m%d{AhdNj5Tː+7ho\ ɲ^)PK!mastools/__init__.pyPK!J2mastools/models/__init__.pyPK!Ly^^mastools/models/accounts.pyPK!Qmastools/models/base.pyPK!R??mastools/scripts/common.pyPK!7/%3mastools/scripts/show_user_changes.pyPK!Hes.L\)>1mastools-0.1.3.dist-info/entry_points.txtPK!HڽTU1mastools-0.1.3.dist-info/WHEELPK!HF,] !a2mastools-0.1.3.dist-info/METADATAPK!H"7mastools-0.1.3.dist-info/RECORDPK *: