PK!mastools/__init__.pyPK!Jmastools/models/__init__.py"""Standard imports for mastools.models.""" from .accounts import Accounts from .base import session_for __all__ = ["session_for", "Accounts"] PK!Ly^^mastools/models/accounts.py"""Model the Accounts table.""" from sqlalchemy import Column, DateTime, Integer, JSON, String, Text from .base import Base class Accounts(Base): """The Accounts table.""" __tablename__ = "accounts" id = Column(Integer, primary_key=True) username = Column(String) domain = Column(String) created_at = Column(DateTime) note = Column(Text) fields = Column(JSON) suspended_at = Column(DateTime) # Note: columns are added as-needed. If we get overly ambitious and add everything at once, this is # more likely to drift out of date with the upstream Mastodon table definitions. # As of 2019-09-24, upstream defines these columns: # *username | character varying | | not null | ''::character varying # *domain | character varying | | | # secret | character varying | | not null | ''::character varying # private_key | text | | | # public_key | text | | not null | ''::text # remote_url | character varying | | not null | ''::character varying # salmon_url | character varying | | not null | ''::character varying # hub_url | character varying | | not null | ''::character varying # *created_at | timestamp without time zone | | not null | # updated_at | timestamp without time zone | | not null | # *note | text | | not null | ''::text # display_name | character varying | | not null | ''::character varying # uri | character varying | | not null | ''::character varying # url | character varying | | | # avatar_file_name | character varying | | | # avatar_content_type | character varying | | | # avatar_file_size | integer | | | # avatar_updated_at | timestamp without time zone | | | # header_file_name | character varying | | | # header_content_type | character varying | | | # header_file_size | integer | | | # header_updated_at | timestamp without time zone | | | # avatar_remote_url | character varying | | | # subscription_expires_at | timestamp without time zone | | | # locked | boolean | | not null | false # header_remote_url | character varying | | not null | ''::character varying # last_webfingered_at | timestamp without time zone | | | # inbox_url | character varying | | not null | ''::character varying # outbox_url | character varying | | not null | ''::character varying # shared_inbox_url | character varying | | not null | ''::character varying # followers_url | character varying | | not null | ''::character varying # protocol | integer | | not null | 0 # id | bigint | | not null | nextval('accounts_id_seq'::regclass) # memorial | boolean | | not null | false # moved_to_account_id | bigint | | | # featured_collection_url | character varying | | | # *fields | jsonb | | | # actor_type | character varying | | | # discoverable | boolean | | | # also_known_as | character varying[] | | | # silenced_at | timestamp without time zone | | | # *suspended_at | timestamp without time zone | | | PK![q9WPPmastools/models/base.py"""Common database things used everywhere.""" from functools import lru_cache from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker from sqlalchemy.ext.declarative import declarative_base Base = declarative_base() @lru_cache() def session_for(*, host, database, user, password, port=5432): """Return a (possibly cached) session for the connection details.""" engine = create_engine(f"postgresql+psycopg2://{user}:{password}@{host}:{port}/{database}") engine.connect() Session = sessionmaker(bind=engine) session = Session() return session PK!%mastools/scripts/show_user_changes.py#!/usr/bin/env python """ Show any new, changed, or deleted accounts that mention URLs in their account info. This is super common for spammers, who like to stuff their crummy website's info into every single field possible. Suppose you run this hourly and email yourself the results (which will usually be empty unless your instance is *very* busy). Now you can catch those "https://support-foo-corp/" spammers before they have a chance to post! """ import argparse import json from pathlib import Path from mastools.models import session_for, Accounts CACHE_FILE = "~/.mastools/usercache.json" CONFIG_FILE = "~/.mastools/config.json" def has_url(account: Accounts) -> str: """Return True if the account's note or fields seem to contain a URL.""" if account.note and "http" in account.note.lower(): return True if "http" in str(account.fields).lower(): return True return False def users_with_urls(session): """Return a dictionary of usernames to their account info when they mention URLs.""" query = ( session.query(Accounts) .filter(Accounts.domain == None) # pylint: disable=singleton-comparison .filter(Accounts.suspended_at == None) # pylint: disable=singleton-comparison .order_by(Accounts.created_at) ) return { account.username: {"fields": account.fields, "note": account.note} for account in query if has_url(account) } def handle_command_line(): """Fetch all changed current users with URLs in their account info and show any changes.""" parser = argparse.ArgumentParser(description=handle_command_line.__doc__) parser.parse_args() cache_file = Path(CACHE_FILE).expanduser() config_file = Path(CONFIG_FILE).expanduser() config = json.loads(config_file.read_text()) session = session_for(**config) # Try to get the results of the last run, but fall back to an empty dict if that's not # available. That's most likely to happen on the first run. try: old_users = json.loads(cache_file.read_text())["users"] except FileNotFoundError: old_users = {} new_users = users_with_urls(session) for username, new_data in new_users.items(): try: old_data = old_users.pop(username) except KeyError: # If the username isn't in the old data, then they're new. Report than and move on to # the next account. print("New user:", username) print("Fields:", new_data["fields"]) print("Note:", new_data["note"]) print("-" * 30) continue if old_data == new_data: continue # Something's changed since the last time we saw this user. Report that. print("Changed user:", username) print("Old Fields:", old_data["fields"]) print("Old Note:", old_data["note"]) print("New Fields:", new_data["fields"]) print("New Note:", new_data["note"]) print("-" * 30) # Report any leftover old accounts that aren't in the new accounts. They were probably # suspended. for username, old_data in old_users.items(): print("Deleted user:", username) print("Old Fields:", old_data["fields"]) print("Old Note:", old_data["note"]) print("-" * 30) # Save these results for the next run. Include the version information and nest the user # information inside a "users" key from the start, because experience says if we don't do this # then the next release will add a feature that requires a change in the data layout, and then # we'll have to write a data migration or something. cache_file.write_text(json.dumps({"users": new_users, "version": 1})) PK!Hes.L\)mastools-0.1.0.dist-info/entry_points.txtN+I/N.,()*/--N-MHKO-M,.)փ))*R)@Css̼T..PK!HڽTUmastools-0.1.0.dist-info/WHEEL A н#Z;/"d&F[xzw@Zpy3Fv]\fi4WZ^EgM_-]#0(q7PK!Hàh!mastools-0.1.0.dist-info/METADATAN0NMX36LD`A>nѰm#jλ|_wEKt?ee F9j15G,CDNk]7TbTSA,] š4^bMA !)nqO"_R԰hֹa,vLhC i>PK!Heآmastools-0.1.0.dist-info/RECORDuIs@}~ f]ȠddÆBli&톈.X C_qp]t0~ʦt, NNъ-W&Kwx*pJcrQz\kh*}VR /