# ==== results/__init__.py ====

from logx import log
from sqlbag import create_database  # noqa
from sqlbag.pg import use_pendulum_for_time_types  # noqa

from .cleaning import standardize_key, standardized_key_mapping  # noqa
from .connections import db  # noqa
from .fileutil import file_text, files, from_file, from_files  # noqa
from .itercsv import fast_csv_it  # noqa
from .openers import (  # noqa
    csv_column_names,
    detect_enc,
    detect_string_enc,
    dicts_from_rows,
    first_n_lines,
    from_csv,
    from_xls,
    from_xlsx,
    smart_open,
)
from .resources import (  # noqa
    resource_data,
    resource_path,
    resource_stream,
    resource_text,
)
from .result import Result  # noqa
from .resultset import Results  # noqa
from .sqlutil import create_table_statement  # noqa
from .typeguess import (  # noqa
    guess_column_type,
    guess_sql_column_type,
    guess_value_type,
)
from .uuids import deterministic_uuid  # noqa

log.set_null_handler()
use_pendulum_for_time_types()


# ==== results/annotations.py ====

from numbers import Number


class AnnotationsMixin:
    def annotate_histogram_amplitudes(self):
        def is_numeric(x):
            return isinstance(x, Number) and not isinstance(x, bool)

        for k in self.keys():
            col = self[k]
            filtered = [_ for _ in col if is_numeric(_)]
            if not filtered:
                continue
            _min = min(filtered)
            _max = max(filtered)
            min0 = min(_min, 0.0)
            max0 = max(0.0, _max)
            _range = max0 - min0

            _subclasses_by_type = {}

            for r in self:
                v = r[k]
                if not is_numeric(v):
                    continue
                v0, v1 = sorted([0.0, v])
                try:
                    d = v0 - min0
                    start = d / _range
                    h0 = start
                except ZeroDivisionError:
                    h0 = 0.0
                try:
                    d = v1 - min0
                    end = d / _range
                    h1 = end
                except ZeroDivisionError:
                    h1 = 0.0
                datatype = type(v)
                if datatype not in _subclasses_by_type:
                    Subclass = type(datatype.__name__.title(), (datatype,), {})
                    _subclasses_by_type[datatype] = Subclass
                Subclass = _subclasses_by_type[datatype]
                replacement = Subclass(v)
                replacement.histo = (h0 * 100, h1 * 100)
                r[k] = replacement


# ==== results/cleaning.py ====

import string

ALLOWED = string.ascii_lowercase + "_" + string.digits

allowed = {k: True for k in ALLOWED}


def whitespace_to_underscore(x):
    return "_".join(x.split())


def standardize_key(k):
    cleaned = k.strip().lower().encode("ascii", errors="ignore").decode()
    underscored = whitespace_to_underscore(cleaned)
    return filtered(underscored)


def filtered(x):
    return "".join(_ for _ in x if _ in allowed)


def standardized_key_mapping(keyslist):
    return {k: standardize_key(k) for k in keyslist}
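
# ---- usage sketch (illustrative, not part of the package) ----
# A minimal sketch of key standardization from cleaning.py: keys are stripped,
# lowercased, reduced to ascii, whitespace becomes underscores, and any other
# characters are dropped. The sample strings are made up; only the functions
# defined above are used, assuming the package is importable as `results`.
from results.cleaning import standardize_key, standardized_key_mapping

assert standardize_key("  First Name ") == "first_name"
assert standardized_key_mapping(["First Name", "Zip Code"]) == {
    "First Name": "first_name",
    "Zip Code": "zip_code",
}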

# ==== results/connections.py ====

from contextlib import contextmanager
from functools import partial
from pathlib import Path

from schemainspect import get_inspector
from sqlbag import S, raw_execute

from .inserting import insert
from .migration import SchemaManagement
from .paging import Paging, paging_wrapped_query
from .result import Result
from .resultset import resultproxy_to_results


def build_proc_call_query(_proc_name, *args, **kwargs):
    _proc_name = _proc_name.replace("__", ".")
    params = {f"positional{i}": x for i, x in enumerate(args)}
    params.update(**dict(kwargs))
    paramnames = params.keys()
    bindparams = [f":{name}" for name in list(paramnames)]
    paramspec = ", ".join(bindparams)
    query = f"select * from {_proc_name}({paramspec})"
    return query, params


class transactionprocs:
    def __init__(self, sess):
        self.session = sess

    def __getattr__(self, name):
        return partial(self.call, name)

    def call(self, _proc_name, *args, **kwargs):
        query, params = build_proc_call_query(_proc_name, *args, **kwargs)
        return self.session.ex(query, params)


class transaction:
    def __init__(self, s):
        self.s = s
        self.procs = transactionprocs(self)

    def ex(self, *args, execution_options=None, **kwargs):
        if execution_options:
            _resultproxy = self.s.connection(
                execution_options=execution_options
            ).execute(*args, **kwargs)
        else:
            _resultproxy = self.s.execute(*args, **kwargs)
        results = resultproxy_to_results(_resultproxy)
        return results

    def raw_ex(self, *args, execution_options=None, **kwargs):
        if execution_options:
            return self.s.connection(execution_options=execution_options).execute(
                *args, **kwargs
            )
        else:
            return self.s.execute(*args, **kwargs)

    def paged(
        self,
        query,
        bookmark,
        ordering,
        per_page,
        backwards,
        *args,
        use_top=False,
        supports_row=True,
        **kwargs,
    ):
        query, page_params = paging_wrapped_query(
            query, ordering, bookmark, per_page, backwards, use_top, supports_row
        )
        argslist = list(args)
        try:
            params = argslist[0]
        except IndexError:
            argslist.append({})
            params = argslist[0]
        params.update(page_params)
        argslist[0] = params
        args = tuple(argslist)
        results = self.ex(query, *args, **kwargs)
        results.paging = Paging(results, per_page, ordering, bookmark, backwards)
        return results

    def insert(self, table, rowlist, upsert_on=None, returning=None):
        return insert(self.s, table, rowlist, upsert_on, returning)


class procs:
    def __init__(self, db):
        self.db = db

    def __getattr__(self, name):
        return partial(self.call, name)

    def call(self, _proc_name, *args, **kwargs):
        _proc_name = _proc_name.replace("__", ".")
        with self.db.transaction() as s:
            query, params = build_proc_call_query(_proc_name, *args, **kwargs)
            return s.ex(query, params)


class db(SchemaManagement):
    def __init__(self, *args, **kwargs):
        self._args = args
        if args and args[0].startswith("postgres"):
            kwargs.setdefault("use_batch_mode", True)
        self._kwargs = kwargs

    @contextmanager
    def transaction(self):
        with S(*self._args, **self._kwargs) as s:
            yield transaction(s)

    @property
    def procs(self):
        return procs(self)

    @property
    def db_url(self):
        return self._args[0]

    def ss(self, *args, **kwargs):
        with self.transaction() as t:
            return t.ex(*args, **kwargs)

    def ss_iter(self, *args, **kwargs):
        with self.transaction() as t:
            for row in t.raw_ex(*args, **kwargs):
                yield Result(row)

    def paged(self, *args, **kwargs):
        with self.transaction() as t:
            return t.paged(*args, **kwargs)

    def raw_from_file(self, f):
        sql = Path(f).read_text()
        return self.raw(sql)

    def ss_from_file(self, f, *args, **kwargs):
        sql = Path(f).read_text()
        return self.ss(sql, *args, **kwargs)

    def paged_from_file(self, f, *args, **kwargs):
        sql = Path(f).read_text()
        return self.paged(sql, *args, **kwargs)

    def insert(self, table, rowlist, upsert_on=None, returning=None):
        with self.transaction() as s:
            inserted = s.insert(table, rowlist, upsert_on, returning)
            return inserted

    def raw(self, sql):
        with S(*self._args, **self._kwargs) as s:
            _results = raw_execute(s, sql)
            return _results

    def inspect(self):
        with S(*self._args, **self._kwargs) as s:
            i = get_inspector(s)
            return i


# ==== results/fileutil.py ====

from collections.abc import Mapping
from pathlib import Path

from .openers import from_file


def files(path, extensions=None):
    p = Path(path)
    p = p.expanduser()
    return [
        child
        for child in sorted(p.iterdir())
        if child.is_file() and (not extensions or child.suffix in extensions)
    ]


def file_text(fpath):
    path = Path(fpath)
    return path.read_text()


def is_mapping(x):
    return isinstance(x, Mapping)


def from_files_it(files, ignore_unopenable=True):
    for f in files:
        try:
            results = from_file(f)
        except ValueError:
            if ignore_unopenable:
                continue
            raise
        if is_mapping(results):
            for k, result in results.items():
                yield f.with_name(f"{f.name}::{k}"), result
        else:
            yield f, results


def from_files(files, ignore_unopenable=True):
    return dict(from_files_it(files, ignore_unopenable))
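
# ---- usage sketch (illustrative, not part of the package) ----
# A minimal sketch of the db wrapper defined in connections.py: run a query
# with ss(), which opens a transaction per call, and insert rows with
# insert(). The connection URL and the "books" table are assumptions; any URL
# accepted by sqlbag's S() should behave the same way.
from results import db

DB = db("postgresql:///example")  # hypothetical database URL

row = DB.ss("select 1 as a, 2 as b").one()
print(row.a, row.b)  # Result rows allow attribute access: 1 2

DB.insert("books", [{"title": "Example", "pages": 10}])  # assumed table/columns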

# ==== results/inserting.py ====

import string

from .fileutil import is_mapping
from .resultset import resultproxy_to_results

INSERT = """
insert into {table} ({colspec})
values ({valuespec})
"""

INSERT_UPSERT = """
on conflict ({upsertkeyspec})
do update set
{upsertspec}
"""

INSERT_UPSERT_DO_NOTHING = """
on conflict ({upsertkeyspec})
do nothing
"""

VALID = {k: None for k in (string.ascii_letters + string.digits + "_")}


def valid(key):
    clist = [_ for _ in list(key) if _ in VALID]
    v = "".join(clist)
    if v != key:
        raise ValueError(f'invalid key: "{key}"')
    return v


def insert(s, table, rows, upsert_on=None, returning=None):
    if is_mapping(rows):
        rows = [rows]
    if returning is None:
        returning = not len(rows) > 1
    if not rows:
        raise ValueError("empty list of rows, nothing to upsert")
    keys = [valid(k) for k in rows[0].keys()]
    colspec = ", ".join([f'"{k}"' for k in keys])
    valuespec = ", ".join(":{}".format(k) for k in keys)
    q = INSERT.format(table=table, colspec=colspec, valuespec=valuespec)
    if upsert_on:
        upsert_keys = list(keys)
        for k in upsert_on:
            upsert_keys.remove(k)
        upsertkeyspec = ", ".join([f'"{k}"' for k in upsert_on])
        if upsert_keys:
            upsertspec = ", ".join(f'"{k}" = excluded."{k}"' for k in upsert_keys)
            q_upsert = INSERT_UPSERT.format(
                upsertkeyspec=upsertkeyspec, upsertspec=upsertspec
            )
        else:
            q_upsert = INSERT_UPSERT_DO_NOTHING.format(upsertkeyspec=upsertkeyspec)
        q = q + q_upsert
    if returning:
        q += " returning *"
    rp = s.execute(q, rows)
    return resultproxy_to_results(rp)


# ==== results/itercsv.py ====

from pathlib import Path

from .openers import csv_row_tuples_it, csv_rows_it


def fast_csv_it(f, renamed_keys=None, *args, **kwargs):
    if isinstance(f, str):
        f = Path(f)
    if renamed_keys:
        ii = csv_row_tuples_it(f, *args, **kwargs)
        next(ii)  # skip header row
        for r in ii:
            yield dict(zip(renamed_keys, r))
    else:
        for r in csv_rows_it(f, *args, **kwargs):
            yield r
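
# ---- usage sketch (illustrative, not part of the package) ----
# A sketch of streaming rows out of a CSV with fast_csv_it (itercsv.py) and
# upserting them via insert(..., upsert_on=...) from inserting.py. The file
# name, table name and unique column are assumptions made for illustration.
from results import db, fast_csv_it

DB = db("postgresql:///example")  # hypothetical database URL

rows = list(fast_csv_it("people.csv"))  # dicts keyed by the CSV header row
DB.insert(
    "people",             # assumed table with a unique constraint on "email"
    rows,
    upsert_on=["email"],  # adds: on conflict ("email") do update set ...
)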

# ==== results/migration.py ====

from logx import log
from migra import Migration
from sqlalchemy.pool import NullPool
from sqlbag import create_database  # noqa
from sqlbag import S, temporary_database

from . import connections


class SchemaManagement:
    def create_database(self, *args, **kwargs):
        create_database(self.db_url, *args, **kwargs)

    def has_same_structure_as(self, other_db_url, **kwargs):
        return db_structures_are_identical(self.db_url, other_db_url, **kwargs)

    def creation_statements(self, **kwargs):
        return creation_statements(self.db_url, **kwargs)

    def drop_statements(self, **kwargs):
        return drop_statements(self.db_url, **kwargs)

    def schema_hash(self):
        return schema_hash(self.db_url)

    def db_differences(self, other_db_url, **kwargs):
        return db_differences(self.db_url, other_db_url, **kwargs)

    def sync_db_structure_to_target_db(
        self, target_db_url, confirm=True, create_extensions_only=False, **kwargs
    ):
        return sync_db_structure_to_target_db(
            self.db_url,
            target_db_url,
            confirm=confirm,
            create_extensions_only=create_extensions_only,
            **kwargs,
        )

    def sync_db_structure_to_setup_method(
        self, setup_method, confirm=True, create_extensions_only=False, **kwargs
    ):
        sync_db_structure_to_setup_method(
            self.db_url,
            setup_method,
            confirm=confirm,
            create_extensions_only=create_extensions_only,
            **kwargs,
        )

    def sync_db_structure_to_definition(
        self, definition_text, confirm=True, create_extensions_only=False, **kwargs
    ):
        sync_db_structure_to_definition(
            self.db_url,
            definition_text,
            confirm=confirm,
            create_extensions_only=create_extensions_only,
            **kwargs,
        )


def prompt(question, prompt=True):  # pragma: no cover
    print(question + " ", end="")
    return input().strip().lower() == "y"


def db_structures_are_identical(db_url_a, db_url_b, **kwargs):
    """
    Do two databases have identical schema structure?

    - db_url_a: first database
    - db_url_b: second database
    - **kwargs: additional args, passed through to the `migra` migration object.
    """
    differences = db_differences(db_url_a, db_url_b, **kwargs)
    return not differences.strip()


def creation_statements(db_url, **kwargs):
    with S(db_url, poolclass=NullPool) as s:
        m = Migration(None, s, **kwargs)
        m.set_safety(False)
        m.add_all_changes()
        return m.sql


def schema_hash(db_url, **kwargs):
    import hashlib

    sql = creation_statements(db_url, **kwargs)
    encoded = sql.encode("utf-8")
    hash = hashlib.sha1(encoded)
    return hash.hexdigest()


def drop_statements(db_url, **kwargs):
    with S(db_url, poolclass=NullPool) as s:
        m = Migration(s, None, **kwargs)
        m.set_safety(False)
        m.add_all_changes()
        return m.sql


def db_differences(db_url_a, db_url_b, **kwargs):
    with S(db_url_a, poolclass=NullPool) as a, S(db_url_b, poolclass=NullPool) as b:
        m = Migration(a, b, **kwargs)
        m.set_safety(False)
        m.add_all_changes()
        return m.sql


def sync_db_structure_to_target_db(
    db_url, target_db_url, confirm=True, create_extensions_only=False, **kwargs
):
    log.info(f"syncing: {db_url} to {target_db_url}")
    with S(db_url, poolclass=NullPool) as s_current, S(
        target_db_url, poolclass=NullPool
    ) as s_target:
        m = Migration(s_current, s_target, **kwargs)
        m.set_safety(False)
        if create_extensions_only:
            log.info("Syncing extension creation only...")
            m.add_extension_changes(creates=True, drops=False)
        else:
            m.add_all_changes()
        if m.statements:
            if confirm:  # pragma: no cover
                print("THE FOLLOWING CHANGES ARE PENDING:", end="\n\n")
                print(m.sql)
                print()
            if not confirm or prompt("Apply these changes?"):
                log.info("Applying...")
                m.apply()
            else:
                if confirm:  # pragma: no cover
                    print("Not applying.")
        else:
            if confirm:  # pragma: no cover
                print("Already synced.")
    current_schema_hash = schema_hash(db_url)
    if confirm:  # pragma: no cover
        print(f"Schema hash: {current_schema_hash}")


def sync_db_structure_to_setup_method(
    db_url, setup_method, confirm=True, create_extensions_only=False, **kwargs
):
    with temporary_database(host="localhost") as temp_db_url:
        setup_method(temp_db_url)
        sync_db_structure_to_target_db(
            db_url,
            temp_db_url,
            confirm=confirm,
            create_extensions_only=create_extensions_only,
            **kwargs,
        )


def sync_db_structure_to_definition(
    db_url, definition_text, confirm=True, create_extensions_only=False, **kwargs
):
    def load_def(db_url):
        connections.db(db_url, poolclass=NullPool).raw(definition_text)

    sync_db_structure_to_setup_method(
        db_url,
        load_def,
        confirm=confirm,
        create_extensions_only=create_extensions_only,
        **kwargs,
    )
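
# ---- usage sketch (illustrative, not part of the package) ----
# A sketch of the schema management mixin above: hash a database's structure
# and (interactively) sync it to a SQL definition. The URL and DDL text are
# assumptions; sync_db_structure_to_definition builds a temporary local
# database from the definition and uses migra to generate the diff, so it
# needs a reachable Postgres server.
from results import db

DB = db("postgresql:///example")  # hypothetical database URL

DEFINITION = """
create table books (
    id serial primary key,
    title text not null
);
"""

print(DB.schema_hash())  # sha1 of the generated creation statements
DB.sync_db_structure_to_definition(DEFINITION, confirm=True)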

# ==== results/openers.py ====

import io
from itertools import islice
from pathlib import Path

import chardet
import csvx

from .resultset import Results


def detect_string_enc(contents):
    b = io.BytesIO(contents)
    b.seek(0)
    return detect_enc(b)


def detect_enc(f):
    return chardet.detect(f.read())["encoding"]


def smart_open(f):
    try:
        return io.open(f)
    except TypeError:
        return f


def first_n_lines(stream, n=10):
    head = list(islice(stream, n))
    return head


def csv_column_names(f, *args, **kwargs):
    if isinstance(f, str):
        f = Path(f)
    f = smart_open(f)
    head = first_n_lines(f, n=1)[0]
    f.seek(0)
    headf = io.StringIO(head)
    with csvx.OrderedDictReader(headf, *args, **kwargs) as r:
        return r.fieldnames


def csv_rows_it(f, *args, **kwargs):
    with csvx.OrderedDictReader(f, *args, **kwargs) as r:
        for row in r:
            yield row


def csv_row_tuples_it(f, *args, **kwargs):
    with csvx.Reader(f, *args, **kwargs) as r:
        for row in r:
            yield row


def from_csv(f, *args, **kwargs):
    return Results(csv_rows_it(f, *args, **kwargs))


def dicts_from_rows(rows):
    try:
        first = rows[0]
    except IndexError:
        return []
    rest = rows[1:]

    def it():
        for d in rest:
            dd = dict(zip(first, d))
            yield dd

    return list(it())


def from_xlsx(f):
    from openpyxl import load_workbook

    if isinstance(f, Path):
        f = str(f)
    wb = load_workbook(filename=f)
    wsheets = list(wb)

    def xget_row_values(row):
        return [c.internal_value or "" for c in list(row)]

    def do_sheet(ws):
        rows = [xget_row_values(_) for _ in list(ws.rows)]
        return dicts_from_rows(rows)

    return dict(zip(wb.sheetnames, (Results(do_sheet(_)) for _ in wsheets)))


def from_xls(f, file_contents=None):
    from xlrd import open_workbook

    wb = open_workbook(str(f), file_contents=file_contents)

    def get_row_values(ws, rownum):
        return [str(ws.cell_value(rowx=rownum, colx=c) or "") for c in range(ws.ncols)]

    def do_sheet(ws):
        rows = [get_row_values(ws, rx) for rx in range(ws.nrows)]
        return dicts_from_rows(rows)

    wsheets = [wb.sheet_by_index(_) for _ in range(wb.nsheets)]
    return dict(zip(wb.sheet_names(), (Results(do_sheet(_)) for _ in wsheets)))


OPENERS = {".xlsx": from_xlsx, ".xls": from_xls, ".csv": from_csv}


def from_file(f):
    p = Path(f)
    extension = p.suffix
    try:
        return OPENERS[extension](f)
    except KeyError:
        raise ValueError(f"cannot open a file with extension: {extension}")
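
# ---- usage sketch (illustrative, not part of the package) ----
# A sketch of the file openers above. from_file dispatches on the extension:
# .csv gives a single Results, while .xls/.xlsx give a dict of Results keyed
# by sheet name. The file names are assumptions.
from results import from_file

csv_results = from_file("data.csv")  # Results: a list of Result dicts
sheets = from_file("workbook.xlsx")  # {sheet_name: Results, ...}
for name, sheet in sheets.items():
    print(name, sheet.keys())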

# ==== results/paging.py ====

import base64
import csv
import io
import json
from itertools import zip_longest

from .sqlutil import ordering_from_parsed, parse_ordering, reversed_ordering

sio = io.StringIO
csvreader = csv.reader
csvwriter = csv.writer

PAGED_QUERY = """
select * from (
{q}
) unpaged_table
{bookmark}
{order_by}
{limit}
"""

PAGED_QUERY_MSSQL = """
select {limit} * from (
{q}
) unpaged_table
{bookmark}
{order_by}
"""

PARAM_PREFIX = "paging_bookmark_"


def encode_bookmark_values(decoded):
    j = json.dumps(decoded)
    j8 = j.encode("utf-8")
    return base64.urlsafe_b64encode(j8).decode("utf-8")


def decode_bookmark(encoded):
    j = base64.urlsafe_b64decode(encoded)
    jt = j.decode("utf-8")
    return json.loads(jt)


def bind_pairs_iter(cols, bookmark, swap_on_descending=False):
    for i, zipped in enumerate(zip(cols, bookmark)):
        col, val = zipped
        name, is_descending = col
        lowercase_name = name.lower()
        bind = f":{PARAM_PREFIX}{lowercase_name}"
        if swap_on_descending and is_descending:
            yield name, bind
        else:
            yield bind, name


def generate_or_statement(a, b, a_compared, b_compared):
    if a and b:
        a_first, b_first = a[0], b[0]
        or_statement = generate_or_statement(
            a[1:], b[1:], a_compared + [a[0]], b_compared + [b[0]]
        )
        equalities = [f"{x} = {y}" for x, y in zip(a_compared, b_compared)]
        equalities_joined = " and ".join(equalities)
        if or_statement:
            return f"({equalities_joined} and {a_first} > {b_first}) or {or_statement}"
        else:
            return f"({equalities_joined} and {a_first} > {b_first})"


def recursive_comparison(a, b):
    or_statement = generate_or_statement(a[1:], b[1:], [a[0]], [b[0]])
    return f"({a[0]} > {b[0]} or {or_statement})"


def make_bookmark_where_clause(cols, bookmark, backwards=False, supports_row=True):
    if bookmark is None:
        return ""
    pairslist = bind_pairs_iter(cols, bookmark, swap_on_descending=True)
    b, a = zip(*pairslist)
    if len(a) > 1 or len(b) > 1:
        if supports_row:
            a, b = ", ".join(a), ", ".join(b)
            return f"where row({a}) > row({b})"
        else:
            return f"where {recursive_comparison(a, b)}"
    else:
        return f"where {a[0]} > {b[0]}"


def paging_params(cols, bookmark):
    names = [PARAM_PREFIX + c[0].lower() for c in cols]
    return dict(zip_longest(names, bookmark or []))


def paging_wrapped_query(
    query, ordering, bookmark, per_page, backwards, use_top=False, supports_row=True
):
    cols = parse_ordering(ordering)
    if backwards:
        cols = reversed_ordering(cols)
    bookmark_clause = make_bookmark_where_clause(
        cols, bookmark, backwards, supports_row=supports_row
    )
    order_list = ordering_from_parsed(cols)
    order_by = f"order by {order_list}"
    if use_top:
        limit = f"top {per_page + 1}"
    else:
        limit = f"limit {per_page + 1}"
    params = paging_params(cols, bookmark)
    formatted = (PAGED_QUERY_MSSQL if use_top else PAGED_QUERY).format(
        q=query, bookmark=bookmark_clause, order_by=order_by, limit=limit
    )
    return formatted, params


class Paging:
    def __init__(self, results, per_page, ordering, bookmark, backwards):
        self.results = results
        self.per_page = per_page
        self.ordering = ordering
        self.bookmark = bookmark
        self.backwards = backwards
        self.parsed_ordering = parse_ordering(ordering)
        self.order_keys = [c[0] for c in self.parsed_ordering]
        try:
            self.discarded_item = results.pop(per_page)
            self.has_more = True
        except IndexError:
            self.discarded_item = None
            self.has_more = False
        if backwards:
            results.reverse()
        self.count = len(self.results)

    @property
    def has_after(self):
        return self.discarded_item is not None

    @property
    def has_before(self):
        return self.bookmark is not None

    @property
    def has_next(self):
        if self.backwards:
            return self.has_before
        else:
            return self.has_after

    @property
    def has_prev(self):
        if self.backwards:
            return self.has_after
        else:
            return self.has_before

    @property
    def at_start(self):
        return not self.has_prev

    @property
    def at_end(self):
        return not self.has_next

    @property
    def is_all(self):
        return self.at_start and self.at_end

    @property
    def is_full(self):
        return self.count == self.per_page

    @property
    def past_start(self):
        return self.backwards and self.at_start and not self.is_full

    @property
    def past_end(self):
        return not self.backwards and self.at_end and not self.is_full

    @property
    def next(self):
        if not self.is_empty:
            return self.get_bookmark(self.results[-1])

    @property
    def prev(self):
        if not self.is_empty:
            return self.get_bookmark(self.results[0])

    @property
    def is_empty(self):
        return not bool(self.count)

    @property
    def current(self):
        return self.bookmark

    @property
    def current_reversed(self):
        if self.discarded_item:
            return self.get_bookmark(self.discarded_item)

    def get_bookmark(self, result_row):
        return tuple(result_row[k] for k in self.order_keys)
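
# ---- usage sketch (illustrative, not part of the package) ----
# A sketch of keyset pagination with db.paged() (connections.py) and the
# Paging object above. The query, ordering and page size are assumptions;
# bookmark=None asks for the first page, and paging.next is the bookmark to
# pass back in for the following page.
from results import db

DB = db("postgresql:///example")  # hypothetical database URL

page = DB.paged(
    "select * from books",
    bookmark=None,   # first page
    ordering="id",   # must be a deterministic ordering
    per_page=10,
    backwards=False,
)
print(page.paging.has_next, page.paging.next)  # next is a bookmark tuple, e.g. (10,)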

# ==== results/pivoting.py ====

import itertools

from .resultset import Results


class item:
    def __init__(self, i):
        self.i = i

    @property
    def after(self):
        aft = []
        x = self.next
        while x and x.i != self.i and x.i not in aft:
            aft.append(x.i)
            x = x.next
        return aft

    @property
    def before(self):
        bef = []
        x = self.prev
        while x and x.i != self.i and x.i not in bef:
            bef.append(x.i)
            x = x.prev
        return bef


class items:
    def __init__(self, _list):
        full = []
        previous = None
        for x in _list:
            i = item(x)
            i.prev = previous
            if previous:
                previous.next = i
            i.next = None
            full.append(i)
            previous = i
        self._list = full


def ordering(groups):
    glist = [items(_) for _ in groups]
    allitems = itertools.chain(*groups)
    d_all = {k: None for k in allitems}
    d_before = {k: set() for k in d_all.keys()}
    for g in glist:
        for i in g._list:
            val = i.i
            d_before[val] = d_before[val] | set(i.before)
    _result = []
    while True:
        if not d_all:
            break
        for k in list(d_all.keys()):
            if d_before[k] - set(_result):
                continue
            _result.append(k)
            d_all.pop(k)
            break
    return _result


def pivoted(_results):
    r = _results
    try:
        *down, across, values = r.keys()
    except ValueError:
        raise ValueError(
            "Pivoting requires at least 3 columns for input: "
            "1 or more for row labels, 1 for column labels, and one values column"
        )
    downvalues = r.distinct_values(columns=down)
    orders = [
        _.distinct_values(column=across) for _ in r.grouped_by(columns=down).values()
    ]
    acrossvalues = ordering(orders)
    key_cols = down + [across]

    def cell_key(row):
        return tuple(row[k] for k in key_cols)

    d = {cell_key(row): row[values] for row in r}

    def pivoted_it():
        for downvalue in downvalues:
            out = {k: v for k, v in zip(down, downvalue)}
            row = {
                acrossvalue: d.get(tuple(list(downvalue) + [acrossvalue]), None)
                for acrossvalue in acrossvalues
            }
            out.update(row)
            yield out

    return Results(pivoted_it())
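
# ---- usage sketch (illustrative, not part of the package) ----
# A sketch of pivoting: the second-to-last column becomes the new column
# labels, the last column supplies the cell values, and earlier columns remain
# as row labels. The sample data is made up.
from results import Results

r = Results(
    [
        {"city": "Alice Springs", "year": 2019, "population": 25000},
        {"city": "Alice Springs", "year": 2020, "population": 26000},
        {"city": "Broome", "year": 2019, "population": 14000},
    ]
)
p = r.pivoted()
print(p.keys())  # ['city', 2019, 2020]
print(p[0])      # {'city': 'Alice Springs', 2019: 25000, 2020: 26000}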

# ==== results/resources.py ====

import inspect

from pkg_resources import resource_filename as pkg_resource_filename
from pkg_resources import resource_stream as pkg_resource_stream


def external_caller():
    i = inspect.stack()
    names = (inspect.getmodule(i[x][0]).__name__ for x in range(len(i)))
    return next(name for name in names if name != __name__)


def resource_path(subpath):
    module_name = external_caller()
    return pkg_resource_filename(module_name, subpath)


def resource_stream(subpath):
    module_name = external_caller()
    return pkg_resource_stream(module_name, subpath)


def resource_data(subpath):
    return resource_stream(subpath).read()


def resource_text(subpath, encoding="utf-8"):
    return resource_stream(subpath).read().decode(encoding)


# ==== results/result.py ====

from .uuids import deterministic_uuid


class Result(dict):
    def __init__(self, *args, **kwargs):
        self.annotations = {}
        super().__init__(*args, **kwargs)

    def deterministic_uuid(self, columns, uuid_domain):
        return deterministic_uuid(
            uuid_domain, {k: v for k, v in self.items() if k in columns}
        )

    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError

    def __setattr__(self, name, value):
        if name == "annotations":
            object.__setattr__(self, name, value)
        else:
            self[name] = value

    def __getitem__(self, key):
        if isinstance(key, int):
            return list(self.values())[key]
        return super().__getitem__(key)

    def __iter__(self):
        for k, v in self.items():
            if k != "annotations":
                yield v

    def scalar(self):
        return self[0]
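
# ---- usage sketch (illustrative, not part of the package) ----
# A small sketch of the Result row type above: keys are reachable as
# attributes or by position, iteration yields values, and scalar() returns the
# first value. The sample row is made up.
from results import Result

row = Result({"id": 7, "name": "Ada"})
print(row.name)      # Ada -- attribute access falls back to the dict key
print(row[0])        # 7 -- integer keys index by position
print(list(row))     # [7, 'Ada'] -- iteration yields values, not keys
print(row.scalar())  # 7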

# ==== results/resultset.py ====

import io
import itertools
from numbers import Number
from pathlib import Path

import csvx

from .annotations import AnnotationsMixin
from .cleaning import standardized_key_mapping
from .result import Result
from .sqlutil import create_table_statement
from .typeguess import guess_sql_column_type


def results(rows):
    return Results(rows)


def resultproxy_to_results(rp):
    if rp.returns_rows:
        cols = rp.context.cursor.description
        keys = [c[0] for c in cols]
        r = Results(rp)
        r._keys_if_empty = keys
        return r
    else:
        return None


class Results(list, AnnotationsMixin):
    def __init__(self, *args, **kwargs):
        self.paging = None
        try:
            given = args[0]
            given = [Result(_) for _ in given]
            args = list(args)
            args[0] = given
            args = tuple(args)
        except IndexError:
            pass
        self._keys_if_empty = None
        super().__init__(*args, **kwargs)

    def all_keys(self):
        keylist = dict()
        for row in self:
            rowkeys = row.keys()
            for key in rowkeys:
                if key not in keylist:
                    keylist[key] = True
        return list(keylist.keys())

    def by_key(self, key, value=None):
        def get_value(row):
            if value is None:
                return row
            else:
                return row[value]

        return {_[key]: get_value(_) for _ in self}

    def with_key_superset(self):
        all_keys = self.all_keys()

        def dict_with_all_keys(d):
            return {k: d.get(k, None) for k in all_keys}

        return Results([dict_with_all_keys(_) for _ in self])

    def with_renamed_keys(self, mapping):
        def renamed_key(x):
            if x in mapping:
                return mapping[x]
            return x

        def renamed_it():
            for row in self:
                d = {
                    renamed_key(k): v
                    for k, v in row.items()
                    if renamed_key(k) is not None
                }
                yield Result(d)

        return Results(list(renamed_it()))

    def standardized_key_mapping(self):
        return standardized_key_mapping(self.keys())

    def with_standardized_keys(self):
        return self.with_renamed_keys(self.standardized_key_mapping())

    def strip_values(self):
        for row in self:
            for k, v in row.items():
                if v and isinstance(v, str):
                    stripped = v.strip()
                    if stripped != v:
                        row[k] = stripped

    def strip_all_values(self):
        self.strip_values()

    def standardize_spaces(self):
        self.clean_whitespace()

    def clean_whitespace(self):
        for row in self:
            for k, v in row.items():
                if v and isinstance(v, str):
                    standardized = " ".join(v.split())
                    if standardized != v:
                        row[k] = standardized

    def delete_key(self, column=None):
        for row in self:
            try:
                del row[column]
            except KeyError:
                pass

    def delete_keys(self, columns=None):
        for row in self:
            for c in columns:
                try:
                    del row[c]
                except KeyError:
                    pass

    def set_blanks_to_none(self):
        for row in self:
            for k, v in row.items():
                if isinstance(v, str) and not v.strip():
                    row[k] = None

    def replace_values(self, before, after):
        for row in self:
            for k, v in row.items():
                if v == before:
                    row[k] = after

    def values_for(self, column=None, columns=None):
        if column is not None:
            values = [_[column] for _ in self]
        elif columns is not None:
            values = [tuple(_[c] for c in columns) for _ in self]
        else:
            values = list(self.values())
        return values

    def distinct_values(self, column=None, columns=None):
        values = self.values_for(column, columns)
        d = {k: True for k in values}
        return list(d.keys())

    @property
    def csv(self):
        f = io.StringIO()
        with csvx.DictWriter(f, lineterminator="\n") as w:
            w.write_dicts(self)
        value = f.getvalue()
        return value

    def save_csv(self, destination):
        Path(destination).write_text(self.csv)

    def save_xlsx(self, destination):
        from xlsxwriter import Workbook

        workbook = Workbook(destination)
        worksheet = workbook.add_worksheet()
        for r, row in enumerate([self.keys()] + self):
            for c, col in enumerate(row):
                worksheet.write(r, c, col)
        workbook.close()

    def keys(self):
        try:
            first = self[0]
        except IndexError:
            if self._keys_if_empty is None:
                return []
            else:
                return self._keys_if_empty
        return list(first.keys())

    def copy(self):
        return Results(self)

    def grouped_by(self, column=None, columns=None):
        def keyfunc(x):
            if column:
                return x[column]
            if columns:
                return tuple([x[k] for k in columns])

        copied = Results(self)
        copied.sort(key=keyfunc)

        def grouped_by_it():
            for k, g in itertools.groupby(copied, keyfunc):
                yield k, Results(g)

        return dict(grouped_by_it())

    def __getitem__(self, x):
        if isinstance(x, slice):
            return Results(list(self)[x])
        elif isinstance(x, Number):
            return list.__getitem__(self, x)
        else:
            return [_[x] for _ in self]

    def one(self):
        length = len(self)
        if not length:
            raise RuntimeError("should be exactly one result, but there is none")
        elif length > 1:
            raise RuntimeError("should be exactly one result, but there are multiple")
        return self[0]

    def scalar(self):
        return self.one()[0]

    def pivoted(self):
        from .pivoting import pivoted

        return pivoted(self)

        try:
            down, across, values = self.keys()
        except ValueError:
            raise ValueError("pivoting requires exactly 3 columns")
        downvalues = self.distinct_values(down)
        acrossvalues = self.distinct_values(across)
        d = {(row[down], row[across]): row[values] for row in self}

        def pivoted_it():
            for downvalue in downvalues:
                out = {down: downvalue}
                row = {
                    acrossvalue: d.get((downvalue, acrossvalue), None)
                    for acrossvalue in acrossvalues
                }
                out.update(row)
                yield out

        return Results(pivoted_it())

    def make_hierarchical(self):
        previous = None
        for r in self:
            original = Result(r)
            if previous:
                for k, v in r.items():
                    if previous[k] == v:
                        r[k] = ""
                    else:
                        break
            previous = original

    @property
    def md(self):
        from tabulate import tabulate

        return tabulate(self, headers="keys", tablefmt="pipe")

    def guessed_sql_column_types(self):
        return {k: guess_sql_column_type(self.values_for(k)) for k in self.keys()}

    def guessed_create_table_statement(self, name):
        guessed = self.guessed_sql_column_types()
        return create_table_statement(name, guessed)


# ==== results/sqlutil.py ====

ASC_OR_DESC = ("desc", "asc")


def quoted(column_name):
    return f'"{column_name}"'


def parse_ordering_col(c):
    tokens = c.rsplit(None, 1)
    if len(tokens) > 1:
        possible_direction = tokens[-1].lower()
        if possible_direction in ASC_OR_DESC:
            c = tokens[0]
            descending = possible_direction == "desc"
        else:
            descending = False
    else:
        descending = False
    c = c.strip().strip('"')
    return c, descending


def parse_ordering(ordering):
    cols = ordering.split(",")
    return [parse_ordering_col(c) for c in cols]


def reversed_ordering(ordering):
    return [(c[0], not c[1]) for c in ordering]


def ordering_from_parsed(cols):
    clist = [(quoted(c) + (descending and " desc" or "")) for c, descending in cols]
    return ", ".join(clist)


def create_table_statement(table_name, colspec):
    quoted_name = quoted(table_name)
    colspec = ",\n    ".join(f"{quoted(k)} {v}" for k, v in colspec.items())
    return f"""create table {quoted_name} (
    {colspec}
);
"""
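
# ---- usage sketch (illustrative, not part of the package) ----
# A sketch of a typical cleanup pass over loaded rows using the Results
# methods above, ending with a guessed CREATE TABLE statement built via
# sqlutil. The sample data and table name are made up.
from results import Results

r = Results(
    [
        {" Amount ": " 10 ", "Date": "2020-01-01"},
        {" Amount ": "", "Date": "2020-02-01"},
    ]
)
r = r.with_standardized_keys()  # ' Amount ' -> 'amount', 'Date' -> 'date'
r.strip_values()                # trim surrounding whitespace in string values
r.set_blanks_to_none()          # empty strings become None
print(r.guessed_create_table_statement("measurements"))
# prints a quoted create table statement with guessed types (amount: int, date: date)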

# ==== results/typeguess.py ====

# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals

import pendulum
import six
from pendulum.date import Date as date_type
from pendulum.datetime import DateTime as datetime_type


def guess_value_type(x):
    if x == "":
        return None
    try:
        f = float(x)
        if f.is_integer():
            return int
        else:
            return float
    except (ValueError, TypeError):
        pass
    try:
        p = pendulum.parse(x, exact=True)
        if isinstance(p, datetime_type):
            return datetime_type
        if isinstance(p, date_type):
            return date_type
    except Exception:
        pass
    return six.text_type


def guess_column_type(values):
    present = {guess_value_type(x) for x in values if x is not None}
    if six.text_type in present:
        return six.text_type
    if datetime_type in present:
        return datetime_type
    if date_type in present:
        return date_type
    if float in present:
        return float
    if int in present:
        return int
    return six.text_type


def guess_sql_column_type(values):
    return PY_TO_SQL[guess_column_type(values)]


PY_TO_SQL = {
    six.text_type: "text",
    datetime_type: "timestamp",
    date_type: "date",
    float: "decimal",
    int: "int",
    None: "text",
}


# ==== results/uuids.py ====

import json
import uuid


def dns_uuid(domain_name):
    return uuid.uuid5(uuid.NAMESPACE_DNS, domain_name)


def stringified_dict(d):
    return {str(k): str(v) for k, v in d.items()}


def contents_json(contents_dict):
    d = stringified_dict(contents_dict)
    return json.dumps(d, sort_keys=True)


def deterministic_uuid(domain, contents_dict):
    dns = dns_uuid(domain)
    j = contents_json(contents_dict)
    return uuid.uuid5(dns, j)


# ==== results-0.1.1552206909.dist-info ====
# WHEEL, METADATA, RECORD (packaging metadata)
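
# ---- usage sketch (illustrative, not part of the package) ----
# A sketch of deterministic_uuid from uuids.py: the same domain name and
# key/value contents always hash to the same UUIDv5, which is useful for
# stable surrogate keys. The domain and contents here are made up.
from results import deterministic_uuid

u1 = deterministic_uuid("example.com", {"source": "books.csv", "isbn": "12345"})
u2 = deterministic_uuid("example.com", {"isbn": "12345", "source": "books.csv"})
assert u1 == u2  # key order does not matter: contents are JSON-dumped with sort_keys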