PK!Qaaexcelschema/__init__.pyfrom .core import SchemaParser from .constraint import Constraint from .util import parse_record PK!:excelschema/constraint.pyfrom typing import NamedTuple, Any, Union class Constraint(NamedTuple): type_: Union[type, list, type(Any)] = Any unique: bool = False not_null: bool = False def __repr__(self): if not self.unique and not self.not_null: return repr(self.type_) else: return super(Constraint, self).__repr__() class ConstraintMapping: def __init__(self): self.type_ = dict() self.preexisting = dict() self.not_null = set() def update(self, schema_dict): if schema_dict: for k, c in schema_dict.items(): if isinstance(c, type): self._parse_type(k, c) else: assert isinstance(c, Constraint), repr(c) if c.type_: self._parse_type(k, c.type_) if c.unique: self.preexisting.setdefault(k, set()) if c.not_null: self.not_null.add(k) def _parse_type(self, k, type_): if k in self.type_.keys(): expected_type = self.type_[k] if expected_type is not Any: if type_ is not expected_type: raise TypeError else: self.type_[k] = type_ def _view(self): all_keys = set(self.type_.keys()) | set(self.preexisting.keys()) | self.not_null for k in all_keys: type_ = self.type_.get(k, Any) unique = k in self.preexisting.keys() not_null = k in self.not_null yield k, Constraint(type_, unique, not_null) def view(self): return dict(self._view()) def __repr__(self): return repr(self.view()) PK!2GSSexcelschema/core.pyfrom copy import deepcopy from .constraint import ConstraintMapping from .exception import NotUniqueException, NotNullException, NonUniformTypeException from .util import parse_record, parse_excel_array class SchemaParser: constraint_mapping = ConstraintMapping() records = list() def __init__(self, records=None, array=None, header=True, as_datetime_str=False, schema=None): self.as_datetime_str = as_datetime_str if schema: self.constraint_mapping.update(schema) if records and array: self.records = self.ensure_multiple(records=records, array=array, header=header) @property def schema(self): """Get table's latest schema Returns: dict -- dictionary of constraints """ return self.constraint_mapping.view() @schema.setter def schema(self, schema_dict): """Reset and set a new schema Arguments: schema_dict {dict} -- dictionary of constraints or types """ self.constraint_mapping = ConstraintMapping() self.update_schema(schema_dict) def update_schema(self, schema_dict): """Update the schema Arguments: schema_dict {dict} -- dictionary of constraints or types """ self.constraint_mapping.update(schema_dict) self.ensure_multiple(self.records) def ensure_multiple(self, records=None, update_schema=False, array=None, header=None): """Sanitizes records, e.g. from Excel spreadsheet Arguments: records {list, tuple} -- Iterable of records. Can be a 2-D array of list of dictionaries Returns: list -- List of records """ def _records(): nonlocal records records = parse_excel_array(records=records, array=array, header=header) for record_ in records: record_schema = parse_record(record_, yield_='type') num_to_str = set() for k, v in record_schema.items(): expected_type = self.constraint_mapping.type_.get(k, None) if expected_type and v is not expected_type: if expected_type is str and v in (int, float): num_to_str.add(k) else: raise NonUniformTypeException('{} not in table schema {}' .format(v, self.schema)) self.constraint_mapping.update(schema_dict=record_schema) record_ = parse_record(record_, yield_='record', as_datetime_str=self.as_datetime_str) for k, v in record_.items(): if k in num_to_str: record_[k] = str(v) is_null = self.constraint_mapping.not_null - set(record_.keys()) if len(is_null) > 0: raise NotNullException('{} is null'.format(list(is_null))) yield record_ temp_mapping = None if not update_schema: temp_mapping = deepcopy(self.constraint_mapping) for c in self.schema.values(): assert not isinstance(c.type_, list) records = list(_records()) for record in records: self._update_uniqueness(record) if not update_schema: self.constraint_mapping = ConstraintMapping() self.constraint_mapping.update(temp_mapping.view()) else: self.records.extend(records) return records def ensure_one(self, record, update_schema=False): return self.ensure_multiple([record], update_schema=update_schema)[0] def _update_uniqueness(self, record_dict): for k, v in parse_record(record_dict, yield_='type').items(): if k in self.constraint_mapping.preexisting.keys(): if v in self.constraint_mapping.preexisting[k]: raise NotUniqueException('Duplicate {} for {} exists'.format(v, k)) self.constraint_mapping.preexisting[k].add(v) PK!)'Uexcelschema/exception.pyclass NonUniformTypeException(TypeError): pass class NotUniqueException(ValueError): pass class NotNullException(ValueError): pass PK!z/excelschema/util.pyimport unicodedata from datetime import datetime, date import dateutil.parser from collections import OrderedDict import itertools def normalize_chars(s): return unicodedata.normalize("NFKD", s) def parse_record(record, yield_='type', as_datetime_str=False): return dict(_parse_record(record, yield_, as_datetime_str)) def _parse_record(record, yield_='type', as_datetime_str=False): def _yield_switch(x): if yield_ == 'type': return type(x) elif yield_ == 'record': if isinstance(x, (datetime, date)): x = x.isoformat() if not as_datetime_str: x = dateutil.parser.parse(x) return x else: raise ValueError for k, v in record.items(): if isinstance(v, str): v = normalize_chars(v.strip()) if v.isdigit(): v = int(v) elif '.' in v and v.replace('.', '', 1).isdigit(): v = float(v) elif v in {'', '-'}: continue else: try: v = dateutil.parser.parse(v) except ValueError: pass elif isinstance(v, date): v = datetime.combine(v, datetime.min.time()) yield k, _yield_switch(v) def parse_excel_array(records=None, array=None, header=True): if records and array: raise ValueError('Please specify either record or array') if array: if header: if not isinstance(header, (list, tuple)): header = array[0] array = array[1:] else: header = itertools.count() records = list() for row in array: records.append(OrderedDict(zip(header, row))) if isinstance(header, itertools.count): header = itertools.count() return records PK! ::%excelschema-0.1.2.4.dist-info/LICENSEMIT License Copyright (c) 2018 Pacharapol Withayasakpunt Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. PK!H WX#excelschema-0.1.2.4.dist-info/WHEEL A н#Z."jm)Afb~ڠO68oF04UhoAf f4=4h0k::wXPK!H"\v&excelschema-0.1.2.4.dist-info/METADATAUێ6}W du. ) 6n҇iilH{lMQ9CΜGKxZ0.50/Pbg2d؍*n~ (0XhSjn,઄6D [{+Lk67m*+t5ᵖq;Q/^gw[mr8w;K%VR7WU`'cTXMgWgF<=Oe97/ΎbmJXGُΔ ٔ3z4{2dFԎԖεr\zTlnL;ʼn@!7:.%w3e 9c}Aw, mj ~_Ykt:n5IJ묵8F{x\Z $2-&Z>&ja!&QIv(eҖfjm݇Xݩdk]!*۠[tAHBь&k@+"aZd8κ:=0_Ҟe"h\2JA}wPffghIfk+Guc /\!oJ"dI)q9 .;x8^g!CF8$t1!œo[!dO!)6x`YAL2TAGj^/Ɠy)vDo(U~idQ jO oHڻC}J |6񜞔9yZy k0us\¸*׵K #VAA@W&vW%QMH:t~D4~FpԚrPK!HQ$excelschema-0.1.2.4.dist-info/RECORDϻ@|N"*.EFn-OU&f'3I.M(#d \EEZKoEvNRXX o/'9Oy:iFUyyd/Z?wot~`C͏8ASb!0$ Ge~d:i!ǜ,haF6 :sk9eV HKf%Gcqh1G60ʧBnzaѪ5ř;1'@Q[<q"Oδc<.2㷮+7qª.lsܧ^ ƪ^9}xFa73%_7\(pMG3Z $!mP8{%bRC` ^hՏQ֒yeNTafPK!Qaaexcelschema/__init__.pyPK!:excelschema/constraint.pyPK!2GSSexcelschema/core.pyPK!)'U0excelschema/exception.pyPK!z/excelschema/util.pyPK! ::% excelschema-0.1.2.4.dist-info/LICENSEPK!H WX#/%excelschema-0.1.2.4.dist-info/WHEELPK!H"\v&%excelschema-0.1.2.4.dist-info/METADATAPK!HQ$g)excelschema-0.1.2.4.dist-info/RECORDPK _+