PK{gMbQ<fastgenomics/__init__.py""" FASTGenomics python helper """ from get_version import get_version __version__ = get_version(__file__) del get_version PK8L=?  fastgenomics/_resources.pyimport functools import pkg_resources import jinja2 from . import __name__ as module_name j2env = jinja2.Environment(loader=jinja2.PackageLoader(module_name, 'templates')) resource_bytes = functools.partial(pkg_resources.resource_string, module_name) PKT(N] fastgenomics/app.pyimport jsonschema import json from logging import getLogger from pathlib import Path from .defaults import DEFAULT_APP_DIR from .parameters import FGParameters from ._resources import resource_bytes logger = getLogger('fastgenomics.common') class FGApp(object): # files that are checked for existence that are not otherwise # explicitly used by the FGApp _mandatory_files = ["LICENSE", "LICENSE-THIRD-PARTY"] def __init__(self, app_dir=DEFAULT_APP_DIR): if isinstance(app_dir, str): app_dir = Path(app_dir) self.app_dir = app_dir self.manifest = self.get_manifest() self.default_parameters = FGParameters( self.manifest["application"]["parameters"]) self.app_type = self.manifest["application"]["type"] self.inputs = self.manifest['application']['input'] self.outputs = self.manifest['application']['output'] assert self.app_type in ["Calculation", "Visualization"] self.check_files() def get_manifest(self): manifest_file = self.app_dir / "manifest.json" assert manifest_file.exists() manifest = json.loads(manifest_file.read_bytes()) check_manifest(manifest) return manifest def check_files(self): for f in self._mandatory_files: absolute_path = self.app_dir / f if not absolute_path.exists(): raise FileNotFoundError(f"{f} not found in the app directory!") def check_manifest(config: dict): """ Asserts that the manifest (``manifest.json``) matches our JSON-Schema. If not a :py:exc:`jsonschema.ValidationError` will be raised. """ schema = json.loads(resource_bytes('schemes/manifest_schema.json')) jsonschema.validate(config, schema) input_types = config["application"]["input"] output_types = config["application"]["output"] def err_msg(x, y): return "'{}'-type not supported for {}-operations.".format(x, y) if input_types is not None: for name, properties in input_types.items(): if properties["type"] == "output_only": raise RuntimeError(err_msg(properties["type"], "input")) if output_types is not None: for name, properties in output_types.items(): if properties["type"] == "dataset_manifest": raise RuntimeError(err_msg(properties["type"], "output")) PKKPM7Q fastgenomics/app_creator.py""" FASTGenomics App Creation Suite =============================== Provides methods to create basic boilerplate for your apps. """ from pathlib import Path from logging import getLogger from ._resources import j2env from . import _common logger = getLogger('fastgenomics.testing') # registry DOCKER_REGISTRY = 'apps.fastgenomics.org' def create_docker_compose(app_dir: Path, app_name: Path, sample_dir: Path, docker_registry: str = DOCKER_REGISTRY): """ Creates an ``docker-compose.test.yml`` for testing Args: app_dir: Root directory of the app app_name: Name of the app, ``snake_case`` sample_dir: Data root dir containing ``data/**`` and ``config/input_file_mapping.json`` docker_registry: App registry """ docker_compose_file = app_dir / 'docker-compose.test.yml' if docker_compose_file.exists(): logger.warning(f"{docker_compose_file.name} already existing! Aborting.") return # get app type manifest = _common.get_app_manifest()["application"] app_type = manifest['Type'] logger.info("Loading docker-compose.test.yml template") template = j2env.get_template('docker-compose.yml.j2') logger.info(f"Writing {docker_compose_file}") with docker_compose_file.open('w') as f_out: temp = template.render( app_name=app_name, sample_dir=sample_dir.relative_to(app_dir), docker_registry=docker_registry, app_type=app_type, ) f_out.write(temp) def create_file_mapping(sample_dir: Path): """ Creates a base ``input_file_mapping.json`` Args: sample_dir: Sample data directory that will contain ``data/**`` and ``config/input_file_mapping.json`` """ sample_output_dir = sample_dir / 'data' / 'other_app_uuid' / 'output' file_mapping_file = sample_dir / 'config' / 'input_file_mapping.json' if file_mapping_file.exists(): logger.warning(f"{file_mapping_file} already existing! Aborting.") return # creating output directories sample_output_dir.mkdir(parents=True, exist_ok=True) file_mapping_file.parent.mkdir(parents=True, exist_ok=True) # create file_mappings manifest = _common.get_app_manifest()["application"] input_keys = manifest['input'].keys() file_mapping = {key: sample_output_dir / 'fix_me.txt' for key in input_keys} # write file_mappings logger.info("Loading input_file_mapping.json template") template = j2env.get_template('input_file_mapping.json.j2') logger.info(f"Writing {file_mapping_file}") with file_mapping_file.open('w') as f_out: temp = template.render(file_mapping=file_mapping) f_out.write(temp) print() print(f"Please edit {file_mapping_file} and provide the following files:") for key in input_keys: print(f" - {key}: {manifest['Input'][key]['Usage']} ({manifest['Input'][key]['Type']})") print() PKT(N  fastgenomics/data.pyfrom pathlib import Path import json from .defaults import DEFAULT_DATA_ROOT class FGData(object): """This class stores the paths to data structured according to the fastgenomics specification. It also loads the input file mappings and checks if the files exist. """ _subdirs = ["data", "config", "output", "summary"] def __init__(self, data_root=DEFAULT_DATA_ROOT): if isinstance(data_root, str): data_root = Path(data_root) self.root = data_root self.paths = self.get_paths() self.input_file_mapping = self.get_input_file_mapping() self.parameters = self.get_parameters() def get_input_file_mapping(self): mapping_file = self.paths['config'] / "input_file_mapping.json" mapping = json.loads(mapping_file.read_bytes()) # convert to absolute paths and check for existence for f, rel_path in mapping.items(): abs_path = self.paths['data'] / rel_path if not abs_path.exists(): raise FileNotFoundError( f"File {f} from input_file_mapping.json not found in {self.paths['data']}.") mapping[f] = abs_path return mapping def get_paths(self): return {dir: self.root / dir for dir in self._subdirs} def get_parameters(self): params_file = self.paths['config'] / "parameters.json" if params_file.exists(): return json.loads(params_file.read_bytes()) else: return {} PKT(N!_fastgenomics/defaults.pyimport os from pathlib import Path # set default paths DEFAULT_APP_DIR = Path(os.environ.get("FG_APP_DIR", '/app')) DEFAULT_DATA_ROOT = Path(os.environ.get("FG_DATA_ROOT", '/fastgenomics')) PKT(N fastgenomics/deprecated.pyfrom .process import FGProcess _PROCESS = None def get_process(): if _PROCESS is None: raise NameError( f"call set_paths(app_dir, data_root) before accessing the global process.") else: return _PROCESS def set_paths(app_dir, data_root): global _PROCESS _PROCESS = FGProcess(app_dir, data_root) _PROCESS.data.paths['app'] = app_dir def get_parameter(key): return get_process().parameters[key] def get_input_path(filename): return get_process().files[filename].path def get_output_path(filename): return get_process().files[filename].path def get_summary_path(): return get_process().data.paths['summary'] / "summary.md" def get_paths(): return {**get_process().data.paths, 'app': get_process().app.app_dir} def get_app_manifest(): return get_process().app.manifest def get_parameters(): return {name: param.value for name, param in get_process().parameters.parameter.items()} def load_input_file_mapping(): return get_process().data.input_file_mapping def get_input_file_mapping(): return get_process().data.input_file_mapping # this is supposed to convert the file mapping names to paths but we # already do that when constructing input_file_mapping anyway def str_to_path_file_mapping(ifm_dict): return get_process().data.input_file_mapping PKT(NѲhfastgenomics/io.py# coding: utf-8 from pathlib import Path class Files(object): def __init__(self, inputs: dict, outputs: dict, input_dir: Path, output_dir: Path, input_mapping: dict): self.files = {} for name, spec in outputs.items(): self.files[name] = FileOutput(name, spec, output_dir) for name, spec in inputs.items(): self.files[name] = FileInput(name, spec, input_dir, input_mapping) def __getitem__(self, key): return self.files[key] def __contains__(self, key): return key in self.files class File(object): def __init__(self, name, spec): self.name = name try: self.type = spec['type'] self.usage = spec['usage'] except KeyError: raise KeyError( f"File {self.name} is missing a required field (type or usage) in manifest.json.") def raise_if_not_exists(self): if not self.path.exists(): raise FileNotFoundError(f"File {self.name}: not found under {self.path}") class FileInput(File): def __init__(self, name, spec, root, mapping): super().__init__(name, spec) self.optional = spec.get('optional', False) if name not in mapping: if self.optional: self.path = None else: raise KeyError(f"File {name}: not found in input_file_mapping.json") else: self.path = root / mapping[name] self.raise_if_not_exists() class FileOutput(File): def __init__(self, name, spec, root): super().__init__(name, spec) self.path = root / spec['file_name'] PKT(N9fastgenomics/parameters.pyimport copy class Parameter(): type_mapping = { 'float': (int, float), 'integer': int, 'bool': bool, 'list': list, 'dict': dict, 'string': str, 'enum': object, } def __init__(self, param: dict, name): self.name = name self.description = param['description'] self.type = self.type_mapping[param['type']] self.enum = param.get('enum') self.optional = param.get('optional', False) self.value = param['default'] @property def value(self): return self._value @value.setter def value(self, val): if self.optional and val is None: self._value = val return if not isinstance(val, self.type): raise ValueError( f"Setting parameter {self.name} of type {self.type} with value {val}.") elif self.enum and val not in self.enum: raise ValueError( f"Setting parameter {self.name} with a value {val} that is not in {self.enum}.") self._value = val class FGParameters(object): def __init__(self, parameters: dict): self.parameter = {} for name, spec in parameters.items(): self.parameter[name] = Parameter(spec, name) def __getitem__(self, key): return self.parameter[key].value def update(self, values): for name, value in values.items(): if name not in self.parameter: raise KeyError( f"Parameter {name} not found in manifest.json." ) self.parameter[name].value = value def copy(self): return copy.deepcopy(self) def check(self, name, checker, error_message): if not checker(self[name]): raise ValueError( f'Parameter "{name}" failed validation: {error_message}' ) PKT(Nil_R* * fastgenomics/process.pyfrom .app import FGApp from .data import FGData from .parameters import FGParameters from .io import Files from .defaults import DEFAULT_APP_DIR, DEFAULT_DATA_ROOT from pathlib import Path from logging import getLogger logger = getLogger('fastgenomics.common') class FGProcess(object): def __init__(self, app_dir=DEFAULT_APP_DIR, data_dir=DEFAULT_DATA_ROOT): self.data = FGData(data_dir) self.app = FGApp(app_dir) check_input_file_mapping( self.data.input_file_mapping, self.app.manifest) self.parameters = self.app.default_parameters.copy() self.parameters.update(self.data.parameters) # log the updated parameter values info = "\n".join(f"{k}:{v.value}" for k, v in self.parameters.parameter.items()) logger.info(f"Parameters: \n{info}") self.files = Files( self.app.inputs, self.app.outputs, self.data.paths['data'], self.data.paths['output'], self.data.input_file_mapping) def check_input_file_mapping(input_file_mapping: Path, manifest: dict): """checks the keys in input_file_mapping and existence of the files raises a KeyError on missing Key and FileNotFoundError on missing file """ manifest = manifest["application"]['input'] not_in_manifest = set(input_file_mapping.keys()) - set(manifest.keys()) not_in_ifm = set(manifest.keys()) - set(input_file_mapping.keys()) optional = set( entry for entry, settings in manifest.items() if settings.get('optional', False)) missing = not_in_ifm - optional # check keys if not_in_manifest: logger.warning( f"Ignoring Keys defined in input_file_mapping: {not_in_manifest}") if missing: raise KeyError( f"Non-optional keys not defined in input_file_mapping: {missing}") # check for existence for key, entry in input_file_mapping.items(): if not entry.exists(): if key in optional: logger.info( f"Optional file {entry} is not present and may cause an error - be aware!") else: raise FileNotFoundError( f"{entry}, defined in input_file_mapping, not found!") PK8L ifastgenomics/tools.py""" FASTGenomics tools for app development """ from pathlib import Path from logging import getLogger logger = getLogger('fastgenomics.tools') def running_within_docker() -> bool: """ detects, if module is running within docker and returns the result as bool """ if Path('/.dockerenv').exists(): logger.debug("Running within docker") return True else: logger.info("Running locally") return False PKT(N!fastgenomics/external/__init__.pyPKT(N8A fastgenomics/external/anndata.pyfrom ..io import FileOutput, FileInput try: import pandas as pd import scipy.sparse as sp from anndata import AnnData except ImportError: raise ImportError( "This module requires anndata (https://github.com/theislab/anndata)." ) def get_path(file, content_type, input=False, output=False): """Returns a path iff the file is of requested type and """ if input and type(file) is not FileInput: raise TypeError( f'File "{file.name}" has to be an input file.' ) if output and type(file) is not FileOutput: raise TypeError( f'File "{file.name}" has to be an output file.' ) if file.type != content_type: raise TypeError( f'File "{file.name}" is of type "{file.type}" but expected "{content_type}".' ) return file.path def read_data( fgprocess, expr="expression_matrix", cell_meta="cell_metadata", gene_meta="gene_metadata", ): expr_path = get_path( fgprocess.files[expr], content_type="expression_matrix", input=True, ) cell_path = get_path( fgprocess.files[cell_meta], content_type="cell_metadata", input=True, ) gene_path = get_path( fgprocess.files[gene_meta], content_type="gene_metadata", input=True, ) obs = pd.read_csv(cell_path, index_col="cell_id") var = pd.read_csv(gene_path, index_col="entrez_id") expr = pd.read_csv(expr_path, sep=",") counts = sp.coo_matrix( (expr.expression, (expr.cell_id, expr.entrez_id)), shape=(obs.shape[0], var.shape[0]), dtype="float32", ).tocsr() adata = AnnData(counts, obs=obs, var=var) return adata # Writing def write_exprs_csv(adata, csv_file): mat = adata.X.tocoo() df = pd.DataFrame.from_dict( dict(cell_id=mat.row, entrez_id=mat.col, expression=mat.data) ) df.to_csv(csv_file) def write_data( fgprocess, adata, expr=None, cell_meta=None, gene_meta=None ): if expr is not None: exprs_path = get_path( fgprocess.files[expr], content_type="expression_matrix", output=True, ) write_exprs_csv(adata, exprs_path) if cell_meta is not None: cell_path = get_path( fgprocess.files[cell_meta], content_type="cell_metadata", output=True, ) adata.obs.to_csv(cell_path) if gene_meta is not None: gene_path = get_path( fgprocess.files[gene_meta], content_type="gene_metadata", output=True, ) adata.obs.to_csv(gene_path) PKH(Ni)fastgenomics/schemes/manifest_schema.json{ "$schema": "http://json-schema.org/draft-04/schema", "required": [ "application", "schema" ], "definitions": { "input_entry": { "type": "object", "properties": { "type": { "description": "FASTGenomics Type of the file", "enum": [ "expression_matrix", "gene_metadata", "cell_metadata", "coordinates", "assignments", "data_quality", "batch_effects", "dense_matrix", "heatmap_info", "dataset_manifest", "output_only" ], "type": "string" }, "usage": { "description": "short description of the file usage", "examples": ["gene expression matrix", "classification of cells"], "type": "string" }, "optional": { "description": "marks an input file as optional, e.g., it doesn't have to be defined in the input_file_mapping nor be existing", "type": "boolean" } }, "required": ["type", "usage"] }, "output_entry": { "allOf": [ { "$ref": "#/definitions/input_entry"}, { "properties": { "file_name": { "description": "plain filename of the output-file without directory", "type": "string", "pattern": "^[a-zA-Z0-9_.]+$" } }, "required": ["file_name"] } ] }, "parameter_entry": { "type": "object", "properties": { "type": { "description": "Type of the value of the parameter", "enum": ["string", "integer", "float", "bool", "list", "dict", "enum"] }, "optional": { "description": "Accept null as parameter value in addition to values of the given type?", "type": "boolean" }, "description": { "description": "Description of the parameter", "type": "string" }, "enum": { "description": "Valid values of an enum type", "type": "array" }, "default": { "description": "Default value of the parameter" } }, "required": ["type", "description", "default"], "if": { "properties": { "type": { "enum": ["enum"] } } }, "then": { "required": ["enum"] } } }, "type": "object", "properties": { "schema": { "type": "string", "enum": ["1.0.0"], "description": "The version of the schema itself." }, "application": { "type": "object", "properties": { "author": { "type": "object", "properties": { "email": { "description": "E-mail address of app developer", "examples": ["john.doe@fastgenomics.org"], "type": "string" }, "name": { "description": "Name of the app developer", "examples": ["Jon Doe"], "type": "string" }, "organisation": { "description": "Organization of the developer", "examples": ["FASTGenomics"], "type": "string" } } }, "name": { "description": "The name of the application", "examples": ["Hello Genomics Sample App"], "type": "string" }, "type": { "description": "Type of the application", "enum": ["Calculation", "Visualization"] }, "description": { "description": "Description of the application - can be markdown", "type": "string" }, "demands": { "type": "array", "items": { "description": "Demands on the runtime environment", "enum": ["GPU"] } }, "input": { "type": "object", "patternProperties": { "^[a-zA-Z0-9_.]+$": { "$ref": "#/definitions/input_entry" } } }, "output": { "type": "object", "patternProperties": { "^[a-zA-Z0-9_.]+$": { "$ref": "#/definitions/output_entry" } } }, "parameters": { "type": "object", "patternProperties": { "^[a-zA-Z0-9_.]+$": { "$ref": "#/definitions/parameter_entry" } } } }, "required": ["author", "name", "type", "description", "demands", "input", "output", "parameters"] } } } PK8L--,fastgenomics/templates/docker-compose.yml.j2version: '3' # this file can be used to showcase the environment an app would see. # This file is for local development purposes only. FASTGenomics does not need to see this file. services: {{ app_name }}: build: context: . image: {% if docker_registry %}{{ docker_registry.rstrip('/') + '/' }}{% endif %}{{ app_name }}:dev volumes: - ./{{ sample_dir }}/config:/fastgenomics/config/:ro - ./{{ sample_dir }}/data:/fastgenomics/data/:ro {%- if app_type == 'Calculation' %} - ./{{ sample_dir }}/output:/fastgenomics/output/ - ./{{ sample_dir }}/summary:/fastgenomics/summary/ {% elif app_type == 'Visualization' %} ports: - "8000:8000" {% endif %} PK8L笌1fastgenomics/templates/input_file_mapping.json.j2{ {% for file_key, path_val in file_mapping.items() %} "{{ file_key }}": "{{ path_val }}"{{ "," if not loop.last }} {% endfor -%} } PK8LJZBB$fastgenomics-1.1.0.dist-info/LICENSEMIT License Copyright (c) 2017 FASTGenomics Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. PK!H>*RQ"fastgenomics-1.1.0.dist-info/WHEEL HM K-*ϳR03rOK-J,/RH,rzd&Y)r$[)T&UrPK!Hcf3%fastgenomics-1.1.0.dist-info/METADATAWmo6_Դ[ytI۬i7hDj$C߾;%;CZL0,x<~̲3F(9/Crj>3fKUܐ50>_q[L/x.0fiK%iɫk^ˉs;LJx">Թṍ>]Sy[֭欀+ QST룏Jgp*tuVWqt. ;iU?(=FXT/g>OLWh_Ck=`ϩt=A[М[*?_SHT~p@W3zx .yHFQ Nz0֘^E@Ju!1,CbE,O[rr*{ϫJuew=d5Oo%NWiΪ|,_/PK!H+Q#fastgenomics-1.1.0.dist-info/RECORDɲH}= X$^ 2 8l2)>}q#;<'h3ش5Ja4[!(+SShyp(U8J'鄺\^34#}z8SE<4ĂT(jGnsiy.\u-({7j:o;<[U`J:jplm7+Lzmb*Sd& qtK(BEd˫bz]hɚMPC t Tɚ1( t[~M=^3 ¨}lꌪ\̏cKb=mOʥHީ.+0jNKӬEoMBhh\'MJI[wWK] 껈*<8ړ{'Y[Zw7ʪ|cOnU0_j_Q^ f֭'cA1LÓɰ/츻7%x\+buEÈ&m$[۾^KJaO"ʤ*Ip+j3Fǝ,ߴi>:1⩰#>Ҵ୏.l1yP O 6eO܈m믛3Np b7vt\4P0d]Yڻ 7PK{gMbQ<fastgenomics/__init__.pyPK8L=?  fastgenomics/_resources.pyPKT(N] fastgenomics/app.pyPKKPM7Q  fastgenomics/app_creator.pyPKT(N  fastgenomics/data.pyPKT(N!_Xfastgenomics/defaults.pyPKT(N Sfastgenomics/deprecated.pyPKT(NѲh%fastgenomics/io.pyPKT(N9,fastgenomics/parameters.pyPKT(Nil_R* * 4fastgenomics/process.pyPK8L im=fastgenomics/tools.pyPKT(N!x?fastgenomics/external/__init__.pyPKT(N8A ?fastgenomics/external/anndata.pyPKH(Ni)Jfastgenomics/schemes/manifest_schema.jsonPK8L--,]fastgenomics/templates/docker-compose.yml.j2PK8L笌1afastgenomics/templates/input_file_mapping.json.j2PK8LJZBB$afastgenomics-1.1.0.dist-info/LICENSEPK!H>*RQ"effastgenomics-1.1.0.dist-info/WHEELPK!Hcf3%ffastgenomics-1.1.0.dist-info/METADATAPK!H+Q#mfastgenomics-1.1.0.dist-info/RECORDPKq