PK1kO6fgread/__init__.py# coding: utf-8 """Module for reading files shared on FASTGenomics""" from .read import list_data_sets, read_data_set, read_data_sets from get_version import get_version __version__ = get_version(__file__) __author__ = "Paweł Biernat" del get_version PK1kOҦZb b fgread/read.pyimport re from pathlib import Path import json from . import readers DEFAULT_READERS = { "Loom": readers.read_loom, "Seurat Object": readers.read_seurat, "AnnData": readers.read_anndata, "10x h5": readers.read_10x_hdf5, "Drop-Seq": readers.read_dropseq, } DATA_DIR = "/fastgenomics/data" class DataSet(object): """Represents a data set on FASTGenomics, including the relative location and the contents of the metadata.json file. """ def __init__(self, path): self.path = path if not self.path.exists(): raise FileNotFoundError(filename=self.path) self.metadata = self.read_metadata() self.format = self.metadata["format"] self.title = self.metadata["title"] self.file = self.path / self.metadata["file"] self.id = int(self.path.name.split("_")[-1]) def read_metadata(self): with open(self.path / "metadata.json") as f: return json.load(f) def __repr__(self): return f"DataSet: {self.title} [{self.format}]" def read_data_set(dataset: DataSet, additional_readers={}): """Reads a single data set. Dispatches to specific readers based on the contents of the `dataset.format`. """ format = dataset.format title = dataset.title path = dataset.path readers = {**DEFAULT_READERS, **additional_readers} if format in readers: print( f'Loading data set "{title}" in format "{format} from directory "{path}".' ) adata = readers[format](dataset) adata.uns["metadata"] = dataset.metadata adata.var["fg_title"] = dataset.title adata.var["fg_id"] = dataset.id return adata else: raise KeyError(f'Unsupported format "{format}", use one of {readers}') def list_data_sets(data_dir=DATA_DIR): """Lists available data sets.""" data_dir = Path(data_dir) paths = [ f for f in data_dir.iterdir() if f.is_dir() and re.match(r"^dataset_\d{4}$", f.name) ] return {dataset.id: dataset for dataset in map(DataSet, paths)} def read_data_sets(datasets=None, additional_readers={}, data_dir=DATA_DIR): """Reads all data sets.""" datasets = datasets or list_data_sets(data_dir) return { id: read_data_set(dataset, additional_readers=additional_readers) for id, dataset in datasets.items() } PKsOm[Оfgread/readers.pyimport anndata import re import numpy as np import pandas as pd import scipy.sparse as sp def read_loom(dataset): """Reads a data set in the loom format.""" adata = anndata.read_loom(dataset.file) return adata def read_seurat(dataset): """Reads a data set in the Seurat format (not implemented).""" raise NotImplementedError("Reading of Seurat files not implemented.") def read_anndata(dataset): """Reads a data set in the AnnData format.""" adata = anndata.read_h5ad(dataset.file) return adata def read_10x_hdf5(dataset): """Reads a data set in the 10x hdf5 format.""" adata = anndata.read_10x_h5(dataset.file) return adata def read_dropseq(dataset): """Reads a data set in the DropSeq format.""" file = dataset.file with open(file) as f: cells = f.readline().replace('"', "").split("\t") samples = [re.search("(.*)_", c).group(1) for c in cells] genes = pd.read_csv( file, sep="\t", skiprows=1, usecols=(0,), header=None, names=["GeneID"] ).set_index("GeneID") X = np.loadtxt( file, delimiter="\t", skiprows=1, usecols=range(1, len(cells) + 1), dtype=np.float32, ).T X = sp.csr_matrix(X) var = genes obs = pd.DataFrame( samples, columns=["sample"], index=pd.Series(cells, name="CellID") ) adata = anndata.AnnData(X=X, var=var, obs=obs) return adata PK!HMuSafgread-0.0.5.dist-info/WHEEL HM K-*ϳR03rOK-J,/RH,szd&Y)r$[)T&UD"PK!HDKfgread-0.0.5.dist-info/METADATAmN0~ sk"U$hABu6Kl{g!--?'KޝovfSRj =`V“K!*  cRa^-ca5˸GYe\^5VqJF%?,L"\  xϏ^+GuփOS%YЂh^YwDŽrcK緶?Yz~blE7Nt?u41ԶDz6ܸ ౻CQpe֊E PK!H6a#fgread-0.0.5.dist-info/RECORDuιr@gHȱ rpEyL&7^ 8=o)INe x᥾FUJE*1\@J88M#}Ab|sK `!oxPԭbT#A MR4Ό qm"QpXO~4(`8VH):WDV]~F=;@e"్wGKl-#rɻNNH;{ Bv(nҷ ( V0UQ~ c 6_PK1kO6fgread/__init__.pyPK1kOҦZb b 1fgread/read.pyPKsOm[О fgread/readers.pyPK!HMuSafgread-0.0.5.dist-info/WHEELPK!HDKfgread-0.0.5.dist-info/METADATAPK!H6a#fgread-0.0.5.dist-info/RECORDPK