PKWM scvelo/__init__.py"""scvelo - stochastic single cell RNA velocity""" from .get_version import get_version __version__ = get_version(__file__) del get_version from .read_load import AnnData, read, read_loom, load from .logging import settings from .tools.run import run_all from .tools.velocity import Velocity from .tools.velocity_graph import VelocityGraph from . import preprocessing as pp from . import tools as tl from . import plotting as pl from . import datasets from . import logging from . import utilsPKQM}rrscvelo/datasets.py"""Builtin Datasets. """ from .read_load import read, load from .preprocessing.utils import cleanup import numpy as np def toy_data(n_obs): """Random samples from Dentate Gyrus. """ adata = dentategyrus() indices = np.random.choice(adata.n_obs, n_obs) adata = adata[indices] adata.obs_names = np.array(range(adata.n_obs), dtype='str') adata.var_names_make_unique() return adata def dentategyrus(): """Dentate Gyrus dataset from Hochgerner et al. (2018). Dentate gyrus is part of the hippocampus involved in learning, episodic memory formation and spatial coding. It is measured using 10X Genomics Chromium and described in Hochgerner et al. (2018). The data consists of 25,919 genes across 3,396 cells and provides several interesting characteristics. """ filename = 'data/DentateGyrus/10X43_1.loom' url = 'http://pklab.med.harvard.edu/velocyto/DG1/10X43_1.loom' adata = read(filename, backup_url=url, cleanup=True, sparse=True, cache=True) cleanup(adata, clean='all', keep={'spliced', 'unspliced', 'ambiguous'}) url_louvain = 'https://github.com/theislab/scvelo_notebooks/raw/master/write/DG_clusters.npy' url_umap = 'https://github.com/theislab/scvelo_notebooks/raw/master/write/DG_umap.npy' adata.obs['louvain'] = load('./data/DentateGyrus/DG_clusters.npy', url_louvain) adata.obsm['X_umap'] = load('./data/DentateGyrus/DG_umap.npy', url_umap) adata._sanitize() return adata def forebrain(): """Developing human forebrain. Forebrain tissue of a week 10 embryo, focusing on the glutamatergic neuronal lineage. """ filename = 'data/ForebrainGlut/hgForebrainGlut.loom' url = 'http://pklab.med.harvard.edu/velocyto/hgForebrainGlut/hgForebrainGlut.loom' adata = read(filename, backup_url=url, cleanup=True, sparse=True, cache=True) adata.var_names_make_unique() return adata PKL+Mbxscvelo/get_version.py""" A minimalistic version helper in the spirit of versioneer, that is able to run without build step using pkg_resources. Developed by P Angerer, see https://github.com/flying-sheep/get_version. """ import re import os from pathlib import Path from subprocess import run, PIPE, CalledProcessError from typing import NamedTuple, List, Union, Optional RE_VERSION = r"([\d.]+?)(?:\.dev(\d+))?(?:[_+-]([0-9a-zA-Z.]+))?" RE_GIT_DESCRIBE = r"v?(?:([\d.]+)-(\d+)-g)?([0-9a-f]{7})(-dirty)?" ON_RTD = os.environ.get("READTHEDOCS") == "True" def match_groups(regex, target): match = re.match(regex, target) if match is None: raise re.error(f"Regex does not match “{target}”. RE Pattern: {regex}", regex) return match.groups() class Version(NamedTuple): release: str dev: Optional[str] labels: List[str] @staticmethod def parse(ver): release, dev, labels = match_groups(f"{RE_VERSION}$", ver) return Version(release, dev, labels.split(".") if labels else []) def __str__(self): release = self.release if self.release else "0.0" dev = f".dev{self.dev}" if self.dev else "" labels = f'+{".".join(self.labels)}' if self.labels else "" return f"{release}{dev}{labels}" def get_version_from_dirname(name, parent): """Extracted sdist""" parent = parent.resolve() re_dirname = re.compile(f"{name}-{RE_VERSION}$") if not re_dirname.match(parent.name): return None return Version.parse(parent.name[len(name) + 1 :]) def get_version_from_git(parent): parent = parent.resolve() try: p = run(["git", "rev-parse", "--show-toplevel"], cwd=parent, stdout=PIPE, stderr=PIPE, encoding="utf-8", check=True) except (OSError, CalledProcessError): return None if Path(p.stdout.rstrip("\r\n")).resolve() != parent.resolve(): return None p = run(["git", "describe", "--tags", "--dirty", "--always", "--long", "--match", "v[0-9]*"], cwd=parent, stdout=PIPE, stderr=PIPE, encoding="utf-8", check=True) release, dev, hex_, dirty = match_groups( f"{RE_GIT_DESCRIBE}$", p.stdout.rstrip("\r\n") ) labels = [] if dev == "0": dev = None else: labels.append(hex_) if dirty and not ON_RTD: labels.append("dirty") return Version(release, dev, labels) def get_version_from_metadata(name: str, parent: Optional[Path] = None): try: from pkg_resources import get_distribution, DistributionNotFound except ImportError: return None try: pkg = get_distribution(name) except DistributionNotFound: return None # For an installed package, the parent is the install location path_pkg = Path(pkg.location).resolve() if parent is not None and path_pkg != parent.resolve(): msg = f"""\ metadata: Failed; distribution and package paths do not match: {path_pkg} != {parent.resolve()}\ """ return None return Version.parse(pkg.version) def get_version(package: Union[Path, str]) -> str: """Get the version of a package or module Pass a module path or package name. The former is recommended, since it also works for not yet installed packages. Supports getting the version from #. The directory name (as created by ``setup.py sdist``) #. The output of ``git describe`` #. The package metadata of an installed package (This is the only possibility when passing a name) Args: package: package name or module path (``…/module.py`` or ``…/module/__init__.py``) """ path = Path(package) if not path.suffix and len(path.parts) == 1: # Is probably not a path v = get_version_from_metadata(package) if v: return str(v) if path.suffix != ".py": msg = f"“package” is neither the name of an installed module nor the path to a .py file." if path.suffix: msg += f" Unknown file suffix {path.suffix}" raise ValueError(msg) if path.name == "__init__.py": name = path.parent.name parent = path.parent.parent else: name = path.with_suffix("").name parent = path.parent return str( get_version_from_dirname(name, parent) or get_version_from_git(parent) or get_version_from_metadata(name, parent) or "0.0.0" ) __version__ = get_version(__file__) if __name__ == "__main__": print(__version__)PKPMɀscvelo/logging.pyfrom scanpy import logging as logg from scanpy import settings settings.verbosity = 3 # global verbosity level: show errors(0), warnings(1), info(2) and hints(3) settings._frameon = False import sys from datetime import datetime from time import time def get_passed_time(): now = time() elapsed = now - settings._previous_time settings._previous_time = now from functools import reduce elapsed = "%d:%02d:%02d.%02d" % reduce(lambda ll, b: divmod(ll[0], b) + ll[1:], [(elapsed*100,), 100, 60, 60]) return elapsed def print_version(): from . import __version__ logg._write_log('Running scvelo', __version__, 'on {}.'.format(datetime.now().strftime("%Y-%m-%d %H:%M"))) def print_versions(): for mod in ['scvelo', 'scanpy', 'anndata', 'loompy', 'numpy', 'scipy', 'matplotlib', 'sklearn', 'pandas']: mod_name = mod[0] if isinstance(mod, tuple) else mod mod_install = mod[1] if isinstance(mod, tuple) else mod try: print('{}=={}'.format(mod_install, __import__(mod_name).__version__), end=' ') except (ImportError, AttributeError): pass print() class ProgressReporter: def __init__(self, total, interval=3): self.count = 0 self.total = total self.timestamp = time() self.interval = interval def update(self): self.count += 1 if time() - self.timestamp > self.interval or self.count == self.total: self.timestamp = time() percent = int(self.count * 100 / self.total) sys.stdout.write('\r' + '... %d%%' % percent) sys.stdout.flush() def finish(self): sys.stdout.write('\r') sys.stdout.flush() from anndata.logging import print_memory_usage print_version_and_date = print_version PKWM-scvelo/read_load.pyimport os, re import numpy as np import pandas as pd from urllib.request import urlretrieve from pathlib import Path from scanpy.api import AnnData, read, read_loom def load(filename, backup_url=None, **kwargs): numpy_ext = {'npy', 'npz'} pandas_ext = {'csv', 'txt'} if not os.path.exists(filename) and backup_url is None: raise FileNotFoundError('Did not find file {}.'.format(filename)) elif not os.path.exists(filename): d = os.path.dirname(filename) if not os.path.exists(d): os.makedirs(d) urlretrieve(backup_url, filename) ext = Path(filename).suffixes[-1][1:] if ext in numpy_ext: return np.load(filename, **kwargs) elif ext in pandas_ext: return pd.read_csv(filename, **kwargs) else: raise ValueError('"{}" does not end on a valid extension.\n' 'Please, provide one of the available extensions.\n{}\n' .format(filename, numpy_ext|pandas_ext)) def clean_obs_names(adata, base='[AGTCBDHKMNRSVWY]', ID_length=12, copy=False): """clean in obs_names and append the rest to obs.sample as follows: obs_name "SomePrefix_AGTGATCCATG_SomeSuffix" is changed into obs_name "AGTGATCCATG" and obs.sample "SomePrefix_SomeSuffix" according to https://www.neb.com/tools-and-resources/usage-guidelines/the-genetic-code """ # change cell ID "SomePrefix_AGTGATCCATG_SomeSuffix" into: # cell ID "AGTGATCCATG" of sample "SomePrefix_SomeSuffix" # GENETIC CODE: according to https://www.neb.com/tools-and-resources/usage-guidelines/the-genetic-code def get_base_list(name, base): base_list = base while re.search(base_list + base, name) is not None: base_list += base if len(base_list) == 0: raise ValueError('Encountered an invalid ID in obs_names: ', name) return base_list names = adata.obs_names base_list = get_base_list(names[0], base) if len(np.unique([len(name) for name in adata.obs_names])) == 1: start, end = re.search(base_list, names[0]).span() newIDs = [name[start:end] for name in names] start, end = 0, len(newIDs[0]) for i in range(end - ID_length): if np.any([ID[i] not in base for ID in newIDs]): start += 1 if np.any([ID[::-1][i] not in base for ID in newIDs]): end -= 1 newIDs = [ID[start:end] for ID in newIDs] prefixes = [names[i].replace(newIDs[i], '') for i in range(len(names))] else: prefixes, newIDs = [], [] for name in names: match = re.search(base_list, name) newID = re.search(get_base_list(name, base), name).group() if match is None else match.group() newIDs.append(newID) prefixes.append(name.replace(newID, '')) adata.obs_names = newIDs if len(prefixes[0]) > 0 and len(np.unique(prefixes)) > 1: #idx_names = np.random.choice(len(names), size=20, replace=False) #for i in range(len(names[0])): # if np.all([re.search(names[0][:i], names[ix]) for ix in idx_names]) is not None: obs_key = names[0][:i] adata.obs['sample_batch'] = prefixes adata._sanitize() adata.obs_names_make_unique() return adata if copy else None def merge(adata, ldata): common_obs = adata.obs_names.intersection(ldata.obs_names) if len(common_obs) == 0: clean_obs_names(adata) clean_obs_names(ldata) common_obs = adata.obs_names.intersection(ldata.obs_names) _adata = adata.copy() if adata.shape[1] >= ldata.shape[1] else ldata.copy() _ldata = ldata.copy() if adata.shape[1] >= ldata.shape[1] else adata.copy() _adata = _adata[common_obs] _ldata = _ldata[common_obs] for attr in _ldata.obs.keys(): _adata.obs[attr] = _ldata.obs[attr] for attr in _ldata.obsm.keys(): _adata.obsm[attr] = _ldata.obsm[attr] for attr in _ldata.uns.keys(): _adata.uns[attr] = _ldata.uns[attr] for attr in _ldata.layers.keys(): _adata.layers[attr] = _ldata.layers[attr] if _adata.shape[1] == _ldata.shape[1]: if np.all(adata.var_names == ldata.var_names): for attr in _ldata.var.keys(): _adata.var[attr] = _ldata.var[attr] for attr in _ldata.varm.keys(): _adata.varm[attr] = _ldata.varm[attr] else: raise ValueError('Variable names are not identical.') return _adata PKWMscvelo/utils.pyfrom .preprocessing.utils import show_proportions, cleanup, set_initial_size, get_initial_size from .tools.utils import prod_sum_obs, prod_sum_var, norm, vector_norm, R_squared, \ cosine_correlation, normalize, scale, get_indices, get_iterative_indices, groups_to_bool from .tools.rank_velocity_genes import get_mean_var from .tools.run import convert_to_adata, convert_to_loom from .tools.solver import solve_cov, solve2_inv, solve2_mle from .tools.velocity_confidence import random_subsample from .tools.velocity_graph import vals_to_csr from .plotting.utils import is_categorical, clip, interpret_colorkey from .plotting.velocity_embedding_grid import compute_velocity_on_grid from .read_load import clean_obs_names, merge PK|AMlSscvelo/plotting/__init__.pyfrom .scatter import scatter from .velocity import velocity from .velocity_embedding import velocity_embedding from .velocity_embedding_grid import velocity_embedding_grid from .utils import hist PK,MS* * scvelo/plotting/docs.py"""Shared docstrings for plotting function parameters. """ from textwrap import dedent def doc_params(**kwds): """\ Docstrings should start with "\" in the first line for proper formatting. """ def dec(obj): obj.__doc__ = dedent(obj.__doc__).format(**kwds) return obj return dec doc_scatter = """\ basis: `str` (default='umap') Key for embedding. color: `str`, list of `str` or `None` (default: `None`) Key for annotations of observations/cells or variables/genes use_raw : `bool` (default: `None`) Use `raw` attribute of `adata` if present. layer: `str`, list of `str` or `None` (default: `None`) Specify the layer for `color`. color_map: `str` (default: `matplotlib.rcParams['image.cmap']`) String denoting matplotlib color map. colorbar: `bool` (default: `False`) Whether to show colorbar. palette: list of `str` (default: `None`) Colors to use for plotting groups (categorical annotation). size: `float` (default: 5) Point size. alpha: `float` (default: 1) Set blending - 0 transparent to 1 opaque. perc: tuple, e.g. [2,98] (default: `None`) Specify percentile for continuous coloring. sort_order: `bool` (default: `True`) For continuous annotations used as color parameter, plot data points with higher values on top of others. groups: `str` (default: `all groups`) Restrict to a few categories in categorical observation annotation. components: `str` or list of `str` (default: '1,2') For instance, ['1,2', '2,3']. projection: {'2d', '3d'} (default: '2d') Projection of plot. legend_loc: str (default: 'none') Location of legend, either 'on data', 'right margin' or valid keywords for matplotlib.legend. legend_fontsize: `int` (default: `None`) Legend font size. legend_fontweight: {'normal', 'bold', ...} (default: `None`) Legend font weight. Defaults to 'bold' if `legend_loc = 'on data'`, otherwise to 'normal'. Available are `['light', 'normal', 'medium', 'semibold', 'bold', 'heavy', 'black']`. right_margin: `float` or list of `float` (default: `None`) Adjust the width of the space right of each plotting panel. left_margin: `float` or list of `float` (default: `None`) Adjust the width of the space left of each plotting panel. xlabel: `str` (default: `None`) Label of x-axis. ylabel: `str` (default: `None`) Label of y-axis. title: `str` (default: `None`) Provide title for panels either as, e.g. `["title1", "title2", ...]`. fontsize: `float` (default: `None`) Label font size. figsize: tuple (default: `(7,5)`) Figure size. dpi: `int` (default: 80) Figure dpi. frameon: `bool` (default: `True`) Draw a frame around the scatter plot. show: `bool`, optional (default: `None`) Show the plot, do not return axis. save: `bool` or `str`, optional (default: `None`) If `True` or a `str`, save the figure. A string is appended to the default filename. Infer the filetype if ending on {'.pdf', '.png', '.svg'}. ax: `matplotlib.Axes`, optional (default: `None`) A matplotlib axes object. Only works if plotting a single component.\ """PK Mv77scvelo/plotting/pseudotime.pyfrom .utils import * def principal_curve(adata): X_curve = adata.uns['principal_curve']['projections'] ixsort = adata.uns['principal_curve']['ixsort'] pl.plot(X_curve[ixsort, 0], X_curve[ixsort, 1], c="k", lw=3, zorder=2000000) def pseudotime(adata, gene_list, ckey='velocity', reverse=False): ixsort = adata.uns['principal_curve']['ixsort'] arclength = adata.uns['principal_curve']['arclength'] if reverse: arclength /= np.max(arclength) else: arclength = (np.max(arclength) - arclength) / np.max(arclength) cell_subset = adata.uns['principal_curve']['cell_subset'] adata_subset = adata[cell_subset].copy() gs = pl.GridSpec(1, len(gene_list)) for n, gene in enumerate(gene_list): i = np.where(adata_subset.var_names == gene)[0][0] ax = pl.subplot(gs[n]) lb, ub = np.percentile(adata_subset.obsm[ckey][:, i], [.5, 99.5]) c = np.clip(adata_subset.obsm[ckey][ixsort, i], lb, ub) # pl.scatter(arclength[ixsort], adata2.obsm['Mu'][ixsort, i], alpha=0.7, c='b', s=5, label="unspliced") pl.scatter(arclength[ixsort], adata_subset.obsm['Ms'][ixsort, i] * adata_subset.uns['velocity_pars']['gamma'][i], c=c, cmap='coolwarm', alpha=1, s=1, label="spliced") c = c / abs(c).max() * (adata_subset.obsm['Ms'][ixsort, i] * adata_subset.uns['velocity_pars']['gamma'][i]).max() z = np.ma.polyfit(arclength[ixsort], c, 8) fun = np.poly1d(z) pl.plot(arclength[ixsort], fun(arclength[ixsort]), label=ckey) # Hide the right and top spines ax.spines['right'].set_visible(False) ax.spines['top'].set_visible(False) ax.xaxis.set_major_locator(MaxNLocator(nbins=3)) ax.yaxis.set_major_locator(MaxNLocator(nbins=3)) pl.ylabel(gene) pl.title('Colored by ' + ckey) PKXWM+tYscvelo/plotting/scatter.pyfrom .utils import is_categorical, update_axes, set_label, set_title, default_basis, default_color, \ default_color_map, interpret_colorkey, get_components, set_colorbar, savefig from .docs import doc_scatter, doc_params import scanpy.api.pl as scpl import matplotlib.pyplot as pl import numpy as np import pandas as pd from scipy.sparse import issparse @doc_params(scatter=doc_scatter) def scatter(adata, x=None, y=None, basis=None, vkey=None, color=None, use_raw=None, layer=None, color_map=None, colorbar=False, palette=None, size=5, alpha=1, perc=None, sort_order=True, groups=None, components=None, projection='2d', legend_loc='none', legend_fontsize=None, legend_fontweight=None, right_margin=None, left_margin=None, xlabel=None, ylabel=None, title=None, fontsize=None, figsize=(7,5), dpi=100, frameon=None, show=True, save=None, ax=None, **kwargs): """\ Scatter plot along observations or variables axes. Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. x: `str`, `np.ndarray` or `None` (default: `None`) x coordinate y: `str`, `np.ndarray` or `None` (default: `None`) y coordinate {scatter} Returns ------- If `show==False` a `matplotlib.Axis` """ colors = pd.unique(color) if isinstance(color, (list, tuple, np.record)) else [color] layers = pd.unique(layer) if isinstance(layer, (list, tuple, np.ndarray, np.record)) else [layer] bases = pd.unique(basis) if isinstance(basis, (list, tuple, np.ndarray, np.record)) else [basis] if len(colors) > 1: for i, gs in enumerate(pl.GridSpec(1, len(colors), pl.figure(None, (figsize[0]*len(colors), figsize[1]), dpi=dpi))): scatter(adata, x=x, y=y, basis=basis, layer=layer, color=colors[i], xlabel=xlabel, ylabel=ylabel, color_map=color_map, perc=perc, size=size, alpha=alpha, fontsize=fontsize, frameon=frameon, title=title, show=False, colorbar=colorbar, components=components, figsize=figsize, dpi=dpi, save=None, ax=pl.subplot(gs), use_raw=use_raw, sort_order=sort_order, groups=groups, projection=projection, legend_loc=legend_loc, legend_fontsize=legend_fontsize, legend_fontweight=legend_fontweight, palette=palette, right_margin=right_margin, left_margin=left_margin, **kwargs) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax elif len(layers) > 1: for i, gs in enumerate(pl.GridSpec(1, len(layers), pl.figure(None, (figsize[0] * len(layers), figsize[1]), dpi=dpi))): scatter(adata, x=x, y=y, basis=basis, layer=layers[i], color=color, xlabel=xlabel, ylabel=ylabel, color_map=color_map, perc=perc, size=size, alpha=alpha, fontsize=fontsize, frameon=frameon, title=title, show=False, colorbar=colorbar, components=components, figsize=figsize, dpi=dpi, save=None, ax=pl.subplot(gs), use_raw=use_raw, sort_order=sort_order, groups=groups, projection=projection, legend_loc=legend_loc, legend_fontsize=legend_fontsize, legend_fontweight=legend_fontweight, palette=palette, right_margin=right_margin, left_margin=left_margin, **kwargs) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax elif len(bases) > 1: for i, gs in enumerate(pl.GridSpec(1, len(bases), pl.figure(None, (figsize[0] * len(bases), figsize[1]), dpi=dpi))): scatter(adata, x=x, y=y, basis=bases[i], layer=layer, color=color, xlabel=xlabel, ylabel=ylabel, color_map=color_map, perc=perc, size=size, alpha=alpha, fontsize=fontsize, frameon=frameon, title=title, show=False, colorbar=colorbar, components=components, figsize=figsize, dpi=dpi, save=None, ax=pl.subplot(gs), use_raw=use_raw, sort_order=sort_order, groups=groups, projection=projection, legend_loc=legend_loc, legend_fontsize=legend_fontsize, legend_fontweight=legend_fontweight, palette=palette, right_margin=right_margin, left_margin=left_margin, **kwargs) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax else: ax = pl.figure(None, figsize, dpi=dpi).gca() if ax is None else ax color, layer = colors[0], layers[0] color = default_color(adata) if color is None else color color_map = default_color_map(adata, color) if color_map is None else color_map is_embedding = ((x is None) | (y is None)) and basis not in adata.var_names basis = default_basis(adata) if basis is None and is_embedding else basis if is_categorical(adata, color) and is_embedding: ax = scpl.scatter(adata, basis=basis, color=color, use_raw=use_raw, sort_order=sort_order, alpha=alpha, groups=groups, components=components, projection=projection, legend_loc=legend_loc, legend_fontsize=legend_fontsize, legend_fontweight=legend_fontweight, color_map=color_map, palette=palette, right_margin=right_margin, left_margin=left_margin, size=size, title=title, frameon=frameon, show=False, save=None, ax=ax, **kwargs) else: if basis in adata.var_names: x = adata[:, basis].layers['spliced'] if use_raw else adata[:, basis].layers['Ms'] y = adata[:, basis].layers['unspliced'] if use_raw else adata[:, basis].layers['Mu'] x, y = x.A if issparse(x) else x, y.A if issparse(y) else y xlabel, ylabel, title = 'spliced', 'unspliced', basis frameon = True if frameon is None else frameon elif is_embedding: X_emb = adata.obsm['X_' + basis][:, get_components(components)] x, y = X_emb[:, 0], X_emb[:, 1] elif isinstance(x, str) and isinstance(y, str) and x in adata.var_names and y in adata.var_names: x = adata[:, x].layers[layer] if layer in adata.layers.keys() else adata[:, x].X y = adata[:, y].layers[layer] if layer in adata.layers.keys() else adata[:, y].X c = interpret_colorkey(adata, color, layer, perc) if layer is not None and 'velocity' in layer and isinstance(color, str) and color in adata.var_names: ub = np.percentile(np.abs(c), 98) kwargs.update({"vmin": -ub, "vmax": ub}) pl.scatter(x, y, c=c, cmap=color_map, s=size, alpha=alpha, zorder=0, **kwargs) set_label(xlabel, ylabel, fontsize, basis) set_title(title, layer, color, fontsize) ax = update_axes(ax, fontsize, is_embedding, frameon) if basis in adata.var_names: xnew = np.linspace(0, x.max() * 1.02) fits = [fit for fit in adata.layers.keys() if all(['velocity' in fit, fit + '_gamma' in adata.var.keys()])] for fit in fits: linestyle = '--' if 'variance_' + fit in adata.layers.keys() else '-' gamma = adata[:, basis].var[fit + '_gamma'].values if fit + '_gamma' in adata.var.keys() else 1 beta = adata[:, basis].var[fit + '_beta'].values if fit + '_beta' in adata.var.keys() else 1 offset = adata[:, basis].var[fit + '_offset'].values if fit + '_offset' in adata.var.keys() else 0 pl.plot(xnew, gamma / beta * xnew + offset / beta, c='k', linestyle=linestyle) if colorbar and not is_categorical(adata, color): set_colorbar(ax) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return axPKTVMo#scvelo/plotting/utils.pyfrom .. import settings import numpy as np import matplotlib.pyplot as pl from matplotlib.ticker import MaxNLocator from mpl_toolkits.axes_grid1.inset_locator import inset_axes from scanpy.plotting.utils import savefig_or_show, default_palette, adjust_palette from matplotlib.colors import is_color_like from pandas.api.types import is_categorical as cat from scipy.sparse import issparse def is_categorical(adata, c): adata._sanitize() # Indentify array of categorical type and transform where applicable return isinstance(c, str) and (c in adata.obs.keys() and cat(adata.obs[c]) or is_color_like(c)) def default_basis(adata): keys = [key for key in ['pca', 'tsne', 'umap'] if 'X_' + key in adata.obsm.keys()] if len(keys) > 0: return keys[-1] else: raise ValueError('No basis specified') def update_axes(ax, fontsize, is_embedding, frameon): frameon = settings._frameon if frameon is None else frameon if frameon: if is_embedding: ax.tick_params(which='both', bottom=False, left=False, labelbottom=False, labelleft=False) else: ax.xaxis.set_major_locator(MaxNLocator(nbins=3, integer=True)) ax.yaxis.set_major_locator(MaxNLocator(nbins=3, integer=True)) labelsize = int(fontsize * .75) if fontsize is not None else None ax.tick_params(axis='both', which='major', labelsize=labelsize) else: ax.set_xlabel('') ax.set_ylabel('') ax.tick_params(which='both', bottom=False, left=False, labelbottom=False, labelleft=False) ax.set_frame_on(False) return ax def set_label(xlabel, ylabel, fontsize=None, basis=None): if isinstance(xlabel, str) and isinstance(ylabel, str): pl.xlabel(xlabel, fontsize=fontsize) pl.ylabel(ylabel, fontsize=fontsize) elif basis is not None: component_name = ('DC' if basis == 'diffmap' else 'tSNE' if basis == 'tsne' else 'UMAP' if basis == 'umap' else 'PC' if basis == 'pca' else basis.replace('draw_graph_', '').upper() if 'draw_graph' in basis else basis) pl.xlabel(component_name + '1') pl.ylabel(component_name + '2') def set_title(title, layer=None, color=None, fontsize=None): if isinstance(title, str): pl.title(title, fontsize=fontsize) elif isinstance(layer, str) and isinstance(color, str): pl.title(color + ' ' + layer, fontsize=fontsize) elif isinstance(color, str): pl.title(color, fontsize=fontsize) def set_frame(ax, frameon): frameon = settings._frameon if frameon is None else frameon if not frameon: ax.set_xlabel('') ax.set_ylabel('') ax.set_frame_on(False) return ax def default_color(adata): return 'clusters' if 'clusters' in adata.obs.keys() else 'louvain' if 'louvain' in adata.obs.keys() else 'grey' def default_color_map(adata, c): return 'viridis_r' if isinstance(c, str) and c in adata.obs.keys() and not is_categorical(adata, c)\ and adata.obs[c].min() == 0 and adata.obs[c].max() == 1 else 'RdBu_r' def clip(c, perc=None): if isinstance(perc, int): perc = [perc, 100] if perc < 50 else [0, perc] lb, ub = np.percentile(c, perc) return np.clip(c, lb, ub) def get_colors(adata, c): if is_color_like(c): return c else: if c+'_colors' not in adata.uns.keys(): palette = default_palette(None) palette = adjust_palette(palette, length=len(adata.obs[c].cat.categories)) adata.uns[c + '_colors'] = palette[:len(adata.obs[c].cat.categories)].by_key()['color'] cluster_ix = adata.obs[c].cat.codes return np.array([adata.uns[c + '_colors'][cluster_ix[i]] for i in range(adata.n_obs)]) def interpret_colorkey(adata, c=None, layer=None, perc=None): if c is None: c = default_color(adata) if is_categorical(adata, c): c = get_colors(adata, c) elif isinstance(c, str): if c in adata.obs.keys(): # color by observation key c = adata.obs[c] elif c in adata.var_names: # color by var in specific layer c = adata[:, c].layers[layer] if layer in adata.layers.keys() else adata[:, c].X c = c.A.flatten() if issparse(c) else c else: raise ValueError('color key is invalid! pass valid observation annotation or a gene name') if perc is not None: c = clip(c, perc=perc) elif len(np.array(c).flatten()) == adata.n_obs: # continuous coloring c = np.array(c).flatten() if perc is not None: c = clip(c, perc=perc) else: raise ValueError('color key is invalid! pass valid observation annotation or a gene name') return c def get_components(components=None): if components is None: components = '1,2' if isinstance(components, str): components = components.split(',') return np.array(components).astype(int) - 1 def set_colorbar(ax, orientation='vertical'): cb = pl.colorbar(orientation=orientation, cax=inset_axes(ax, width="2%", height="30%", loc=4, borderpad=0)) cb.set_alpha(1) cb.draw_all() cb.locator = MaxNLocator(nbins=3, integer=True) cb.update_ticks() def savefig(writekey, show=False, dpi=None, save=None): """Save current figure to file. """ savefig_or_show('velocity_' + writekey + '_' if writekey != '' else 'velocity_', dpi=dpi, save=save, show=show) def hist(arrays, bins, alpha=.5, colors=None, labels=None, xlabel=None, ylabel=None, ax=None): ax = pl.figure(None, (8, 4), dpi=120) if ax is None else ax arrays = arrays if isinstance(arrays, (list, tuple)) else [arrays] palette = default_palette(None)[::3][:len(arrays)].by_key()['color'] colors = palette if colors is None or len(colors) < len(arrays) else colors for i, array in enumerate(arrays): pl.hist(array, bins=bins, alpha=alpha, color=colors[i], label=labels[i] if labels is not None else None) pl.legend() pl.xlabel(xlabel if xlabel is not None else '') pl.ylabel(ylabel if xlabel is not None else '') pl.show() # def phase(adata, var=None, x=None, y=None, color='louvain', fits='all', xlabel='spliced', ylabel='unspliced', # fontsize=None, show=True, ax=None, **kwargs): # if isinstance(var, str) and (var in adata.var_names): # if (x is None) or (y is None): # ix = np.where(adata.var_names == var)[0][0] # x, y = adata.layers['Ms'][:, ix], adata.layers['Mu'][:, ix] # else: # ValueError('var not found in adata.var_names.') # # ax = scatter(adata, x=x, y=y, color=color, frameon=True, title=var, xlabel=xlabel, ylabel=ylabel, ax=ax, **kwargs) # # xnew = np.linspace(0, x.max() * 1.02) # fits = adata.layers.keys() if fits == 'all' else fits # fits = [fit for fit in fits if 'velocity' in fit] # for fit in fits: # linestyle = '--' if 'stochastic' in fit else '-' # pl.plot(xnew, adata.var[fit+'_gamma'][ix] / adata.var[fit+'_beta'][ix] * xnew # + adata.var[fit+'_offset'][ix] / adata.var[fit+'_beta'][ix], c='k', linestyle=linestyle) # # if show: pl.show() # else: return axPK͂TMimscvelo/plotting/velocity.pyfrom ..preprocessing.moments import second_order_moments from ..tools.rank_velocity_genes import rank_velocity_genes from .scatter import scatter from .utils import savefig, default_basis import numpy as np import pandas as pd from matplotlib.ticker import MaxNLocator import matplotlib.pyplot as pl from scipy.sparse import issparse def velocity(adata, var_names=None, basis=None, mode=None, fits='all', layers='all', use_raw=False, color=None, color_map='RdBu_r', perc=[2,98], size=.2, alpha=.5, fontsize=8, figsize=(3,2), dpi=120, show=True, save=None, ax=None, **kwargs): """Phase and velocity plot for set of genes. The phase plot shows spliced against unspliced expressions with steady-state fit. Further the embedding is shown colored by velocity and expression. Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. var_names: `str` or list of `str` (default: `None`) Which variables to show. basis: `str` (default: `'umap'`) Key for embedding coordinates. mode: `'stochastic'` or `None` (default: `None`) Whether to show show covariability phase portrait. fits: `str` or list of `str` (default: `'all'`) Which steady-state estimates to show. layers: `str` or list of `str` (default: `'all'`) Which layers to show. color: `str`, list of `str` or `None` (default: `None`) Key for annotations of observations/cells or variables/genes color_map: `str` (default: `matplotlib.rcParams['image.cmap']`) String denoting matplotlib color map. perc: tuple, e.g. [2,98] (default: `None`) Specify percentile for continuous coloring. size: `float` (default: 5) Point size. alpha: `float` (default: 1) Set blending - 0 transparent to 1 opaque. fontsize: `float` (default: `None`) Label font size. figsize: tuple (default: `(7,5)`) Figure size. dpi: `int` (default: 80) Figure dpi. show: `bool`, optional (default: `None`) Show the plot, do not return axis. save: `bool` or `str`, optional (default: `None`) If `True` or a `str`, save the figure. A string is appended to the default filename. Infer the filetype if ending on {'.pdf', '.png', '.svg'}. ax: `matplotlib.Axes`, optional (default: `None`) A matplotlib axes object. Only works if plotting a single component. """ basis = default_basis(adata) if basis is None else basis var_names = [var_names] if isinstance(var_names, str) else var_names \ if isinstance(var_names, (list, tuple, np.ndarray, np.record)) else rank_velocity_genes(adata, basis=basis, n_genes=4) valid_var_names = adata.var_names[adata.var['velocity_genes']] if 'velocity_genes' in adata.var.keys() else adata.var_names var_names = pd.unique([var for var in var_names if var in valid_var_names]) (skey, ukey) = ('spliced', 'unspliced') if use_raw else ('Ms', 'Mu') layers = ['velocity', skey, 'variance_velocity'] if layers == 'all' else layers layers = [layer for layer in layers if layer in adata.layers.keys()] fits = adata.layers.keys() if fits == 'all' else fits fits = [fit for fit in fits if all(['velocity' in fit, fit + '_gamma' in adata.var.keys()])] stochastic_fits = [fit for fit in fits if 'variance_' + fit in adata.layers.keys()] n_row, n_col = len(var_names), (1 + len(layers) + (mode == 'stochastic')*2) ax = pl.figure(figsize=(figsize[0]*n_col, figsize[1]*n_row), dpi=dpi) if ax is None else ax gs = pl.GridSpec(n_row, n_col, wspace=0.3, hspace=0.5) for v, var in enumerate(var_names): _adata = adata[:, var] s, u = _adata.layers[skey], _adata.layers[ukey] if issparse(s): s, u = s.A, u.A # spliced/unspliced phase portrait with steady-state estimate ax = pl.subplot(gs[v * n_col]) scatter(adata, basis=var, color=color, frameon=True, title=var, xlabel='spliced', ylabel='unspliced', show=False, save=False, ax=ax, fontsize=fontsize, size=size, alpha=alpha, **kwargs) xnew = np.linspace(0, s.max() * 1.02) for fit in fits: linestyle = '--' if fit in stochastic_fits else '-' gamma = _adata.var[fit + '_gamma'].values if fit + '_gamma' in adata.var.keys() else 1 beta = _adata.var[fit + '_beta'].values if fit + '_beta' in adata.var.keys() else 1 offset = _adata.var[fit + '_offset'].values if fit + '_offset' in adata.var.keys() else 0 pl.plot(xnew, gamma / beta * xnew + offset / beta, c='k', linestyle=linestyle) if v == len(var_names)-1: pl.legend(fits, loc='lower right', prop={'size': .5*fontsize}) ax.xaxis.set_major_locator(MaxNLocator(nbins=3)) ax.yaxis.set_major_locator(MaxNLocator(nbins=3)) ax.tick_params(axis='both', which='major', labelsize=.7*fontsize) # velocity and expression plots for l, layer in enumerate(layers): ax = pl.subplot(gs[v*n_col + l + 1]) title = 'expression' if layer == skey else layer scatter(adata, basis=basis, color=var, layer=layer, color_map=color_map, title=title, perc=perc, fontsize=fontsize, size=size, alpha=alpha, show=False, ax=ax, save=False, **kwargs) if mode == 'stochastic': ss, us = second_order_moments(_adata) ss, us = ss.flatten(), us.flatten() fit = stochastic_fits[0] ax = pl.subplot(gs[v*n_col + len(layers) + 1]) offset = _adata.var[fit + '_offset'] if fit + '_offset' in adata.var.keys() else 0 beta = _adata.var[fit + '_beta'] if fit + '_beta' in adata.var.keys() else 1 x = 2 * (ss - s**2) - s y = 2 * (us - u * s) + u + 2 * s * offset / beta scatter(adata, x=x, y=y, color=color, title=var, fontsize=40/n_col, show=False, frameon=True, ax=ax, save=False, perc=perc, xlabel=r'2 $\Sigma_s - \langle s \rangle$', ylabel=r'2 $\Sigma_{us} + \langle u \rangle$', **kwargs) xnew = np.linspace(x.min(), x.max() * 1.02) for fit in stochastic_fits: gamma = _adata.var[fit + '_gamma'].values if fit + '_gamma' in adata.var.keys() else 1 beta = _adata.var[fit + '_beta'].values if fit + '_beta' in adata.var.keys() else 1 offset2 = _adata.var[fit + '_offset2'].values if fit + '_offset2' in adata.var.keys() else 0 pl.plot(xnew, gamma / beta * xnew + offset2 / beta, c='k', linestyle='--') if v == len(var_names) - 1: pl.legend(fits, loc='lower right', prop={'size': 34/n_col}) if isinstance(save, str): savefig('', dpi=dpi, save=save, show=show) if show: pl.show() else: return ax PKXWM%';;%scvelo/plotting/velocity_embedding.pyfrom ..tools.velocity_embedding import velocity_embedding as compute_velocity_embedding from .utils import interpret_colorkey, default_basis, get_components, savefig from .scatter import scatter from .docs import doc_scatter, doc_params from matplotlib.colors import is_color_like import matplotlib.pyplot as pl import numpy as np import pandas as pd @doc_params(scatter=doc_scatter) def velocity_embedding(adata, basis=None, vkey='velocity', density=1, scale=1, X=None, V=None, color=None, use_raw=None, layer=None, color_map=None, colorbar=False, palette=None, size=None, alpha=.2, perc=None, sort_order=True, groups=None, components=None, projection='2d', legend_loc='none', legend_fontsize=None, legend_fontweight=None, right_margin=None, left_margin=None, xlabel=None, ylabel=None, title=None, fontsize=None, figsize=(14,10), dpi=150, frameon=None, show=True, save=None, ax=None, **kwargs): """\ Scatter plot of velocities on the embedding Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. x: `str`, `np.ndarray` or `None` (default: `None`) x coordinate y: `str`, `np.ndarray` or `None` (default: `None`) y coordinate vkey: `str` or `None` (default: `None`) Key for annotations of observations/cells or variables/genes. density: `float` (default: 1) Amount of velocities to show - 0 none to 1 all scale: `float` (default: 1) Length of velocities in the embedding. {scatter} Returns ------- `matplotlib.Axis` if `show==False` """ basis = default_basis(adata) if basis is None else basis colors = pd.unique(color) if isinstance(color, (list, tuple, np.record)) else [color] layers = pd.unique(layer) if isinstance(layer, (list, tuple, np.ndarray, np.record)) else [layer] vkeys = pd.unique(vkey) if isinstance(vkey, (list, tuple, np.ndarray, np.record)) else [vkey] for key in vkeys: if key + '_' + basis not in adata.obsm_keys() and V is None: compute_velocity_embedding(adata, basis=basis, vkey=key) if len(colors) > 1: for i, gs in enumerate(pl.GridSpec(1, len(colors), pl.figure(None, (figsize[0] * len(colors), figsize[1]), dpi=dpi))): velocity_embedding(adata, basis=basis, vkey=vkey, layer=layer, density=density, scale=scale, perc=perc, color=colors[i], use_raw=use_raw, sort_order=sort_order, X=X, V=V, alpha=alpha, groups=groups, components=components, projection=projection, legend_loc=legend_loc, legend_fontsize=legend_fontsize, legend_fontweight=legend_fontweight, color_map=color_map, palette=palette, frameon=frameon, right_margin=right_margin, left_margin=left_margin, size=size, title=title, show=False, figsize=figsize, dpi=dpi, save=None, ax=pl.subplot(gs), xlabel=xlabel, ylabel=ylabel, colorbar=colorbar, fontsize=fontsize, **kwargs) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax elif len(layers) > 1: for i, gs in enumerate(pl.GridSpec(1, len(layers), pl.figure(None, (figsize[0] * len(layers), figsize[1]), dpi=dpi))): velocity_embedding(adata, basis=basis, vkey=vkey, layer=layers[i], density=density, scale=scale, perc=None, color=color, use_raw=use_raw, sort_order=sort_order, X=X, V=V, alpha=alpha, groups=groups, components=components, projection=projection, legend_loc=legend_loc, legend_fontsize=legend_fontsize, legend_fontweight=legend_fontweight, color_map=color_map, palette=palette, frameon=frameon, right_margin=right_margin, left_margin=left_margin, size=size, title=title, show=False, figsize=figsize, dpi=dpi, save=None, ax=pl.subplot(gs), xlabel=xlabel, ylabel=ylabel, colorbar=colorbar, fontsize=fontsize, **kwargs) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax elif len(vkeys) > 1: for i, gs in enumerate(pl.GridSpec(1, len(vkeys), pl.figure(None, (figsize[0] * len(vkeys), figsize[1]), dpi=dpi))): velocity_embedding(adata, basis=basis, vkey=vkeys[i], layer=layer, density=density, scale=scale, perc=None, color=color, use_raw=use_raw, sort_order=sort_order, X=X, V=V, alpha=alpha, groups=groups, components=components, projection=projection, legend_loc=legend_loc, legend_fontsize=legend_fontsize, legend_fontweight=legend_fontweight, color_map=color_map, palette=palette, frameon=frameon, right_margin=right_margin, left_margin=left_margin, size=size, title=title, show=False, figsize=figsize, dpi=dpi, save=None, ax=pl.subplot(gs), xlabel=xlabel, ylabel=ylabel, colorbar=colorbar, fontsize=fontsize, **kwargs) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax else: ax = pl.figure(None, figsize, dpi=dpi).gca() if ax is None else ax color, layer, vkey = colors[0], layers[0], vkeys[0] ix_choice = np.random.choice(adata.n_obs, size=int(density * adata.n_obs), replace=False) X = adata.obsm['X_' + basis][:, get_components(components)][ix_choice] if X is None else X[:, :2][ix_choice] V = adata.obsm[vkey + '_' + basis][ix_choice] if V is None else V[:, :2][ix_choice] quiver_kwargs = {"scale": scale, "cmap": color_map, "angles": 'xy', "scale_units": 'xy', "width": .0005, "edgecolors": 'k', "headwidth": 9, "headlength": 10, "headaxislength": 6, "linewidth": .25} quiver_kwargs.update(kwargs) c = interpret_colorkey(adata, color, layer, perc) c = c[ix_choice] if len(c) == adata.n_obs else c if is_color_like(c[0]): pl.quiver(X[:, 0], X[:, 1], V[:, 0], V[:, 1], color=c, zorder=1, **quiver_kwargs) else: pl.quiver(X[:, 0], X[:, 1], V[:, 0], V[:, 1], c, zorder=1, **quiver_kwargs) ax = scatter(adata, basis=basis, layer=layer, color=color, xlabel=xlabel, ylabel=ylabel, color_map=color_map, perc=perc, size=size, alpha=alpha, fontsize=fontsize, frameon=frameon, title=title, show=False, colorbar=colorbar, components=components, figsize=figsize, dpi=dpi, save=None, ax=ax, use_raw=use_raw, sort_order=sort_order, groups=groups, projection=projection, legend_loc=legend_loc, legend_fontsize=legend_fontsize, legend_fontweight=legend_fontweight, palette=palette, right_margin=right_margin, left_margin=left_margin) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax PKXWM[ ' '*scvelo/plotting/velocity_embedding_grid.pyfrom ..tools.velocity_embedding import quiver_autoscale, velocity_embedding from .utils import default_basis, get_components, savefig from .scatter import scatter from .docs import doc_scatter, doc_params from sklearn.neighbors import NearestNeighbors from scipy.stats import norm as normal import matplotlib.pyplot as pl import numpy as np import pandas as pd def compute_velocity_on_grid(X_emb, V_emb, density=1, smooth=0.5, n_neighbors=None, min_mass=None): # prepare grid n_obs, n_dim = X_emb.shape grs = [] for dim_i in range(n_dim): m, M = np.min(X_emb[:, dim_i]), np.max(X_emb[:, dim_i]) m = m - .01 * np.abs(M - m) M = M + .01 * np.abs(M - m) gr = np.linspace(m, M, 50 * density) grs.append(gr) meshes_tuple = np.meshgrid(*grs) X_grid = np.vstack([i.flat for i in meshes_tuple]).T # estimate grid velocities if n_neighbors is None: n_neighbors = int(n_obs/50) nn = NearestNeighbors(n_neighbors=n_neighbors, n_jobs=-1) nn.fit(X_emb) dists, neighs = nn.kneighbors(X_grid) scale = np.mean([(g[1] - g[0]) for g in grs]) * smooth weight = normal.pdf(x=dists, scale=scale) p_mass = weight.sum(1) V_grid = (V_emb[neighs] * weight[:, :, None]).sum(1) / np.maximum(1, p_mass)[:, None] if min_mass is None: min_mass = np.clip(np.percentile(p_mass, 99) / 100, 1e-2, 1) X_grid, V_grid = X_grid[p_mass > min_mass], V_grid[p_mass > min_mass] V_grid /= 3 * quiver_autoscale(X_grid[:, 0], X_grid[:, 1], V_grid[:, 0], V_grid[:, 1]) return X_grid, V_grid @doc_params(scatter=doc_scatter) def velocity_embedding_grid(adata, basis=None, vkey='velocity', density=1, scale=1, smooth=.5, min_mass=None, n_neighbors=None, X=None, V=None, X_grid=None, V_grid=None, principal_curve=False, color=None, use_raw=None, layer=None, color_map=None, colorbar=False, palette=None, size=None, alpha=.2, perc=None, sort_order=True, groups=None, components=None, projection='2d', legend_loc='none', legend_fontsize=None, legend_fontweight=None, right_margin=None, left_margin=None, xlabel=None, ylabel=None, title=None, fontsize=None, figsize=(7,5), dpi=100, frameon=None, show=True, save=None, ax=None, **kwargs): """\ Scatter plot of velocities for the grid points on the embedding Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. x: `str`, `np.ndarray` or `None` (default: `None`) x coordinate y: `str`, `np.ndarray` or `None` (default: `None`) y coordinate vkey: `str` or `None` (default: `None`) Key for annotations of observations/cells or variables/genes. density: `float` (default: 1) Amount of velocities to show - 0 none to 1 all scale: `float` (default: 1) Length of velocities in the embedding. min_mass: `float` (default: 0.5) Minimum threshold for mass to be shown. smooth: `float` (default: 0.5) Multiplication factor for scale in Gaussian kernel around grid point. n_neighbors: `int` (default: None) Number of neighbors to consider around grid point. X: `np.ndarray` (default: None) embedding grid point coordinates V: `np.ndarray` (default: None) embedding grid velocity coordinates {scatter} Returns ------- `matplotlib.Axis` if `show==False` """ basis = default_basis(adata) if basis is None else basis colors = pd.unique(color) if isinstance(color, (list, tuple, np.record)) else [color] layers = pd.unique(layer) if isinstance(layer, (list, tuple, np.ndarray, np.record)) else [layer] vkeys = pd.unique(vkey) if isinstance(vkey, (list, tuple, np.ndarray, np.record)) else [vkey] for key in vkeys: if key + '_' + basis not in adata.obsm_keys() and V is None: velocity_embedding(adata, basis=basis, vkey=key) color, layer, vkey = colors[0], layers[0], vkeys[0] if X_grid is None or V_grid is None: X_emb = adata.obsm['X_' + basis][:, get_components(components)] if X is None else X[:, :2] V_emb = adata.obsm[vkey + '_' + basis] if V is None else V[:, :2] X_grid, V_grid = compute_velocity_on_grid(X_emb=X_emb, V_emb=V_emb, density=density, smooth=smooth, n_neighbors=n_neighbors, min_mass=min_mass) if len(colors) > 1: for i, gs in enumerate(pl.GridSpec(1, len(colors), pl.figure(None, (figsize[0] * len(colors), figsize[1]), dpi=dpi))): velocity_embedding_grid(adata, basis=basis, vkey=vkey, layer=layer, density=density, scale=scale, X_grid=X_grid, V_grid=V_grid, perc=perc, color=colors[i], min_mass=min_mass, smooth=smooth, n_neighbors=n_neighbors, principal_curve=principal_curve, use_raw=use_raw, sort_order=sort_order, alpha=alpha, groups=groups, components=components, projection=projection, legend_loc=legend_loc, legend_fontsize=legend_fontsize, legend_fontweight=legend_fontweight, color_map=color_map, palette=palette, frameon=frameon, right_margin=right_margin, left_margin=left_margin, size=size, title=title, show=False, figsize=figsize, dpi=dpi, save=None, ax=pl.subplot(gs), xlabel=xlabel, ylabel=ylabel, colorbar=colorbar, fontsize=fontsize, **kwargs) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax elif len(layers) > 1: for i, gs in enumerate(pl.GridSpec(1, len(layers), pl.figure(None, (figsize[0] * len(layers), figsize[1]), dpi=dpi))): velocity_embedding_grid(adata, basis=basis, vkey=vkey, layer=layers[i], density=density, scale=scale, X_grid=X_grid, V_grid=V_grid, perc=perc, color=color, min_mass=min_mass, smooth=smooth, n_neighbors=n_neighbors, principal_curve=principal_curve, use_raw=use_raw, sort_order=sort_order, alpha=alpha, groups=groups, components=components, projection=projection, legend_loc=legend_loc, legend_fontsize=legend_fontsize, legend_fontweight=legend_fontweight, color_map=color_map, palette=palette, frameon=frameon, right_margin=right_margin, left_margin=left_margin, size=size, title=title, show=False, figsize=figsize, dpi=dpi, save=None, ax=pl.subplot(gs), xlabel=xlabel, ylabel=ylabel, colorbar=colorbar, fontsize=fontsize, **kwargs) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax elif len(vkeys) > 1: for i, gs in enumerate(pl.GridSpec(1, len(vkeys), pl.figure(None, (figsize[0] * len(vkeys), figsize[1]), dpi=dpi))): velocity_embedding_grid(adata, basis=basis, vkey=vkeys[i], layer=layer, density=density, scale=scale, X_grid=X_grid, V_grid=V_grid, perc=perc, color=color, min_mass=min_mass, smooth=smooth, n_neighbors=n_neighbors, principal_curve=principal_curve, use_raw=use_raw, sort_order=sort_order, alpha=alpha, groups=groups, components=components, projection=projection, legend_loc=legend_loc, legend_fontsize=legend_fontsize, legend_fontweight=legend_fontweight, color_map=color_map, palette=palette, frameon=frameon, right_margin=right_margin, left_margin=left_margin, size=size, title=title, show=False, figsize=figsize, dpi=dpi, save=None, ax=pl.subplot(gs), xlabel=xlabel, ylabel=ylabel, colorbar=colorbar, fontsize=fontsize, **kwargs) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax else: ax = pl.figure(None, figsize, dpi=dpi).gca() if ax is None else ax quiver_kwargs = {"scale": scale, "angles": 'xy', "scale_units": 'xy', "width": .001, "color": 'black', "edgecolors": 'k', "headwidth": 4.5, "headlength": 5, "headaxislength": 3, "linewidth": .2} quiver_kwargs.update(kwargs) pl.quiver(X_grid[:, 0], X_grid[:, 1], V_grid[:, 0], V_grid[:, 1], **quiver_kwargs, zorder=1) if principal_curve: curve = adata.uns['principal_curve']['projections'] pl.plot(curve[:, 0], curve[:, 1], c="w", lw=6, zorder=2) pl.plot(curve[:, 0], curve[:, 1], c="k", lw=3, zorder=3) ax = scatter(adata, basis=basis, layer=layer, color=color, xlabel=xlabel, ylabel=ylabel, color_map=color_map, perc=perc, size=size, alpha=alpha, fontsize=fontsize, frameon=frameon, title=title, show=False, colorbar=colorbar, components=components, figsize=figsize, dpi=dpi, save=None, ax=ax, use_raw=use_raw, sort_order=sort_order, groups=groups, projection=projection, legend_loc=legend_loc, legend_fontsize=legend_fontsize, legend_fontweight=legend_fontweight, palette=palette, right_margin=right_margin, left_margin=left_margin, **kwargs) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax PKnWM{*2 scvelo/preprocessing/__init__.pyfrom .utils import show_proportions, cleanup, filter_genes, filter_genes_dispersion, \ normalize_per_cell, normalize_layers, log1p, filter_and_normalize, recipe_velocity from .moments import pca, neighbors, moments PKnWMdWqqscvelo/preprocessing/moments.pyfrom ..logging import logg, settings from .utils import normalize_layers from scanpy.api.pp import pca, neighbors from scipy.sparse import csr_matrix import numpy as np def get_connectivities(adata, mode='connectivities', recurse_neighbors=False): connectivities = adata.uns['neighbors'][mode] > 0 connectivities.setdiag(1) if recurse_neighbors: connectivities += connectivities.dot(connectivities * .5) connectivities.data = np.clip(connectivities.data, 0, 1) connectivities = connectivities.multiply(1. / connectivities.sum(1)) return connectivities.tocsr().astype(np.float32) def moments(data, n_neighbors=30, n_pcs=30, mode='connectivities', use_rep=None, recurse_neighbors=False, renormalize=False, plot=False, copy=False): """Computes first order moments for velocity estimation. Arguments --------- data: :class:`~anndata.AnnData` Annotated data matrix. n_neighbors: `int` (default: 30) Number of neighbors to use. n_pcs: `int` (default: 30) Number of principal components to use. mode: `'connectivities'` or `'distances'` (default: `'connectivities'`) Distance metric to use for moment computation. renormalize: `bool` (default: `False`) Renormalize the moments by total counts per cell to its median. copy: `bool` (default: `False`) Return a copy instead of writing to adata. Returns ------- Returns or updates `adata` with the attributes Ms: `.layers` dense matrix with first order moments of spliced counts. Mu: `.layers` dense matrix with first order moments of unspliced counts. """ adata = data.copy() if copy else data if 'neighbors' not in adata.uns.keys() or n_neighbors > adata.uns['neighbors']['params']['n_neighbors']: if 'X_pca' not in adata.obsm.keys() or n_pcs > adata.obsm['X_pca'].shape[1]: pca(adata, n_comps=n_pcs, svd_solver='arpack') if plot: from scanpy.plotting.tools import pca_variance_ratio pca_variance_ratio(adata) neighbors(adata, n_neighbors=n_neighbors, use_rep=('X_pca' if use_rep is None else use_rep), n_pcs=n_pcs) if mode not in adata.uns['neighbors']: raise ValueError('mode can only be \'connectivities\' or \'distances\'') logg.info('computing moments', r=True) normalize_layers(adata) connectivities = get_connectivities(adata, mode, recurse_neighbors=recurse_neighbors) adata.layers['Ms'] = csr_matrix.dot(connectivities, csr_matrix(adata.layers['spliced'])).astype(np.float32).A adata.layers['Mu'] = csr_matrix.dot(connectivities, csr_matrix(adata.layers['unspliced'])).astype(np.float32).A if renormalize: normalize_layers(adata, layers={'Ms', 'Mu'}) logg.info(' finished', time=True, end=' ' if settings.verbosity > 2 else '\n') logg.hint( 'added to `.layers`\n' ' \'Ms\', moments of spliced abundances\n' ' \'Mu\', moments of unspliced abundances') return adata if copy else None def second_order_moments(adata): """Computes second order moments for stochastic velocity estimation. Arguments --------- adata: `AnnData` Annotated data matrix. Returns ------- Mss: Second order moments for spliced abundances Mus: Second order moments for spliced with unspliced abundances """ if 'neighbors' not in adata.uns: raise ValueError('You need to run `pp.neighbors` first to compute a neighborhood graph.') connectivities = get_connectivities(adata) s, u = csr_matrix(adata.layers['spliced']), csr_matrix(adata.layers['unspliced']) Mss = csr_matrix.dot(connectivities, s.multiply(s)).astype(np.float32).A Mus = csr_matrix.dot(connectivities, s.multiply(u)).astype(np.float32).A return Mss, Mus def second_order_moments_u(adata): """Computes second order moments for stochastic velocity estimation. Arguments --------- adata: `AnnData` Annotated data matrix. Returns ------- Muu: Second order moments for unspliced abundances """ if 'neighbors' not in adata.uns: raise ValueError('You need to run `pp.neighbors` first to compute a neighborhood graph.') connectivities = get_connectivities(adata) u = csr_matrix(adata.layers['unspliced']) Muu = csr_matrix.dot(connectivities, u.multiply(u)).astype(np.float32).A return MuuPKWMC=<=<scvelo/preprocessing/utils.pyfrom ..logging import logg import numpy as np from scipy.sparse import issparse from scanpy.api.pp import log1p def show_proportions(adata): """Fraction of spliced/unspliced/ambiguous abundances Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. Returns ------- Prints the fractions of abundances. """ layers_keys = [key for key in ['spliced', 'unspliced', 'ambiguous'] if key in adata.layers.keys()] tot_mol_cell_layers = [adata.layers[key].sum(1) for key in layers_keys] mean_abundances = np.round( [np.mean(tot_mol_cell / np.sum(tot_mol_cell_layers, 0)) for tot_mol_cell in tot_mol_cell_layers], 2) print('Abundance of ' + str(layers_keys) + ': ' + str(mean_abundances)) def cleanup(data, clean='layers', keep={'spliced', 'unspliced'}, copy=False): """Deletes attributes not needed. Arguments --------- data: :class:`~anndata.AnnData` Annotated data matrix. clean: `str` or list of `str` (default: `layers`) Which attributes to consider for freeing memory. keep: `str` or list of `str` (default: `['spliced', unspliced']`) Which attributes to keep. copy: `bool` (default: `False`) Return a copy instead of writing to adata. Returns ------- Returns or updates `adata` with selection of attributes kept. """ adata = data.copy() if copy else data if any(['obs' in clean, 'all' in clean]): for key in list(adata.obs.keys()): if key not in keep: del adata.obs[key] if any(['var' in clean, 'all' in clean]): for key in list(adata.var.keys()): if key not in keep: del adata.var[key] if any(['uns' in clean, 'all' in clean]): for key in list(adata.uns.keys()): if key not in keep: del adata.uns[key] if any(['layers' in clean, 'all' in clean]): for key in list(adata.layers.keys()): # remove layers that are not needed if key not in keep: del adata.layers[key] return adata if copy else None def set_initial_size(adata, layers={'spliced', 'unspliced'}): if all([layer in adata.layers.keys() for layer in layers]): layers = [layer for layer in layers if 'initial_size_' + layer not in adata.obs.keys()] total_size = 0 for layer in layers: X = adata.layers[layer] size = X.sum(1).A1.copy() if issparse(X) else X.sum(1).copy() adata.obs['initial_size_' + layer] = size total_size += size if 'initial_size' not in adata.obs.keys(): adata.obs['initial_size'] = total_size def get_initial_size(adata, layer, by_total_size): if layer not in {'spliced', 'unspliced'}: return None else: return adata.obs['initial_size'].copy() if by_total_size and 'initial_size' in adata.obs.keys() else \ adata.obs['initial_size_' + layer].copy() if 'initial_size_' + layer in adata.obs.keys() else None def filter_genes(data, min_counts=3, min_cells=None, max_counts=None, max_cells=None, min_counts_u=3, min_cells_u=None, max_counts_u=None, max_cells_u=None, copy=False): """Filter genes based on number of cells or counts. Keep genes that have at least `min_counts` counts or are expressed in at least `min_cells` cells or have at most `max_counts` counts or are expressed in at most `max_cells` cells. Only provide one of the optional parameters `min_counts`, `min_cells`, `max_counts`, `max_cells` per call. Parameters ---------- data : :class:`~anndata.AnnData`, `np.ndarray`, `sp.spmatrix` The (annotated) data matrix of shape `n_obs` × `n_vars`. Rows correspond to cells and columns to genes. min_counts : `int`, optional (default: `None`) Minimum number of counts required for a gene to pass filtering. min_cells : `int`, optional (default: `None`) Minimum number of cells expressed required for a gene to pass filtering. max_counts : `int`, optional (default: `None`) Maximum number of counts required for a gene to pass filtering. max_cells : `int`, optional (default: `None`) Maximum number of cells expressed required for a gene to pass filtering. min_counts_u : `int`, optional (default: `None`) Minimum number of unspliced counts required for a gene to pass filtering. min_cells_u : `int`, optional (default: `None`) Minimum number of unspliced cells expressed required for a gene to pass filtering. max_counts_u : `int`, optional (default: `None`) Maximum number of unspliced counts required for a gene to pass filtering. max_cells_u : `int`, optional (default: `None`) Maximum number of unspliced cells expressed required for a gene to pass filtering. copy : `bool`, optional (default: `False`) Determines whether a copy is returned. Returns ------- Filters the object and adds `n_counts` to `adata.var`. """ adata = data.copy() if copy else data from scanpy.api.pp import filter_genes # set initial cell sizes before filtering set_initial_size(adata) if min_counts is not None: filter_genes(adata, min_counts=min_counts) if max_counts is not None: filter_genes(adata, max_counts=max_counts) if min_cells is not None: filter_genes(adata, min_cells=min_cells) if max_cells is not None: filter_genes(adata, max_cells=max_cells) def bool_filter(adata, min_counts_u=None, min_cells_u=None, max_counts_u=None, max_cells_u=None, layer='unspliced'): counts = adata.layers[layer] if (min_counts_u is not None and max_counts_u is None) else adata.layers[layer] > 0 counts = counts.sum(0).A1 if issparse(counts) else counts.sum(0) lb = min_counts_u if min_counts_u is not None else min_cells_u if min_cells_u is not None else -np.inf ub = max_counts_u if max_counts_u is not None else max_cells_u if max_cells_u is not None else np.inf return lb <= counts, counts <= ub if 'unspliced' in adata.layers.keys(): gene_subset = np.ones(adata.n_vars, dtype=bool) if min_counts_u is not None or max_counts is not None: subset_min_counts, subset_max_counts = bool_filter(adata, min_counts_u=min_counts_u, max_counts_u=max_counts_u) gene_subset = subset_min_counts & subset_max_counts adata._inplace_subset_var(gene_subset) if min_cells_u is not None or max_cells_u is not None: subset_min_cells, subset_max_cells = bool_filter(adata, min_cells_u=min_cells_u, max_cells_u=max_cells_u) gene_subset = subset_min_cells & subset_max_cells adata._inplace_subset_var(gene_subset) s = np.sum(~gene_subset) if s > 0: logg.info('filtered out {} genes that are detected'.format(s), end=' ') if min_cells_u is not None or min_counts_u is not None: logg.info('in less than', str(min_cells_u) + ' unspliced cells' if min_counts_u is None else str(min_counts_u) + ' unspliced counts', no_indent=True) if max_cells_u is not None or max_counts_u is not None: logg.info('in more than ', str(max_cells_u) + ' unspliced cells' if max_counts_u is None else str(max_counts_u) + ' unspliced counts', no_indent=True) return adata if copy else None def filter_genes_dispersion(data, flavor='seurat', min_disp=None, max_disp=None, min_mean=None, max_mean=None, n_bins=20, n_top_genes=None, log=True, copy=False): """Extract highly variable genes. The normalized dispersion is obtained by scaling with the mean and standard deviation of the dispersions for genes falling into a given bin for mean expression of genes. This means that for each bin of mean expression, highly variable genes are selected. Parameters ---------- data : :class:`~anndata.AnnData`, `np.ndarray`, `sp.sparse` The (annotated) data matrix of shape `n_obs` × `n_vars`. Rows correspond to cells and columns to genes. flavor : {'seurat', 'cell_ranger', 'svr'}, optional (default: 'seurat') Choose the flavor for computing normalized dispersion. If choosing 'seurat', this expects non-logarithmized data - the logarithm of mean and dispersion is taken internally when `log` is at its default value `True`. For 'cell_ranger', this is usually called for logarithmized data - in this case you should set `log` to `False`. In their default workflows, Seurat passes the cutoffs whereas Cell Ranger passes `n_top_genes`. min_mean=0.0125, max_mean=3, min_disp=0.5, max_disp=`None` : `float`, optional If `n_top_genes` unequals `None`, these cutoffs for the means and the normalized dispersions are ignored. n_bins : `int` (default: 20) Number of bins for binning the mean gene expression. Normalization is done with respect to each bin. If just a single gene falls into a bin, the normalized dispersion is artificially set to 1. You'll be informed about this if you set `settings.verbosity = 4`. n_top_genes : `int` or `None` (default: `None`) Number of highly-variable genes to keep. log : `bool`, optional (default: `True`) Use the logarithm of the mean to variance ratio. copy : `bool`, optional (default: `False`) If an :class:`~anndata.AnnData` is passed, determines whether a copy is returned. Returns ------- If an AnnData `adata` is passed, returns or updates `adata` depending on \ `copy`. It filters the `adata` and adds the annotations """ adata = data.copy() if copy else data set_initial_size(adata) if flavor == 'svr': mu = adata.X.mean(0).A1 if issparse(adata.X) else adata.X.mean(0) sigma = np.sqrt(adata.X.multiply(adata.X).mean(0).A1 - mu ** 2) if issparse(adata.X) else adata.X.std(0) log_mu = np.log2(mu) log_cv = np.log2(sigma / mu) from sklearn.svm import SVR clf = SVR(gamma=150. / len(mu)) clf.fit(log_mu[:, None], log_cv) score = log_cv - clf.predict(log_mu[:, None]) nth_score = np.sort(score)[::-1][n_top_genes] adata._inplace_subset_var(score >= nth_score) else: from scanpy.api.pp import filter_genes_dispersion filter_genes_dispersion(adata, flavor=flavor, min_disp=min_disp, max_disp=max_disp, min_mean=min_mean, max_mean=max_mean, n_bins=n_bins, n_top_genes=n_top_genes, log=log) return adata if copy else None def normalize_layers(data, layers={'spliced', 'unspliced'}, by_total_size=None, copy=False): """Normalize by total counts to median. """ adata = data.copy() if copy else data from scanpy.api.pp import normalize_per_cell def not_normalized_yet(adata, layer): X = adata.layers[layer] return np.allclose((X.data[:10] if issparse(X) else X[0]) % 1, 0, atol=1e-3) for layer in layers: if not_normalized_yet(adata, layer): size = get_initial_size(adata, layer, by_total_size) adata.layers[layer] = normalize_per_cell(adata.layers[layer], None, size, copy=True) return adata if copy else None def normalize_per_cell(data, counts_per_cell_after=None, counts_per_cell=None, key_n_counts=None, layers={'spliced', 'unspliced'}, copy=False): """Normalize each cell by total counts over all genes. Parameters ---------- data : :class:`~anndata.AnnData`, `np.ndarray`, `sp.sparse` The (annotated) data matrix of shape `n_obs` × `n_vars`. Rows correspond to cells and columns to genes. counts_per_cell_after : `float` or `None`, optional (default: `None`) If `None`, after normalization, each cell has a total count equal to the median of the *counts_per_cell* before normalization. counts_per_cell : `np.array`, optional (default: `None`) Precomputed counts per cell. key_n_counts : `str`, optional (default: `'n_counts'`) Name of the field in `adata.obs` where the total counts per cell are stored. layers: `str` or `list` (default: `{'spliced', 'unspliced'}`) Keys for layers to be also considered for normalization. copy : `bool`, optional (default: `False`) If an :class:`~anndata.AnnData` is passed, determines whether a copy is returned. Returns ------- Returns or updates `adata` with normalized version of the original `adata.X`, depending on `copy`. """ adata = data.copy() if copy else data from scanpy.api.pp import normalize_per_cell normalize_per_cell(adata, counts_per_cell_after, counts_per_cell, key_n_counts) normalize_layers(adata, layers) return adata if copy else None def filter_and_normalize(data, min_counts=3, min_counts_u=3, min_cells=None, min_cells_u=None, n_top_genes=None, flavor='seurat', log=True, copy=False): """Filtering, normalization and log transform Expects non-logarithmized data. If using logarithmized data, pass `log=False`. Runs the following steps .. code:: python scv.pp.filter_genes(adata) scv.pp.normalize_per_cell(adata) if n_top_genes is not None: scv.pp.filter_genes_dispersion(adata) if log: scv.pp.log1p(adata) Arguments --------- data: :class:`~anndata.AnnData` Annotated data matrix. min_counts: `int` (default: 3) Minimum number of counts required for a gene to pass filtering (spliced). min_counts_u: `int` (default: 3) Minimum number of counts required for a gene to pass filtering (unspliced). min_cells: `int` (default: `None`) Minimum number of cells expressed required for a gene to pass filtering (spliced). min_cells_u: `int` (default: `None`) Minimum number of cells expressed required for a gene to pass filtering (unspliced). n_top_genes: `int` (default: `None`) Number of genes to keep. flavor: {'seurat', 'cell_ranger', 'svr'}, optional (default: 'seurat') Choose the flavor for computing normalized dispersion. If choosing 'seurat', this expects non-logarithmized data. log: `bool` (default: `True`) Take logarithm. copy: `bool` (default: `False`) Return a copy of `adata` instead of updating it. Returns ------- Returns or updates `adata` depending on `copy`. """ adata = data.copy() if copy else data filter_genes(adata, min_counts=min_counts, min_counts_u=min_counts_u, min_cells=min_cells, min_cells_u=min_cells_u) normalize_per_cell(adata) if n_top_genes is not None: filter_genes_dispersion(adata, n_top_genes=n_top_genes, flavor=flavor) if log: log1p(adata) return adata if copy else None def recipe_velocity(adata, min_counts=3, min_counts_u=3, n_top_genes=None, n_pcs=30, n_neighbors=30, log=True, copy=False): """Runs pp.filter_and_normalize() and pp.moments() """ from .moments import moments filter_and_normalize(adata, min_counts=min_counts, min_counts_u=min_counts_u, n_top_genes=n_top_genes, log=log) moments(adata, n_neighbors=n_neighbors, n_pcs=n_pcs) return adata if copy else None PK)VMAOscvelo/tools/__init__.pyfrom .velocity import velocity from .velocity_graph import velocity_graph from .transition_matrix import transition_matrix from .velocity_embedding import velocity_embedding from .velocity_confidence import velocity_confidence, velocity_confidence_transition from .terminal_states import eigs, terminal_states from .rank_velocity_genes import rank_velocity_genes from scanpy.api.tl import tsne, umap, diffmap, louvain, paga PKUMkJ#scvelo/tools/rank_velocity_genes.pyfrom ..logging import logg, settings from scipy.sparse import issparse from scanpy.preprocessing.simple import filter_genes_dispersion import numpy as np def get_mean_var(X, ignore_zeros=False, perc=None): data = X.data if issparse(X) else X mask_nans = np.isnan(data) | np.isinf(data) | np.isneginf(data) n_nonzeros = (X != 0).sum(0) n_counts = n_nonzeros if ignore_zeros else X.shape[0] if mask_nans.sum() > 0: if issparse(X): data[np.isnan(data) | np.isinf(data) | np.isneginf(data)] = 0 n_nans = n_nonzeros - (X != 0).sum(0) else: X[mask_nans] = 0 n_nans = mask_nans.sum(0) n_counts -= n_nans if perc is not None: if isinstance(perc, int): perc = [perc, 100] if perc < 50 else [0, perc] lb, ub = np.percentile(data, perc) data = np.clip(data, lb, ub) mean = (X.sum(0) / n_counts).A1 if issparse(X) else X.sum(0) / n_counts mean_sq = (X.multiply(X).sum(0) / n_counts).A1 if issparse(X) else np.multiply(X, X).sum(0) / n_counts var = (mean_sq - mean ** 2) * (X.shape[0] / (X.shape[0] - 1)) mean[np.isnan(mean)] = 0 var[np.isnan(var)] = 0 return mean, var def select_groups(adata, groups='all', key='louvain'): """Get subset of groups in adata.obs[key]. """ categories = adata.obs[key].cat.categories groups_masks = np.array([categories[i] == adata.obs[key].values for i, name in enumerate(categories)]) if groups == 'all': groups = categories.values else: groups_ids = [np.where(categories.values == name)[0][0] for name in groups] groups_masks = groups_masks[groups_ids] groups = categories[groups_ids].values return groups, groups_masks def rank_velocity_genes(data, groupby=None, groups='all', n_genes=10, min_counts=None, min_r2=None, min_dispersion=None, method='t-test_overestim_var', use_raw=True, copy=False): """Rank genes for characterizing groups according to unspliced/spliced correlation and differential expression. Parameters ---------- data : :class:`~anndata.AnnData` Annotated data matrix. groupby : `str` The key of the observations grouping to consider. use_raw : `bool`, optional (default: `True`) Use `raw` attribute of `adata` if present. groups : `str`, `list`, optional (default: `'all'`) Subset of groups, e.g. `['g1', 'g2', 'g3']`, to which comparison shall be restricted. If not passed, a ranking will be generated for all groups. n_genes : `int`, optional (default: 100) The number of genes that appear in the returned tables. method : {'t-test', 't-test_overestim_var'} (default: 't-test_overestim_var') If 't-test' uses t-test, if 't-test_overestim_var' overestimates variance of each group. Returns ------- Updates `adata` with the following fields. names : structured `np.ndarray` (`.uns['rank_genes_groups']`) Structured array to be indexed by group id storing the gene names. Ordered according to scores. scores : structured `np.ndarray` (`.uns['rank_genes_groups']`) Structured array to be indexed by group id storing the score for each gene for each group. Ordered according to scores. """ adata = data.copy() if copy else data logg.info('ranking velocity genes', r=True) if method not in {'t-test', 't-test_overestim_var'}: raise ValueError('Method not available.') if True: n_counts = (adata.layers['unspliced'] > 0).sum(0) n_counts = n_counts.A1 if issparse(adata.layers['unspliced']) else n_counts min_counts = np.percentile(n_counts, 80) if min_counts is None else min_counts filter_counts = n_counts > min_counts r2 = adata.var.velocity_r2 min_r2 = np.percentile(r2[r2 > 0], 80) if min_r2 is None else min_r2 filter_r2 = r2 > min_r2 dispersions = adata.var.dispersions_norm if 'dispersions_norm' in adata.var.keys() \ else filter_genes_dispersion(adata.X)['dispersions_norm'] min_dispersion = np.percentile(dispersions, 20) if min_dispersion is None else min_dispersion filter_dispersions = dispersions > min_dispersion idx_sub = filter_counts & filter_r2 & filter_dispersions if 'velocity_genes' in adata.var.keys(): idx_sub = idx_sub & adata.var['velocity_genes'] else: tmp_filter = adata.layers['velocity'] > np.percentile(adata.layers['velocity'], 95, axis=0) std = np.empty(adata.n_vars, dtype=np.float32) for i in range(adata.n_vars): std[i] = adata.obsm['X_' + basis][tmp_filter[:, i]].std(0).mean() idx_random = np.random.choice(adata.n_obs, size=int(adata.n_obs / 10), replace=False) threshold = adata.obsm['X_' + basis][idx_random].std(0).mean() - std.std() # p=0.1 if 'velocity_genes' in adata.var.keys(): std[~adata.var['velocity_genes']] = np.inf idx_sub = std < threshold n_counts = -std adata_sub = adata[:, idx_sub] if groupby is None: return np.array(adata_sub.var_names[np.argsort(n_counts[idx_sub])[::-1][:n_genes]]) else: X = adata_sub.raw.X if adata_sub.raw is not None and use_raw else adata_sub.X if n_genes > X.shape[1]: n_genes = X.shape[1] if isinstance(groups, list) and isinstance(groups[0], int): groups = [str(n) for n in groups] groups, groups_masks = select_groups(adata_sub, groups, groupby) n_groups = groups_masks.shape[0] sizes = groups_masks.sum(1) mean, var = np.zeros((n_groups, X.shape[1])), np.zeros((n_groups, X.shape[1])) for i, mask in enumerate(groups_masks): mean[i], var[i] = get_mean_var(X[mask]) # test each against the union of all other groups rankings_gene_names, rankings_gene_scores, rankings_gene_r2 = [], [], [] for i in range(n_groups): mask_rest = ~groups_masks[i] mean_rest, var_rest = get_mean_var(X[mask_rest]) size_rest = mask_rest.sum() if method == 't-test' else sizes[i] scores = (mean[i] - mean_rest) / np.sqrt(var[i] / sizes[i] + var_rest / size_rest) scores[np.isnan(scores)] = 0 # equivalent to but much faster than np.argsort(scores)[-10:] idx = np.argpartition(scores, -n_genes)[-n_genes:] idx = idx[np.argsort(scores[idx])[::-1]] rankings_gene_names.append(adata_sub.var_names[idx].values) rankings_gene_scores.append(scores[idx]) rankings_gene_r2.append(adata_sub.var['velocity_r2'][idx]) rankings_gene_names = np.array([list(n) for n in rankings_gene_names]) rankings_gene_scores = np.array([list(n) for n in rankings_gene_scores]) rankings_gene_gammas = np.array([list(n) for n in rankings_gene_r2]) #rankings_gene_names = np.rec.fromarrays([n for n in rankings_gene_names], dtype=[(rn, 'U50') for rn in groups]) #rankings_gene_scores = np.rec.fromarrays([n for n in rankings_gene_scores], dtype=[(rn, 'float32') for rn in groups]) #rankings_gene_gammas = np.rec.fromarrays([n for n in rankings_gene_gammas], dtype=[(rn, 'float32') for rn in groups]) key = 'rank_velocity_genes' if key not in adata.uns.keys(): adata.uns[key] = {} adata.uns[key] = {'groups': groups, 'names': rankings_gene_names, 'scores': rankings_gene_scores.round(0), 'r2': rankings_gene_gammas.round(2)} logg.info(' finished', time=True, end=' ' if settings.verbosity > 2 else '\n') logg.hint( 'added to `.uns`\n' ' \'' + key + '\', sorted np.recarray to be indexed by group ids\n') return adata if copy else None PKɄVMMOpscvelo/tools/run.pyfrom ..preprocessing import filter_and_normalize, moments from . import velocity, velocity_graph, velocity_embedding def run_all(data, basis=None, mode='deterministic', min_counts=3, n_pcs=30, n_neighbors=30, copy=False): from time import time start = time() adata = data.copy() if copy else data filter_and_normalize(adata, min_counts=min_counts) print("Number of genes to be used:", adata.X.shape[1]) moments(adata, n_neighbors=n_neighbors, n_pcs=n_pcs) velocity(adata, mode=mode) print("Number of genes to be used:", adata.var.velocity_genes.sum()) velocity_graph(adata) if basis is not None: velocity_embedding(adata, basis=basis) print('Total time (seconds): ' + str(round(time() - start, 2))) return adata if copy else None def convert_to_adata(vlm, basis=None): from collections import OrderedDict from .. import AnnData X = vlm.S_norm.T if hasattr(vlm, 'S_norm') else vlm.S_sz.T if hasattr(vlm, 'S_sz') else vlm.S.T # .sparse().T.tocsr() layers = OrderedDict() layers['spliced'] = vlm.S_sz.T if hasattr(vlm, 'S_sz') else vlm.S.T layers['unspliced'] = vlm.U_sz.T if hasattr(vlm, 'U_sz') else vlm.U.T if hasattr(vlm, 'velocity'): layers['velocity'] = vlm.velocity.T if hasattr(vlm, 'Sx_sz'): layers['Ms'] = vlm.Sx_sz.T if hasattr(vlm, 'Ux_sz'): layers['Mu'] = vlm.Ux_sz.T obs = dict(vlm.ca) if 'CellID' in obs.keys(): obs['obs_names'] = obs.pop('CellID') var = dict(vlm.ra) if 'Gene' in var.keys(): var['var_names'] = var.pop('Gene') if hasattr(vlm, 'q'): var['velocity_offset'] = vlm.q if hasattr(vlm, 'gammas'): var['velocity_gamma'] = vlm.gammas if hasattr(vlm, 'R2'): var['velocity_r2'] = vlm.R2 adata = AnnData(X, obs=obs, var=var, layers=layers) if basis is not None and hasattr(vlm, 'ts'): adata.obsm['X_' + basis] = vlm.ts return adata def convert_to_loom(adata, basis=None): from scipy.sparse import issparse import numpy as np import velocyto class VelocytoLoom(velocyto.VelocytoLoom): def __init__(self, adata, basis=None): self.S = adata.layers['spliced'].T self.U = adata.layers['unspliced'].T self.S = self.S.A if issparse(self.S) else self.S self.U = self.U.A if issparse(self.U) else self.U if 'ambiguous' in adata.layers.keys(): self.A = adata.layers['ambiguous'].T if issparse(self.A): self.A = self.A.A self.initial_cell_size = self.S.sum(0) self.initial_Ucell_size = self.U.sum(0) self.ca = dict() self.ra = dict() self.ca['CellID'] = np.array(adata.obs_names) self.ra['Gene'] = np.array(adata.var_names) for key in adata.obs.keys(): self.ca[key] = np.array(adata.obs[key].values) for key in adata.var.keys(): self.ra[key] = np.array(adata.var[key].values) if isinstance(basis, str) and 'X_' + basis in adata.obsm.keys(): self.ts = adata.obsm['X_' + basis] if 'clusters' in self.ca: self.set_clusters(self.ca['clusters']) elif 'louvain' in self.ca: self.set_clusters(self.ca['louvain']) def filter_and_normalize(self, min_counts=3, min_counts_u=3, min_cells=0, min_cells_u=0, n_top_genes=None, max_expr_avg=20): # counterpart to scv.pp.filter_and_normalize() self.score_detection_levels(min_expr_counts=min_counts, min_expr_counts_U=0, min_cells_express=min_cells, min_cells_express_U=0) self.filter_genes(by_detection_levels=True) if n_top_genes is not None and n_top_genes < self.S.shape[0]: self.score_cv_vs_mean(n_top_genes, max_expr_avg=max_expr_avg) self.filter_genes(by_cv_vs_mean=True) self.score_detection_levels(min_expr_counts=0, min_cells_express=0, min_expr_counts_U=min_counts_u, min_cells_express_U=min_cells_u) self.filter_genes(by_detection_levels=True) self._normalize_S(relative_size=self.initial_cell_size, target_size=np.median(self.initial_cell_size)) self._normalize_U(relative_size=self.initial_Ucell_size, target_size=np.median(self.initial_Ucell_size)) #self.normalize_by_total() print("Number of genes to be used:", self.S.shape[0]) def impute(self, n_pcs=30, n_neighbors=30, balanced=False, renormalize=False): # counterpart to scv.pp.moments() self.perform_PCA(n_components=n_pcs) k = n_neighbors self.knn_imputation(n_pca_dims=n_pcs, k=k, balanced=balanced, b_sight=k * 8, b_maxl=k * 4) if renormalize: self.normalize_median() def velocity_estimation(self, limit_gamma=False, fit_offset=False): self.fit_gammas(limit_gamma=limit_gamma, fit_offset=fit_offset) self.filter_genes_good_fit() self.predict_U() self.calculate_velocity() self.calculate_shift() self.extrapolate_cell_at_t() print("Number of genes to be used:", self.S.shape[0]) def velocity_graph(self, n_neighbors=100, transform='linear', sampled_fraction=.5, expression_scaling=False, sigma_corr=.05): if not hasattr(self, 'ts'): raise ValueError('Compute embedding first.') else: # counterpart to scv.tl.velocity_graph() self.estimate_transition_prob(hidim="Sx_sz", embed="ts", transform=transform, n_neighbors=n_neighbors, knn_random=True, sampled_fraction=sampled_fraction) # counterpart to scv.tl.velocity_embedding() self.calculate_embedding_shift(sigma_corr=sigma_corr, expression_scaling=expression_scaling) def run_all(self, min_counts=3, min_counts_u=3, n_pcs=30, n_neighbors=30, n_neighbors_graph=100, n_top_genes=None, fit_offset=False, limit_gamma=False, transform='linear', expression_scaling=False,): from time import time start = time() self.filter_and_normalize(min_counts=min_counts, min_counts_u=min_counts_u, n_top_genes=n_top_genes) print('Preprocessing: ' + str(round(time() - start, 2))) timestamp = time() self.impute(n_pcs=n_pcs, n_neighbors=n_neighbors) print('Imputation: ' + str(round(time() - timestamp, 2))) timestamp = time() self.velocity(limit_gamma=limit_gamma, fit_offset=fit_offset) print('Velocity Estimation: ' + str(round(time() - timestamp, 2))) timestamp = time() self.velocity_graph(n_neighbors=n_neighbors_graph, transform=transform, expression_scaling=expression_scaling) print('Velocity Graph: ' + str(round(time() - timestamp, 2))) print('Total: ' + str(round(time() - start, 2))) return VelocytoLoom(adata, basis) PKNMffscvelo/tools/solver.pyfrom .utils import prod_sum_obs from scipy.optimize import minimize import numpy as np import warnings def solve_cov(x, y, fit_offset=False): """Solution to least squares: gamma = cov(X,Y) / var(X) """ n_obs, n_var = x.shape with warnings.catch_warnings(): warnings.simplefilter("ignore") if fit_offset: cov_xy, cov_xx = prod_sum_obs(x, y) / n_obs, prod_sum_obs(x, x) / n_obs mean_x, mean_y = x.mean(0), y.mean(0) numerator_offset = cov_xx * mean_y - cov_xy * mean_x numerator_gamma = cov_xy - mean_x * mean_y offset, gamma = (numerator_offset, numerator_gamma) / (cov_xx - mean_x * mean_x) else: offset, gamma = np.zeros(n_var), prod_sum_obs(x, y) / prod_sum_obs(x, x) offset[np.isnan(offset)] = 0 gamma[np.isnan(gamma)] = 0 return offset, gamma def solve2_inv(x, y, x2, y2, res_std=None, res2_std=None, fit_offset=False, fit_offset2=False): """Solution to the 2-dim generalized least squares: gamma = inv(X'QX)X'QY """ n_obs, n_var = x.shape offset, offset_ss = np.zeros(n_var, dtype="float32"), np.zeros(n_var, dtype="float32") gamma = np.ones(n_var, dtype="float32") if (res_std is None) or (res2_std is None): res_std, res2_std = np.ones(n_var), np.ones(n_var) ones, zeros = np.ones(n_obs), np.zeros(n_obs) with warnings.catch_warnings(): warnings.simplefilter("ignore") x, y = np.vstack((x/res_std, x2/res2_std)), np.vstack((y/res_std, y2/res2_std)) if fit_offset and fit_offset2: for i in range(n_var): A = np.c_[np.vstack((np.c_[ones/res_std[i], zeros], np.c_[zeros, ones/res2_std[i]])), x[:, i]] offset[i], offset_ss[i], gamma[i] = np.linalg.pinv(A.T.dot(A)).dot(A.T.dot(y[:, i])) elif fit_offset: for i in range(n_var): A = np.c_[np.hstack((ones/res_std[i], zeros)), x[:, i]] offset[i], gamma[i] = np.linalg.pinv(A.T.dot(A)).dot(A.T.dot(y[:, i])) elif fit_offset2: for i in range(n_var): A = np.c_[np.hstack((zeros, ones/res2_std[i])), x[:, i]] offset_ss[i], gamma[i] = np.linalg.pinv(A.T.dot(A)).dot(A.T.dot(y[:, i])) else: for i in range(n_var): A = np.c_[x[:, i]] gamma[i] = np.linalg.pinv(A.T.dot(A)).dot(A.T.dot(y[:, i])) offset[np.isnan(offset)] = 0 offset_ss[np.isnan(offset_ss)] = 0 gamma[np.isnan(gamma)] = 0 return offset, offset_ss, gamma def solve2_mle(Ms, Mu, Mus, Mss, fit_offset=False, fit_offset2=False): """Maximizing the log likelihood using weights according to empirical bayes """ n_obs, n_var = Ms.shape offset, offset_ss = np.zeros(n_var, dtype="float32"), np.zeros(n_var, dtype="float32") gamma = np.ones(n_var, dtype="float32") def sse(A, data, b): sigma = (A.dot(data) - b).std(1) return np.log(sigma).sum() # np.log(np.sqrt(2*np.pi) * sigma).sum() + (.5 * (res/sigma[:, None])**2).sum() if fit_offset and fit_offset2: for i in range(n_var): data = np.vstack((Mu[:, i], Ms[:, i], Mus[:, i], Mss[:, i])) offset[i], offset_ss[i], gamma[i] = \ minimize(lambda m: sse(np.array([[1, -m[2], 0, 0], [1, m[2], 2, -2 * m[2]]]), data, b=np.array(m[0], m[1])), x0=(1e-4, 1e-4, 1), method="L-BFGS-B").x elif fit_offset: for i in range(n_var): data = np.vstack((Mu[:, i], Ms[:, i], Mus[:, i], Mss[:, i])) offset[i], gamma[i] = \ minimize(lambda m: sse(np.array([[1, -m[1], 0, 0], [1, m[1], 2, -2 * m[1]]]), data, b=np.array(m[0], 0)), x0=(1e-4, 1), method="L-BFGS-B").x elif fit_offset2: for i in range(n_var): data = np.vstack((Mu[:, i], Ms[:, i], Mus[:, i], Mss[:, i])) offset_ss[i], gamma[i] = \ minimize(lambda m: sse(np.array([[1, -m[1], 0, 0], [1, m[1], 2, -2 * m[1]]]), data, b=np.array(0, m[0])), x0=(1e-4, 1), method="L-BFGS-B").x else: for i in range(n_var): data = np.vstack((Mu[:, i], Ms[:, i], Mus[:, i], Mss[:, i])) gamma[i] = \ minimize(lambda m: sse(np.array([[1, -m, 0, 0], [1, m, 2, -2 * m]]), data, b=0), x0=gamma[i], method="L-BFGS-B").x return offset, offset_ss, gamma PKckQMU >p p scvelo/tools/terminal_states.pyfrom ..logging import logg, settings from ..preprocessing.moments import get_connectivities from .transition_matrix import transition_matrix from .utils import scale from scipy.sparse import linalg, csr_matrix import numpy as np def eigs(T, k=10, eps=1e-3, perc=None): eigvals, eigvecs = linalg.eigs(T.T, k=k, which='LR') # find k eigs with largest real part p = np.argsort(eigvals)[::-1] # sort in descending order of eigenvalues eigvals = eigvals.real[p] eigvecs = eigvecs.real[:, p] idx = (eigvals >= 1 - eps) # select eigenvectors with eigenvalue of 1 eigvals = eigvals[idx] eigvecs = np.absolute(eigvecs[:, idx]) if perc is not None: lbs, ubs = np.percentile(eigvecs, perc, axis=0) eigvecs[eigvecs < lbs] = 0 eigvecs = np.clip(eigvecs, 0, ubs) eigvecs /= eigvecs.max(0) return eigvals, eigvecs def terminal_states(data, vkey='velocity', self_transitions=False, basis=None, weight_diffusion=0, scale_diffusion=1, eps=1e-3, copy=False): """Computes terminal states (root and end points) via eigenvalue decomposition. """ adata = data.copy() if copy else data connectivities = get_connectivities(adata, 'distances') logg.info('computing root cells', r=True, end=' ') T = transition_matrix(adata, vkey=vkey, basis=basis, weight_diffusion=weight_diffusion, scale_diffusion=scale_diffusion, self_transitions=self_transitions, backward=True) eigvecs = eigs(T, eps=eps, perc=[2, 98])[1] eigvec = csr_matrix.dot(connectivities, eigvecs).sum(1) eigvec = np.clip(eigvec, 0, np.percentile(eigvec, 98)) adata.obs['root'] = scale(eigvec) logg.info('using ' + str(eigvecs.shape[1]) + ' eigenvectors with eigenvalue 1.') logg.info('computing end points', end=' ') T = transition_matrix(adata, vkey=vkey, basis=basis, weight_diffusion=weight_diffusion, scale_diffusion=scale_diffusion, self_transitions=self_transitions, backward=False) eigvecs = eigs(T, eps=eps, perc=[2, 98])[1] eigvec = csr_matrix.dot(connectivities, eigvecs).sum(1) eigvec = np.clip(eigvec, 0, np.percentile(eigvec, 98)) adata.obs['end'] = scale(eigvec) logg.info('using ' + str(eigvecs.shape[1]) + ' eigenvectors with eigenvalue 1.') logg.info(' finished', time=True, end=' ' if settings.verbosity > 2 else '\n') logg.hint( 'added\n' ' \'root\', root cells of Markov diffusion process (adata.obs)\n' ' \'end\', end points of Markov diffusion process (adata.obs)') return adata if copy else None PKUM:@ !scvelo/tools/transition_matrix.pyfrom scipy.spatial.distance import pdist, squareform from scipy.sparse import csr_matrix from .utils import normalize import numpy as np def transition_matrix(adata, vkey='velocity', basis=None, backward=False, self_transitions=True, scale=10, use_negative_cosines=False, weight_diffusion=0, scale_diffusion=1): """Computes transition probabilities by applying Gaussian kernel to cosine similarities x scale Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. vkey: `str` (default: `'velocity'`) Name of velocity estimates to be used. basis: `str` or `None` (default: `None`) Restrict transition to embedding if specified backward: `bool` (default: `False`) Whether to use the transition matrix to push forward (`False`) or to pull backward (`True`) scale: `float` (default: 10) Scale parameter of gaussian kernel. weight_diffusion: `float` (default: 0) Relative weight to be given to diffusion kernel (Brownian motion) scale_diffusion: `float` (default: 1) Scale of diffusion kernel. Returns ------- Returns sparse matrix with transition probabilities. """ if vkey+'_graph' not in adata.uns: raise ValueError('You need to run `tl.velocity_graph` first to compute cosine correlations.') graph = csr_matrix(adata.uns[vkey + '_graph']).copy() if self_transitions: confidence = graph.max(1).A.flatten() ub = np.percentile(confidence, 98) self_prob = np.clip(ub - confidence, 0, 1) graph.setdiag(self_prob) T = np.expm1(graph * scale) # equivalent to np.exp(adata.uns[vkey + '_graph'].A * scale) - 1 if vkey + '_graph_neg' in adata.uns.keys(): T = T - np.expm1(-adata.uns[vkey + '_graph_neg'] * scale) if use_negative_cosines \ else T + (adata.uns[vkey + '_graph_neg'] < 0) * 1e-10 # weight direct neighbors with 1 and indirect (recurse) neighbors with 0.5 if 'neighbors' in adata.uns.keys(): direct_neighbors = adata.uns['neighbors']['distances'] > 0 direct_neighbors.setdiag(1) T = .5 * T + .5 * direct_neighbors.multiply(T) if backward: T = T.T T = normalize(T) if 'X_' + str(basis) in adata.obsm.keys(): dists_emb = (T > 0).multiply(squareform(pdist(adata.obsm['X_' + basis]))) scale_diffusion *= dists_emb.data.mean() diffusion_kernel = dists_emb.copy() diffusion_kernel.data = np.exp(-.5 * dists_emb.data ** 2 / scale_diffusion ** 2) T = T.multiply(diffusion_kernel) # combine velocity based kernel with diffusion based kernel if 0 < weight_diffusion < 1: # add another diffusion kernel (Brownian motion - like) diffusion_kernel.data = np.exp(-.5 * dists_emb.data ** 2 / (scale_diffusion/2) ** 2) T = (1-weight_diffusion) * T + weight_diffusion * diffusion_kernel T = normalize(T) return TPKIVWM scvelo/tools/utils.pyfrom scipy.sparse import csr_matrix, issparse import numpy as np import warnings def prod_sum_obs(A, B): """dot product and sum over axis 0 (obs) equivalent to np.sum(A * B, 0) """ return A.multiply(B).sum(0).A1 if issparse(A) else np.einsum('ij, ij -> j', A, B) def prod_sum_var(A, B): """dot product and sum over axis 1 (var) equivalent to np.sum(A * B, 1) """ return A.multiply(B).sum(1).A1 if issparse(A) else np.einsum('ij, ij -> i', A, B) def norm(A): """computes the L2-norm along axis 1 (e.g. genes or embedding dimensions) equivalent to np.linalg.norm(A, axis=1) """ return np.sqrt(A.multiply(A).sum(1).A1) if issparse(A) else np.sqrt(np.einsum('ij, ij -> i', A, A)) def vector_norm(x): """computes the L2-norm along axis 1 (e.g. genes or embedding dimensions) equivalent to np.linalg.norm(A, axis=1) """ return np.sqrt(np.einsum('i, i -> ', x, x)) def R_squared(residual, total): with warnings.catch_warnings(): warnings.simplefilter("ignore") r2 = np.ones(residual.shape[1]) - prod_sum_obs(residual, residual) / prod_sum_obs(total, total) r2[np.isnan(r2)] = 0 return r2 def cosine_correlation(dX, Vi): dX -= dX.mean(-1)[:, None] Vi_norm = vector_norm(Vi) with warnings.catch_warnings(): warnings.simplefilter("ignore") result = np.zeros(dX.shape[0]) if Vi_norm == 0 else np.einsum('ij, j', dX, Vi) / (norm(dX) * Vi_norm)[None, :] return result def normalize(X): with warnings.catch_warnings(): warnings.simplefilter("ignore") X = X.multiply(csr_matrix(1. / np.abs(X).sum(1))) if issparse(X) else X / X.sum(1) return X def scale(X, min=0, max=1): X = X - X.min() + min X = X / X.max() * max return X def get_indices(dist): n_neighbors = (dist > 0).sum(1).min() rows_idx = np.where((dist > 0).sum(1) > n_neighbors)[0] for row_idx in rows_idx: col_idx = dist[row_idx].indices[n_neighbors:] dist[row_idx, col_idx] = 0 dist.eliminate_zeros() indices = dist.indices.reshape((-1, n_neighbors)) return indices, dist def get_iterative_indices(indices, index, n_recurse_neighbors): def iterate_indices(indices, index, n_recurse_neighbors): return indices[iterate_indices(indices, index, n_recurse_neighbors - 1)] \ if n_recurse_neighbors > 1 else indices[index] return np.unique(iterate_indices(indices, index, n_recurse_neighbors)) def groups_to_bool(adata, groups, groupby=None): groups = [groups] if isinstance(groups, str) else groups if isinstance(groups, (list, tuple, np.ndarray, np.record)): groupby = groupby if groupby in adata.obs.keys() else 'clusters' if 'clusters' in adata.obs.keys() \ else 'louvain' if 'louvain' in adata.obs.keys() else None if groupby is not None: groups = np.array([key in groups for key in adata.obs[groupby]]) else: raise ValueError('groupby attribute not valid.') return groups PKRWM>##scvelo/tools/velocity.pyfrom ..logging import logg, settings from ..preprocessing.moments import moments, second_order_moments from .solver import solve_cov, solve2_inv, solve2_mle from .utils import R_squared, groups_to_bool from scipy.sparse import issparse, csr_matrix import numpy as np import warnings warnings.simplefilter(action='ignore', category=FutureWarning) class Velocity: def __init__(self, adata=None, Ms=None, Mu=None, subset=None, residual=None, use_raw=False,): self._adata = adata self._Ms = adata.layers['spliced'] if use_raw else adata.layers['Ms'] if Ms is None else Ms self._Mu = adata.layers['unspliced'] if use_raw else adata.layers['Mu'] if Mu is None else Mu self._Ms = self._Ms.A if issparse(self._Ms) else self._Ms self._Mu = self._Mu.A if issparse(self._Mu) else self._Mu n_obs, n_vars = self._Ms.shape self._residual, self._residual2 = residual, None self._offset, self._offset2 = np.zeros(n_vars, dtype=np.float32), np.zeros(n_vars, dtype=np.float32) self._gamma, self._r2 = np.zeros(n_vars, dtype=np.float32), np.zeros(n_vars, dtype=np.float32) self._beta, self._velocity_genes = np.ones(n_vars, dtype=np.float32), np.ones(n_vars, dtype=bool) self._subset = subset self._weight = None def compute_deterministic(self, fit_offset=False, perc=None): Ms = self._Ms if self._subset is None else self._Ms[self._subset] Mu = self._Mu if self._subset is None else self._Mu[self._subset] if perc is not None: M_norm = Ms / np.clip(Ms.max(0), 1e-3, None) + Mu / np.clip(Mu.max(0), 1e-3, None) W = csr_matrix(M_norm >= np.percentile(M_norm, perc, axis=0)) Ms, Mu = W.multiply(Ms).tocsr(), W.multiply(Mu).tocsr() self._offset, self._gamma = solve_cov(Ms, Mu, fit_offset) self._residual = self._Mu - self._gamma * self._Ms if fit_offset: self._residual -= self._offset self._r2 = R_squared(self._residual, total=self._Mu - self._Mu.mean(0)) self._velocity_genes = (self._r2 > .01) & (self._gamma > .01) def compute_deterministic_weighted(self, fit_offset=False, perc=98): if self._residual is None: self.compute_deterministic(fit_offset) Ms = self._Ms if self._subset is None else self._Ms[self._subset] Mu = self._Mu if self._subset is None else self._Mu[self._subset] W = csr_matrix(Ms >= np.percentile(Ms, perc, axis=0)) self._offset, self._gamma = solve_cov(W.multiply(Ms), W.multiply(Mu), fit_offset) self._residual = self._Mu - self._gamma * self._Ms if fit_offset: self._residual -= self._offset def compute_stochastic(self, fit_offset=False, fit_offset2=False, mode=None): if self._residual is None: self.compute_deterministic(fit_offset) idx = self._velocity_genes is_subset = True if len(set(idx)) > 1 else False _adata = self._adata[:, idx] if is_subset else self._adata _Ms = self._Ms[:, idx] if is_subset else self._Ms _Mu = self._Mu[:, idx] if is_subset else self._Mu _residual = self._residual[:, idx] if is_subset else self._residual _Mss, _Mus = second_order_moments(_adata) var_ss = 2 * _Mss - _Ms cov_us = 2 * _Mus + _Mu _offset2, _gamma2 = solve_cov(var_ss, cov_us, fit_offset2) # initialize covariance matrix res_std = _residual.std(0) res2_std = (cov_us - _gamma2 * var_ss - _offset2).std(0) # solve multiple regression self._offset[idx], self._offset2[idx], self._gamma[idx] = \ solve2_mle(_Ms, _Mu, _Mus, _Mss, fit_offset, fit_offset2) if mode == 'bayes' \ else solve2_inv(_Ms, _Mu, var_ss, cov_us, res_std, res2_std, fit_offset, fit_offset2) self._residual = self._Mu - self._gamma * self._Ms if fit_offset: self._residual -= self._offset _residual2 = (cov_us - 2 * _Ms * _Mu) - self._gamma[idx] * (var_ss - 2 * _Ms ** 2) if fit_offset: _residual2 += 2 * self._offset[idx] * _Ms if fit_offset2: _residual2 -= self._offset2[idx] if is_subset: self._residual2 = np.zeros(self._Ms.shape, dtype=np.float32) self._residual2[:, idx] = _residual2 else: self._residual2 = _residual2 # if mode == 'alpha': # Muu = second_order_moments_u(adata) # offset2u, alpha = solve_cov(np.ones(Mu.shape) + 2 * Mu, 2 * Muu - Mu) # pars.extend([offset2u, alpha]) # pars_str.extend(['_offset2u', '_alpha']) def get_residuals(self): return self._residual, self._residual2 def get_pars(self): return self._offset, self._offset2, self._beta, self._gamma, self._r2, self._velocity_genes def get_pars_names(self): return ['_offset', '_offset2', '_beta', '_gamma', '_r2', '_genes'] def velocity(data, vkey='velocity', mode=None, fit_offset=False, fit_offset2=False, filter_genes=False, groups=None, groupby=None, subset_for_fitting=None, use_raw=False, perc=None, copy=False): """Estimates velocities in a gene-specific manner Arguments --------- data: :class:`~anndata.AnnData` Annotated data matrix. vkey: `str` (default: `'velocity'`) Name under which to refer to the computed velocities for `velocity_graph` and `velocity_embedding`. mode: `'deterministic'`, `'stochastic'` or `'bayes'` (default: `'stochastic'`) Whether to run the estimation using the deterministic or stochastic model of transcriptional dynamics. `'bayes'` solves the stochastic model and accounts for heteroscedasticity, but is slower than `'stochastic'`. fit_offset: `bool` (default: `False`) Whether to fit with offset for first order moment dynamics. fit_offset2: `bool`, (default: `False`) Whether to fit with offset for second order moment dynamics. filter_genes: `bool` (default: `True`) Whether to remove genes that are not used for further velocity analysis. groups: `str`, `list` (default: `None`) Subset of groups, e.g. [‘g1’, ‘g2’, ‘g3’], to perform velocity analysis on. groupby: `str` (default: `None`) Key of observations grouping to consider. subset_for_fitting: `str`, `list` (default: `None`) Subset of groups, e.g. [‘g1’, ‘g2’, ‘g3’], to which velocity estimation shall be restricted. perc: `int` (default: `None`) Percentile to which velocity estimation shall be restricted. copy: `bool` (default: `False`) Return a copy instead of writing to `adata`. Returns ------- Returns or updates `adata` with the attributes velocity: `.layers` velocity vectors for each individual cell variance_velocity: `.layers` velocity vectors for the cell variances velocity_offset, velocity_beta, velocity_gamma, velocity_r2: `.var` parameters """ adata = data.copy() if copy else data if not use_raw and 'Ms' not in adata.layers.keys(): moments(adata) groups = groups_to_bool(adata, groups, groupby) subset_for_fitting = groups_to_bool(adata, subset_for_fitting, groupby) _adata = adata if groups is None else adata[groups] logg.info('computing velocities', r=True) velo = Velocity(_adata, subset=subset_for_fitting, use_raw=use_raw) velo.compute_deterministic(fit_offset, perc=perc) stochastic = any([mode is not None and mode in item for item in ['stochastic', 'bayes', 'alpha']]) if stochastic: vkey2 = 'variance_' + vkey if filter_genes and len(set(velo._velocity_genes)) > 1: idx = velo._velocity_genes adata._inplace_subset_var(idx) velo = Velocity(_adata, residual=velo._residual[:, idx], subset=subset_for_fitting) velo.compute_stochastic(fit_offset, fit_offset2, mode) if groups is None: adata.layers[vkey], adata.layers[vkey2] = velo.get_residuals() else: if vkey not in adata.layers.keys(): adata.layers[vkey] = np.zeros(adata.shape, dtype=np.float32) if vkey2 not in adata.layers.keys(): adata.layers[vkey2] = np.zeros(adata.shape, dtype=np.float32) adata.layers[vkey][groups], adata.layers[vkey2][groups] = velo.get_residuals() else: if groups is None: adata.layers[vkey], _ = velo.get_residuals() else: if vkey not in adata.layers.keys(): adata.layers[vkey] = np.zeros(adata.shape, dtype=np.float32) adata.layers[vkey][groups], _ = velo.get_residuals() pars = velo.get_pars() for i, key in enumerate(velo.get_pars_names()): if len(set(pars[i])) > 1: adata.var[vkey + key] = pars[i] logg.info(' finished', time=True, end=' ' if settings.verbosity > 2 else '\n') logg.hint( 'added \n' ' \'' + vkey + '\', velocity vectors for each individual cell (adata.layers)') if filter_genes and len(set(velo._velocity_genes)) > 1: # re-initialize after filtering adata._inplace_subset_var(velo._velocity_genes) return adata if copy else None PK3jQM #scvelo/tools/velocity_confidence.pyfrom ..logging import logg from .utils import prod_sum_var, norm, get_indices from ..preprocessing.moments import moments from .transition_matrix import transition_matrix from scanpy.api.pp import neighbors import numpy as np def velocity_confidence(data, vkey='velocity', copy=False): adata = data.copy() if copy else data if vkey not in adata.layers.keys(): raise ValueError( 'You need to run `tl.velocity` first.') idx = adata.var['velocity_genes'] X, V = adata.layers['Ms'][:, idx].copy(), adata.layers[vkey][:, idx].copy() indices = get_indices(dist=adata.uns['neighbors']['distances'])[0] V -= V.mean(1)[:, None] V_norm = norm(V) R = np.zeros(adata.n_obs) for i in range(adata.n_obs): Vi_neighs = V[indices[i]] Vi_neighs -= Vi_neighs.mean(1)[:, None] R[i] = np.mean(np.einsum('ij, j', Vi_neighs, V[i]) / (norm(Vi_neighs) * V_norm[i])[None, :]) adata.obs[vkey + '_length'] = V_norm.round(2) adata.obs[vkey + '_confidence'] = R logg.hint('added \'' + vkey + '_confidence\' (adata.obs)') return adata if copy else None def velocity_confidence_transition(data, vkey='velocity', scale=10, copy=False): adata = data.copy() if copy else data if vkey not in adata.layers.keys(): raise ValueError( 'You need to run `tl.velocity` first.') idx = adata.var['velocity_genes'] T = transition_matrix(adata, vkey=vkey, scale=scale) dX = T.dot(adata.layers['Ms'][:, idx]) - adata.layers['Ms'][:, idx] dX -= dX.mean(1)[:, None] V = adata.layers[vkey][:, idx].copy() V -= V.mean(1)[:, None] adata.obs[vkey + '_confidence_transition'] = prod_sum_var(dX, V) / (norm(dX) * norm(V)) logg.hint('added \'' + vkey + '_confidence_transition\' (adata.obs)') return adata if copy else None def random_subsample(adata, frac=.5): subset = np.random.choice([True, False], size=adata.n_obs, p=[frac, 1-frac]).sum() adata.obs['subset'] = subset adata_subset = adata[subset].copy() neighbors(adata_subset) moments(adata_subset) return adata_subset def score_robustness(data, adata_subset=None, vkey='velocity', copy=False): adata = data.copy() if copy else data if adata_subset is None: adata_subset = random_subsample(adata) V = adata[adata.obs['subset']].layers[vkey] V_subset = adata_subset.layers[vkey] adata_subset.obs[vkey + '_score_robustness'] = prod_sum_var(V, V_subset) / (norm(V) * norm(V_subset)) return adata_subset if copy else None PK9jQMp1'. . "scvelo/tools/velocity_embedding.pyfrom ..logging import logg, settings from .utils import norm from .transition_matrix import transition_matrix from scipy.sparse import issparse import numpy as np import warnings def quiver_autoscale(X, Y, U, V): import matplotlib.pyplot as pl Q = pl.quiver(X, Y, U, V, angles='xy', scale_units='xy', scale=None) Q._init() pl.clf() return Q.scale def velocity_embedding(data, basis=None, vkey='velocity', scale=10, self_transitions=True, use_negative_cosines=True, retain_scale=False, copy=False): """Computes the single cell velocities in the embedding Arguments --------- data: :class:`~anndata.AnnData` Annotated data matrix. basis: `str` (default: `'tsne'`) Which embedding to use. vkey: `str` (default: `'velocity'`) Name of velocity estimates to be used. scale: `int` (default: 10) Scale parameter of gaussian kernel for transition matrix. retain_scale: `bool` (default: `False`) Whether to retain scale from high dimensional space in embedding. Returns ------- Returns or updates `adata` with the attributes velocity_umap: `.obsm` coordinates of velocity projection on embedding """ adata = data.copy() if copy else data T = transition_matrix(adata, vkey=vkey, scale=scale, self_transitions=self_transitions, use_negative_cosines=use_negative_cosines) T.setdiag(0) T.eliminate_zeros() logg.info('computing velocity embedding', r=True) if basis is None: keys = [key for key in ['pca', 'tsne', 'umap'] if 'X_' + key in adata.obsm.keys()] if len(keys) > 0: basis = keys[-1] else: raise ValueError('No basis specified') if 'X_' + basis not in adata.obsm_keys(): raise ValueError('You need compute the embedding first.') X_emb = adata.obsm['X_' + basis][:, :2] V_emb = np.zeros((adata.n_obs, 2)) TA = T.A if adata.n_obs < 8192 else None sparse = False if adata.n_obs < 8192 else True with warnings.catch_warnings(): warnings.simplefilter("ignore") for i in range(adata.n_obs): indices = T[i].indices dX = X_emb[indices] - X_emb[i, None] # shape (n_neighbors, 2) if not retain_scale: dX /= norm(dX)[:, None] dX[np.isnan(dX)] = 0 # zero diff in a steady-state probs = T[i].data if sparse else TA[i, indices] V_emb[i] = probs.dot(dX) - probs.mean() * dX.sum(0) # probs.sum() / len(indices) if retain_scale: delta = T.dot(adata.X) - adata.X if issparse(delta): delta = delta.A cos_proj = (adata.layers[vkey] * delta).sum(1) / norm(delta) V_emb *= np.clip(cos_proj[:, None] * 10, 0, 1) V_emb /= (3 * quiver_autoscale(X_emb[:, 0], X_emb[:, 1], V_emb[:, 0], V_emb[:, 1])) vkey += '_' + basis adata.obsm[vkey] = V_emb logg.info(' finished', time=True, end=' ' if settings.verbosity > 2 else '\n') logg.hint( 'added\n' ' \'' + vkey + '\', embedded velocity vectors (adata.obsm)') return adata if copy else None PKVMs.scvelo/tools/velocity_graph.pyfrom ..logging import logg, settings, ProgressReporter from .utils import cosine_correlation, get_indices, get_iterative_indices from .velocity import velocity from .rank_velocity_genes import rank_velocity_genes from scipy.sparse import coo_matrix, issparse import numpy as np def vals_to_csr(vals, rows, cols, shape): graph = coo_matrix((vals, (rows, cols)), shape=shape) graph_neg = graph.copy() graph.data = np.clip(graph.data, 0, 1) graph_neg.data = np.clip(graph_neg.data, -1, 0) graph.eliminate_zeros() graph_neg.eliminate_zeros() return graph.tocsr(), graph_neg.tocsr() class VelocityGraph: def __init__(self, adata, vkey='velocity', xkey='Ms', basis=None, n_neighbors=None, n_recurse_neighbors=None, n_top_genes=None, sqrt_transform=False): subset = rank_velocity_genes(adata, n_genes=n_top_genes, min_r2=.1, min_dispersion=0, min_counts=10) \ if (n_top_genes is not None and n_top_genes < adata.var['velocity_genes'].sum()) \ else adata.var['velocity_genes'] if 'velocity_genes' in adata.var.keys() \ else np.ones(adata.n_vars, dtype=bool) X = adata[:, subset].layers[xkey].A if issparse(adata.layers[xkey]) else adata[:, subset].layers[xkey] V = adata[:, subset].layers[vkey].A if issparse(adata.layers[vkey]) else adata[:, subset].layers[vkey] self.X = np.array(X.copy(), dtype=np.float32) self.V = np.array(V.copy(), dtype=np.float32) self.sqrt_transform = sqrt_transform if sqrt_transform: self.V = np.sqrt(np.abs(self.V)) * np.sign(self.V) self.V -= self.V.mean(1)[:, None] self.n_recurse_neighbors = 1 if n_neighbors is not None \ else 2 if n_recurse_neighbors is None else n_recurse_neighbors if n_neighbors is None and 'neighbors' in adata.uns.keys(): self.indices = get_indices(dist=adata.uns['neighbors']['distances'])[0] else: from scanpy.api import Neighbors neighs = Neighbors(adata) if basis is None: basis = [key for key in ['X_pca', 'X_tsne', 'X_umap'] if key in adata.obsm.keys()][-1] if n_neighbors is None: n_neighbors = int(adata.shape[0] / 50) neighs.compute_neighbors(n_neighbors=n_neighbors, use_rep=basis, n_pcs=10) self.indices = get_indices(dist=neighs.distances)[0] self.graph = adata.uns[vkey + '_graph'] if vkey + '_graph' in adata.uns.keys() else [] self.graph_neg = adata.uns[vkey + '_graph_neg'] if vkey + '_graph_neg' in adata.uns.keys() else [] def compute_cosines(self): vals, rows, cols, n_obs = [], [], [], self.X.shape[0] progress = ProgressReporter(n_obs) for i in range(n_obs): neighs_idx = get_iterative_indices(self.indices, i, self.n_recurse_neighbors) if self.V[i].max() != 0 or self.V[i].min() != 0: dX = self.X[neighs_idx] - self.X[i, None] # 60% of runtime if self.sqrt_transform: dX = np.sqrt(np.abs(dX)) * np.sign(dX) val = cosine_correlation(dX, self.V[i]) # 40% of runtime else: val = np.zeros(len(neighs_idx)) vals.extend(val) rows.extend(np.ones(len(neighs_idx)) * i) cols.extend(neighs_idx) progress.update() progress.finish() vals = np.hstack(vals) vals[np.isnan(vals)] = 1e-10 # actually zero; just to store these entries in sparse matrix. self.graph, self.graph_neg = vals_to_csr(vals, rows, cols, shape=(n_obs, n_obs)) def velocity_graph(data, vkey='velocity', xkey='Ms', basis=None, n_neighbors=None, n_recurse_neighbors=None, n_top_genes=None, sqrt_transform=False, copy=False): """Computes a velocity graph based on cosine similarities. The cosine similarities are computed between velocities and potential cell state transitions. Arguments --------- data: :class:`~anndata.AnnData` Annotated data matrix. vkey: `str` (default: `'velocity'`) Name of velocity estimates to be used. n_neighbors: `int` or `None` (default: None) Use fixed number of neighbors or do recursive neighbor search (if `None`). n_recurse_neighbors: `int` (default: 2) Number of recursions to be done for neighbors search. sqrt_transform: `bool` (default: `False`) Whether to variance-transform the cell states and velocities before computing cosine similarities. copy: `bool` (default: `False`) Return a copy instead of writing to adata. Returns ------- Returns or updates `adata` with the attributes velocity_graph: `.uns` sparse matrix with transition probabilities """ adata = data.copy() if copy else data if vkey not in adata.layers.keys(): velocity(adata, vkey=vkey) logg.info('computing velocity graph', r=True) vgraph = VelocityGraph(adata, vkey=vkey, xkey=xkey, basis=basis, n_neighbors=n_neighbors, n_recurse_neighbors=n_recurse_neighbors, n_top_genes=n_top_genes, sqrt_transform=sqrt_transform) vgraph.compute_cosines() adata.uns[vkey+'_graph'] = vgraph.graph adata.uns[vkey+'_graph_neg'] = vgraph.graph_neg logg.info(' finished', time=True, end=' ' if settings.verbosity > 2 else '\n') logg.hint( 'added \n' ' \'' + vkey + '_graph\', sparse matrix with cosine correlations (adata.uns)') return adata if copy else None PKkQMo #scvelo/tools/velocity_pseudotime.pyimport numpy as np def principal_curve(data, basis='pca', n_comps=4, clusters_list=None, copy=False): """Computes the principal curve Arguments --------- data: :class:`~anndata.AnnData` Annotated data matrix. basis: `str` (default: `'pca'`) Basis to use for computing the principal curve. n_comps: `int` (default: 4) Number of pricipal components to be used. copy: `bool`, (default: `False`) Return a copy instead of writing to adata. Returns ------- Returns or updates `adata` with the attributes principal_curve: `.uns` dictionary containing `projections`, `ixsort` and `arclength` """ adata = data.copy() if copy else data import rpy2.robjects as robjects from rpy2.robjects.packages import importr if clusters_list is not None: cell_subset = np.array([label in clusters_list for label in adata.obs['clusters']]) X_emb = adata[cell_subset].obsm['X_' + basis][:, :n_comps] else: cell_subset = None X_emb = adata.obsm['X_' + basis][:, :n_comps] n_obs, n_dim = X_emb.shape # convert array to R matrix xvec = robjects.FloatVector(X_emb.T.reshape((X_emb.size))) X_R = robjects.r.matrix(xvec, nrow=n_obs, ncol=n_dim) fit = importr("princurve").principal_curve(X_R) adata.uns['principal_curve'] = dict() adata.uns['principal_curve']['ixsort'] = ixsort = np.array(fit[1])-1 adata.uns['principal_curve']['projections'] = np.array(fit[0])[ixsort] adata.uns['principal_curve']['arclength'] = np.array(fit[2]) adata.uns['principal_curve']['cell_subset'] = cell_subset return adata if copy else None # def pseudotime(adata, iroot=229, clusters_list=None, copy=False): # # iroot = adata.obsm['X_umap'][adata.obs['clusters']=='neuroblast 1'][:,1].argmax() # root_cell = adata.obs_names[iroot] # if clusters_list is not None: # ['granule mature', 'neuroblast 1', 'neuroblast 2', 'granule immature'] # cell_subset = np.array([label in clusters_list for label in adata.obs['clusters']]) # adata_subset = adata.copy() # adata_subset._inplace_subset_obs(cell_subset) # adata_subset.uns['iroot'] = np.where(adata_subset.obs_names == root_cell)[0][0] # dpt(adata_subset, n_branchings=0) # # adata.obs['dpt_pseudotime'] = np.zeros(adata.n_obs) # adata.obs['dpt_pseudotime'][cell_subset] = adata_subset.obs['dpt_pseudotime'] # adata.obs['dpt_pseudotime'][~cell_subset] = -.5 # # return adata if copy else None PKwMm9}/scvelo-0.1.10.dist-info/LICENSEBSD 3-Clause License Copyright (c) 2018, Theis Lab All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PK!HSmPOscvelo-0.1.10.dist-info/WHEEL HM K-*ϳR03rOK-J,/RH,rzd&Y)r$[)T&UrPK!HA:yA scvelo-0.1.10.dist-info/METADATAWrFߧ Nƒ턄ťPhұDҊݕ; $ʊ'Pd"k/ɫTy#)r/Wtɒr#O( y]6ݹ&ɔ:NdBy._:,h߈禠R ͼx0hcK*&q6Nt{|~"~棱1GV *]buM'k;L~IV>&r6B|,,_2ʋSTT&qJ5}%5PE-LCqB3'zbJO4tWu^<ɕsz qTW5KJ;ogW-Ͻc̚N)qjd'q)d74^U|E X)ʭM9]XUl[~^yrS㍩jdHtId K贜[(H;jO˷D;Ve|}4֡*Kn/a|uS^vٻe| nm׹I9VDw?lU'7-6bbS=)~|֜~OЗvXuM}?]TD̖\K%Y3@R)T|olĠI]nFb!6d_@fI֗$\,f5023@TаY=y]*GWYJjݑsk :;v~ޑ]">B-IK<)d9am^*RT> U,@3%]& H+/S4#by̋6Em %_F*ro8: FGß;"g+ٓ@¤sRVڔx5@5<M72eb=* FW#crV3gt l0>j&WϿ!?"hh/r1 xnR^@gz/5ђ$f:َ@ dPc;ǘqd_*mBel]s.4J@V4pqpplpvM K<6撧:A[x_>{IfaAKPo'+/6{1ߪP^Nz!85&CaTKcef.̀wE)՗-<$:Sܺ{~a]H6Hiͣ 4:6Lod/ G8M+\`[) 1vW:Ґr83JS`/N Vnr.Turũ0nST. 3kE5z\W #LLWIı\]z;"Fykj*=Xv;[.B6c5JuԮێ6 MRTe3d¬rP׫}ACK4@$7;S[%t/Ji_b@ie pZ-5}o56Nctm9BPK!H2k scvelo-0.1.10.dist-info/RECORDuַ|f&j!a  <W{zVBVAP4 y?  D#=s#œ{҈m=DR'D_ුcHr#ɢ wxGPxBYc0(Eg;|`$9~:ߪ{ȫV~]P;>;$ɾkUeEHIC檣ղH"6= M 4CKC&AՆɋuHS7.m T4P8-ȏ=Tjzg"}^suTM$,[(Ze7S1Ƹ%dR_b^MP]P/)h>fSn0X *uuW_?ŠfQ$;|eQk0[*l!̷&&7)QhN_ 58ciK'o:]j˭F0')_X8u̚EЃjdz +b{֬%y%$[u|XF-!a68Vm[=vS^y*E~~n(nv٧<[*y(ҼPE%Ev!$??m v%8ą=dE7̔h6< e ¦ .K>Piɬ,Rk\(2nLmytS&0㇐3g4yBexv}$c\>M7Fџ9> y:b}-2jcK])#7|0d%c>Zr +f6mzx1M8鰂-q{sT}_\aKN- ^^t;Lv B,J߃A6"IugV9m|~59y`y'q1#gmnqe ;xGmҨ7vN^y-,T$0?v+1#LHBpq[ 隿%3ܨH^0\/ah /\E]P V'/J 0Eso>Q.]q\.1.W@@k\-JxQة ˳Ny;6ib N<?hqgq1Eʪw#:ž8dGfi/er3y"@_PKWM scvelo/__init__.pyPKQM}rr scvelo/datasets.pyPKL+Mbx scvelo/get_version.pyPKPMɀscvelo/logging.pyPKWM-"scvelo/read_load.pyPKWM4scvelo/utils.pyPK|AMlS7scvelo/plotting/__init__.pyPK,MS* * 8scvelo/plotting/docs.pyPK Mv77Escvelo/plotting/pseudotime.pyPKXWM+tYuLscvelo/plotting/scatter.pyPKTVMo#qlscvelo/plotting/utils.pyPK͂TMimscvelo/plotting/velocity.pyPKXWM%';;%}scvelo/plotting/velocity_embedding.pyPKXWM[ ' '*scvelo/plotting/velocity_embedding_grid.pyPKnWM{*2 Mscvelo/preprocessing/__init__.pyPKnWMdWqqfscvelo/preprocessing/moments.pyPKWMC=<=<scvelo/preprocessing/utils.pyPK)VMAO7scvelo/tools/__init__.pyPKUMkJ#j9scvelo/tools/rank_velocity_genes.pyPKɄVMMOp3Xscvelo/tools/run.pyPKNMff^tscvelo/tools/solver.pyPKckQMU >p p scvelo/tools/terminal_states.pyPKUM:@ !scvelo/tools/transition_matrix.pyPKIVWM scvelo/tools/utils.pyPKRWM>##scvelo/tools/velocity.pyPK3jQM #scvelo/tools/velocity_confidence.pyPK9jQMp1'. . "scvelo/tools/velocity_embedding.pyPKVMs.5scvelo/tools/velocity_graph.pyPKkQMo #scvelo/tools/velocity_pseudotime.pyPKwMm9}/@scvelo-0.1.10.dist-info/LICENSEPK!HSmPOb scvelo-0.1.10.dist-info/WHEELPK!HA:yA  scvelo-0.1.10.dist-info/METADATAPK!H2k scvelo-0.1.10.dist-info/RECORDPK!!r