PK09ME77scvelo/__init__.py"""scvelo - stochastic single cell RNA velocity""" from .get_version import get_version __version__ = get_version(__file__) del get_version from .read_load import read, load from . import preprocessing as pp from . import tools as tl from . import plotting as pl from . import datasets from . import logging PK:MMe33scvelo/datasets.py"""Builtin Datasets. """ from .read_load import read, load from .preprocessing.utils import cleanup import numpy as np import pandas as pd def dentategyrus(): """Dentate Gyrus dataset from Hochgerner et al. (2018). Dentate gyrus is part of the hippocampus involved in learning, episodic memory formation and spatial coding. It is measured using 10X Genomics Chromium and described in Hochgerner et al. (2018). The data consists of 25,919 genes across 3,396 cells and provides several interesting characteristics. """ filename = 'data/DentateGyrus/10X43_1.loom' url = 'http://pklab.med.harvard.edu/velocyto/DG1/10X43_1.loom' adata = read(filename, backup_url=url, cleanup=True, sparse=True, cache=True) cleanup(adata, clean='all') url_louvain = 'https://github.com/theislab/scvelo_notebooks/raw/master/write/DG_clusters.npy' url_umap = 'https://github.com/theislab/scvelo_notebooks/raw/master/write/DG_umap.npy' adata.obs['louvain'] = pd.Categorical(load('./data/DentateGyrus/DG_clusters.npy', url_louvain)) adata.obsm['X_umap'] = load('./data/DentateGyrus/DG_umap.npy', url_umap) return adata def forebrain(): """Developing human forebrain. Forebrain tissue of a week 10 embryo, focusing on the glutamatergic neuronal lineage. """ filename = 'data/ForebrainGlut/hgForebrainGlut.loom' url = 'http://pklab.med.harvard.edu/velocyto/hgForebrainGlut/hgForebrainGlut.loom' adata = read(filename, backup_url=url, cleanup=True, sparse=True, cache=True) adata.var_names_make_unique() return adata def toy_data(n_obs): """Random samples from Dentate Gyrus. """ adata = dentategyrus() indices = np.random.choice(adata.n_obs, n_obs) adata = adata[indices] adata.obs_names = np.array(range(adata.n_obs), dtype='str') return adata PKL+Mbxscvelo/get_version.py""" A minimalistic version helper in the spirit of versioneer, that is able to run without build step using pkg_resources. Developed by P Angerer, see https://github.com/flying-sheep/get_version. """ import re import os from pathlib import Path from subprocess import run, PIPE, CalledProcessError from typing import NamedTuple, List, Union, Optional RE_VERSION = r"([\d.]+?)(?:\.dev(\d+))?(?:[_+-]([0-9a-zA-Z.]+))?" RE_GIT_DESCRIBE = r"v?(?:([\d.]+)-(\d+)-g)?([0-9a-f]{7})(-dirty)?" ON_RTD = os.environ.get("READTHEDOCS") == "True" def match_groups(regex, target): match = re.match(regex, target) if match is None: raise re.error(f"Regex does not match “{target}”. RE Pattern: {regex}", regex) return match.groups() class Version(NamedTuple): release: str dev: Optional[str] labels: List[str] @staticmethod def parse(ver): release, dev, labels = match_groups(f"{RE_VERSION}$", ver) return Version(release, dev, labels.split(".") if labels else []) def __str__(self): release = self.release if self.release else "0.0" dev = f".dev{self.dev}" if self.dev else "" labels = f'+{".".join(self.labels)}' if self.labels else "" return f"{release}{dev}{labels}" def get_version_from_dirname(name, parent): """Extracted sdist""" parent = parent.resolve() re_dirname = re.compile(f"{name}-{RE_VERSION}$") if not re_dirname.match(parent.name): return None return Version.parse(parent.name[len(name) + 1 :]) def get_version_from_git(parent): parent = parent.resolve() try: p = run(["git", "rev-parse", "--show-toplevel"], cwd=parent, stdout=PIPE, stderr=PIPE, encoding="utf-8", check=True) except (OSError, CalledProcessError): return None if Path(p.stdout.rstrip("\r\n")).resolve() != parent.resolve(): return None p = run(["git", "describe", "--tags", "--dirty", "--always", "--long", "--match", "v[0-9]*"], cwd=parent, stdout=PIPE, stderr=PIPE, encoding="utf-8", check=True) release, dev, hex_, dirty = match_groups( f"{RE_GIT_DESCRIBE}$", p.stdout.rstrip("\r\n") ) labels = [] if dev == "0": dev = None else: labels.append(hex_) if dirty and not ON_RTD: labels.append("dirty") return Version(release, dev, labels) def get_version_from_metadata(name: str, parent: Optional[Path] = None): try: from pkg_resources import get_distribution, DistributionNotFound except ImportError: return None try: pkg = get_distribution(name) except DistributionNotFound: return None # For an installed package, the parent is the install location path_pkg = Path(pkg.location).resolve() if parent is not None and path_pkg != parent.resolve(): msg = f"""\ metadata: Failed; distribution and package paths do not match: {path_pkg} != {parent.resolve()}\ """ return None return Version.parse(pkg.version) def get_version(package: Union[Path, str]) -> str: """Get the version of a package or module Pass a module path or package name. The former is recommended, since it also works for not yet installed packages. Supports getting the version from #. The directory name (as created by ``setup.py sdist``) #. The output of ``git describe`` #. The package metadata of an installed package (This is the only possibility when passing a name) Args: package: package name or module path (``…/module.py`` or ``…/module/__init__.py``) """ path = Path(package) if not path.suffix and len(path.parts) == 1: # Is probably not a path v = get_version_from_metadata(package) if v: return str(v) if path.suffix != ".py": msg = f"“package” is neither the name of an installed module nor the path to a .py file." if path.suffix: msg += f" Unknown file suffix {path.suffix}" raise ValueError(msg) if path.name == "__init__.py": name = path.parent.name parent = path.parent.parent else: name = path.with_suffix("").name parent = path.parent return str( get_version_from_dirname(name, parent) or get_version_from_git(parent) or get_version_from_metadata(name, parent) or "0.0.0" ) __version__ = get_version(__file__) if __name__ == "__main__": print(__version__)PK",M,scvelo/logging.pyfrom scanpy import settings from scanpy import logging as logg from datetime import datetime from time import time def get_passed_time(): now = time() elapsed = now - settings._previous_time settings._previous_time = now return elapsed def print_version(): from . import __version__ logg._write_log('Running scvelo', __version__, 'on {}.'.format(datetime.now().strftime("%Y-%m-%d %H:%M"))) def print_versions(): for mod in ['scvelo', 'scanpy', 'anndata', 'loompy', 'numpy', 'scipy', 'matplotlib', 'sklearn', 'pandas']: mod_name = mod[0] if isinstance(mod, tuple) else mod mod_install = mod[1] if isinstance(mod, tuple) else mod try: print('{}=={}'.format(mod_install, __import__(mod_name).__version__), end=' ') except (ImportError, AttributeError): pass print() from anndata.logging import print_memory_usage print_version_and_date = print_version PK8b:MB scvelo/read_load.pyimport os, numpy, pandas from urllib.request import urlretrieve from pathlib import Path from scanpy.api import read def load(filename, backup_url=None, **kwargs): numpy_ext = {'npy', 'npz'} pandas_ext = {'csv', 'txt'} if not os.path.exists(filename) and backup_url is None: raise FileNotFoundError('Did not find file {}.'.format(filename)) elif not os.path.exists(filename): d = os.path.dirname(filename) if not os.path.exists(d): os.makedirs(d) urlretrieve(backup_url, filename) ext = Path(filename).suffixes[-1][1:] if ext in numpy_ext: return numpy.load(filename, **kwargs) elif ext in pandas_ext: return pandas.read_csv(filename, **kwargs) else: raise ValueError('"{}" does not end on a valid extension.\n' 'Please, provide one of the available extensions.\n{}\n' .format(filename, numpy_ext|pandas_ext)) PKN&MYscvelo/plotting/__init__.pyfrom .scatter import scatter from .velocity import velocity from .velocity_embedding import velocity_embedding from .velocity_embedding_grid import velocity_embedding_grid PK,MS* * scvelo/plotting/docs.py"""Shared docstrings for plotting function parameters. """ from textwrap import dedent def doc_params(**kwds): """\ Docstrings should start with "\" in the first line for proper formatting. """ def dec(obj): obj.__doc__ = dedent(obj.__doc__).format(**kwds) return obj return dec doc_scatter = """\ basis: `str` (default='umap') Key for embedding. color: `str`, list of `str` or `None` (default: `None`) Key for annotations of observations/cells or variables/genes use_raw : `bool` (default: `None`) Use `raw` attribute of `adata` if present. layer: `str`, list of `str` or `None` (default: `None`) Specify the layer for `color`. color_map: `str` (default: `matplotlib.rcParams['image.cmap']`) String denoting matplotlib color map. colorbar: `bool` (default: `False`) Whether to show colorbar. palette: list of `str` (default: `None`) Colors to use for plotting groups (categorical annotation). size: `float` (default: 5) Point size. alpha: `float` (default: 1) Set blending - 0 transparent to 1 opaque. perc: tuple, e.g. [2,98] (default: `None`) Specify percentile for continuous coloring. sort_order: `bool` (default: `True`) For continuous annotations used as color parameter, plot data points with higher values on top of others. groups: `str` (default: `all groups`) Restrict to a few categories in categorical observation annotation. components: `str` or list of `str` (default: '1,2') For instance, ['1,2', '2,3']. projection: {'2d', '3d'} (default: '2d') Projection of plot. legend_loc: str (default: 'none') Location of legend, either 'on data', 'right margin' or valid keywords for matplotlib.legend. legend_fontsize: `int` (default: `None`) Legend font size. legend_fontweight: {'normal', 'bold', ...} (default: `None`) Legend font weight. Defaults to 'bold' if `legend_loc = 'on data'`, otherwise to 'normal'. Available are `['light', 'normal', 'medium', 'semibold', 'bold', 'heavy', 'black']`. right_margin: `float` or list of `float` (default: `None`) Adjust the width of the space right of each plotting panel. left_margin: `float` or list of `float` (default: `None`) Adjust the width of the space left of each plotting panel. xlabel: `str` (default: `None`) Label of x-axis. ylabel: `str` (default: `None`) Label of y-axis. title: `str` (default: `None`) Provide title for panels either as, e.g. `["title1", "title2", ...]`. fontsize: `float` (default: `None`) Label font size. figsize: tuple (default: `(7,5)`) Figure size. dpi: `int` (default: 80) Figure dpi. frameon: `bool` (default: `True`) Draw a frame around the scatter plot. show: `bool`, optional (default: `None`) Show the plot, do not return axis. save: `bool` or `str`, optional (default: `None`) If `True` or a `str`, save the figure. A string is appended to the default filename. Infer the filetype if ending on {'.pdf', '.png', '.svg'}. ax: `matplotlib.Axes`, optional (default: `None`) A matplotlib axes object. Only works if plotting a single component.\ """PK Mv77scvelo/plotting/pseudotime.pyfrom .utils import * def principal_curve(adata): X_curve = adata.uns['principal_curve']['projections'] ixsort = adata.uns['principal_curve']['ixsort'] pl.plot(X_curve[ixsort, 0], X_curve[ixsort, 1], c="k", lw=3, zorder=2000000) def pseudotime(adata, gene_list, ckey='velocity', reverse=False): ixsort = adata.uns['principal_curve']['ixsort'] arclength = adata.uns['principal_curve']['arclength'] if reverse: arclength /= np.max(arclength) else: arclength = (np.max(arclength) - arclength) / np.max(arclength) cell_subset = adata.uns['principal_curve']['cell_subset'] adata_subset = adata[cell_subset].copy() gs = pl.GridSpec(1, len(gene_list)) for n, gene in enumerate(gene_list): i = np.where(adata_subset.var_names == gene)[0][0] ax = pl.subplot(gs[n]) lb, ub = np.percentile(adata_subset.obsm[ckey][:, i], [.5, 99.5]) c = np.clip(adata_subset.obsm[ckey][ixsort, i], lb, ub) # pl.scatter(arclength[ixsort], adata2.obsm['Mu'][ixsort, i], alpha=0.7, c='b', s=5, label="unspliced") pl.scatter(arclength[ixsort], adata_subset.obsm['Ms'][ixsort, i] * adata_subset.uns['velocity_pars']['gamma'][i], c=c, cmap='coolwarm', alpha=1, s=1, label="spliced") c = c / abs(c).max() * (adata_subset.obsm['Ms'][ixsort, i] * adata_subset.uns['velocity_pars']['gamma'][i]).max() z = np.ma.polyfit(arclength[ixsort], c, 8) fun = np.poly1d(z) pl.plot(arclength[ixsort], fun(arclength[ixsort]), label=ckey) # Hide the right and top spines ax.spines['right'].set_visible(False) ax.spines['top'].set_visible(False) ax.xaxis.set_major_locator(MaxNLocator(nbins=3)) ax.yaxis.set_major_locator(MaxNLocator(nbins=3)) pl.ylabel(gene) pl.title('Colored by ' + ckey) PK:M 1: for i, gs in enumerate(pl.GridSpec(1, len(colors), pl.figure(None, (figsize[0]*len(colors), figsize[1]), dpi=dpi))): scatter(adata, basis=basis, layer=layer, color=color[i], xlabel=xlabel, ylabel=ylabel, color_map=color_map, perc=perc, size=size, alpha=alpha, fontsize=fontsize, frameon=frameon, title=title, show=False, colorbar=colorbar, components=components, figsize=figsize, dpi=dpi, save=None, ax=pl.subplot(gs), use_raw=use_raw, sort_order=sort_order, groups=groups, projection=projection, legend_loc=legend_loc, legend_fontsize=legend_fontsize, legend_fontweight=legend_fontweight, palette=palette, right_margin=right_margin, left_margin=left_margin, **kwargs) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax elif len(layers) > 1: for i, gs in enumerate(pl.GridSpec(1, len(layers), pl.figure(None, (figsize[0] * len(layers), figsize[1]), dpi=dpi))): scatter(adata, basis=basis, layer=layers[i], color=color, xlabel=xlabel, ylabel=ylabel, color_map=color_map, perc=perc, size=size, alpha=alpha, fontsize=fontsize, frameon=frameon, title=title, show=False, colorbar=colorbar, components=components, figsize=figsize, dpi=dpi, save=None, ax=pl.subplot(gs), use_raw=use_raw, sort_order=sort_order, groups=groups, projection=projection, legend_loc=legend_loc, legend_fontsize=legend_fontsize, legend_fontweight=legend_fontweight, palette=palette, right_margin=right_margin, left_margin=left_margin, **kwargs) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax else: if ax is None: ax = pl.figure(None, figsize, dpi=dpi).gca() if color_map is None: color_map = 'viridis_r' if isinstance(color, str) and (color == 'root' or color == 'end') else 'RdBu_r' if color is None: color = 'clusters' if 'clusters' in adata.obs.keys() else 'louvain' if 'louvain' in adata.obs.keys() else 'grey' is_embedding = (x is None) | (y is None) if isinstance(color, str) and color in adata.obs.keys() \ and adata.obs[color].dtype.name == 'category' and is_embedding: ax = scpl.scatter(adata, color=color, use_raw=use_raw, sort_order=sort_order, alpha=alpha, basis=basis, groups=groups, components=components, projection=projection, legend_loc=legend_loc, legend_fontsize=legend_fontsize, legend_fontweight=legend_fontweight, color_map=color_map, palette=palette, right_margin=right_margin, left_margin=left_margin, size=size, title=title, frameon=frameon, show=False, save=None, ax=ax, **kwargs) else: if is_embedding: X_emb = adata.obsm['X_' + basis][:, get_components(components)] x, y = X_emb[:, 0], X_emb[:, 1] ax.tick_params(which='both', bottom=False, left=False, labelbottom=False, labelleft=False) else: ax.xaxis.set_major_locator(MaxNLocator(nbins=3)) ax.yaxis.set_major_locator(MaxNLocator(nbins=3)) labelsize = int(fontsize * .75) if fontsize is not None else None ax.tick_params(axis='both', which='major', labelsize=labelsize) c = interpret_colorkey(adata, color, layer, perc) pl.scatter(x, y, c=c, cmap=color_map, s=size, alpha=alpha, zorder=0, **kwargs) if isinstance(xlabel, str) and isinstance(ylabel, str): pl.xlabel(xlabel, fontsize=fontsize) pl.ylabel(ylabel, fontsize=fontsize) elif basis is not None: component_name = ('DC' if basis == 'diffmap' else 'tSNE' if basis == 'tsne' else 'UMAP' if basis == 'umap' else 'PC' if basis == 'pca' else basis.replace('draw_graph_', '').upper() if 'draw_graph' in basis else basis) pl.xlabel(component_name + '1') pl.ylabel(component_name + '2') if isinstance(title, str): pl.title(title, fontsize=fontsize) elif isinstance(layer, str) and isinstance(color, str): pl.title(color + ' ' + layer, fontsize=fontsize) elif isinstance(color, str): pl.title(color, fontsize=fontsize) if not frameon: pl.axis('off') if colorbar and len(c) == adata.n_obs and c.dtype: plot_colorbar(ax) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return axPK:Mr?scvelo/plotting/utils.pyimport numpy as np import pandas as pd import matplotlib.pyplot as pl from matplotlib.ticker import MaxNLocator from mpl_toolkits.axes_grid1.inset_locator import inset_axes from scanpy.plotting.utils import savefig_or_show zeileis_26 = [ "#023fa5", "#7d87b9", "#bec1d4", "#d6bcc0", "#bb7784", "#8e063b", "#4a6fe3", "#8595e1", "#b5bbe3", "#e6afb9", "#e07b91", "#d33f6a", "#11c638", "#8dd593", "#c6dec7", "#ead3c6", "#f0b98d", "#ef9708", "#0fcfc0", "#9cded6", "#d5eae7", "#f3e1eb", "#f6c4e1", "#f79cd4", '#7f7f7f', "#c7c7c7", "#1CE6FF", "#336600"] def get_colors(adata, basis): if adata.obs[basis].dtype.name != 'category': adata.obs[basis] = pd.Categorical(adata.obs[basis]) clusters_colors = adata.uns[basis + '_colors'] if basis+'_colors' in adata.uns.keys() else zeileis_26 cluster_ix = adata.obs[basis].cat.codes return np.array([clusters_colors[cluster_ix[i]] for i in range(adata.n_obs)]) def bound(c, perc=None): lb, ub = np.percentile(c, perc) return np.clip(c, lb, ub) def interpret_colorkey(adata, c=None, layer=None, perc=None): if isinstance(c, str): if (c == 'louvain' or c == 'clusters') and adata.obs[c].dtype.name != 'category': adata.obs[c] = pd.Categorical(adata.obs[c]) if c in adata.obs.keys(): # color by observation key c = get_colors(adata, basis=c) if adata.obs[c].dtype.name == 'category' \ else adata.obs[c] if perc is None else bound(adata.obs[c], perc=perc) elif c in adata.var_names: # color by var in specific layer c = adata[:, c].layers[layer] if layer in adata.layers.keys() else adata[:, c].X if perc is not None: c = bound(c, perc=perc) elif c is None: # color by cluster or louvain or grey if no color is specified c = get_colors(adata, 'clusters') if 'clusters' in adata.obs.keys() \ else get_colors(adata, 'louvain') if 'louvain' in adata.obs.keys() else 'grey' elif len(np.array(c).flatten()) == adata.n_obs: # continuous coloring c = np.array(c).flatten() if perc is None else bound(np.array(c).flatten(), perc=perc) else: raise ValueError('color key is invalid! pass valid observation annotation or a gene name') return c def get_components(components=None): if components is None: components = '1,2' if isinstance(components, str): components = components.split(',') return np.array(components).astype(int) - 1 def plot_colorbar(ax, orientation='vertical'): cb = pl.colorbar(orientation=orientation, cax=inset_axes(ax, width="2%", height="30%", loc=4, borderpad=0)) cb.locator = (MaxNLocator(nbins=3)) cb.update_ticks() def savefig(writekey, show=False, dpi=None, save=None): """Save current figure to file. """ savefig_or_show('velocity_' + writekey + '_' if writekey != '' else 'velocity_', dpi=dpi, save=save, show=show) # def phase(adata, var=None, x=None, y=None, color='louvain', fits='all', xlabel='spliced', ylabel='unspliced', # fontsize=None, show=True, ax=None, **kwargs): # if isinstance(var, str) and (var in adata.var_names): # if (x is None) or (y is None): # ix = np.where(adata.var_names == var)[0][0] # x, y = adata.layers['Ms'][:, ix], adata.layers['Mu'][:, ix] # else: # ValueError('var not found in adata.var_names.') # # ax = scatter(adata, x=x, y=y, color=color, frameon=True, title=var, xlabel=xlabel, ylabel=ylabel, ax=ax, **kwargs) # # xnew = np.linspace(0, x.max() * 1.02) # fits = adata.layers.keys() if fits == 'all' else fits # fits = [fit for fit in fits if 'velocity' in fit] # for fit in fits: # linestyle = '--' if 'stochastic' in fit else '-' # pl.plot(xnew, adata.var[fit+'_gamma'][ix] / adata.var[fit+'_beta'][ix] * xnew # + adata.var[fit+'_offset'][ix] / adata.var[fit+'_beta'][ix], c='k', linestyle=linestyle) # # if show: pl.show() # else: return axPKS,M77scvelo/plotting/velocity.pyfrom ..preprocessing.moments import second_order_moments from .scatter import scatter from .utils import savefig import numpy as np from matplotlib.ticker import MaxNLocator import matplotlib.pyplot as pl def velocity(adata, var_names=None, basis='umap', mode=None, fits='all', layers='all', color=None, color_map='RdBu_r', perc=[2,98], size=.2, alpha=.5, fontsize=8, figsize=(3,2), dpi=120, show=True, save=None, ax=None, **kwargs): """Phase and velocity plot for set of genes. The phase plot shows spliced against unspliced expressions with steady-state fit. Further the embedding is shown colored by velocity and expression. Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. var_names: `str` or list of `str` (default: `None`) Which variables to show. basis: `str` (default: `'umap'`) Key for embedding coordinates. mode: `'stochastic'` or `None` (default: `None`) Whether to show show covariability phase portrait. fits: `str` or list of `str` (default: `'all'`) Which steady-state estimates to show. layers: `str` or list of `str` (default: `'all'`) Which layers to show. color: `str`, list of `str` or `None` (default: `None`) Key for annotations of observations/cells or variables/genes color_map: `str` (default: `matplotlib.rcParams['image.cmap']`) String denoting matplotlib color map. perc: tuple, e.g. [2,98] (default: `None`) Specify percentile for continuous coloring. size: `float` (default: 5) Point size. alpha: `float` (default: 1) Set blending - 0 transparent to 1 opaque. fontsize: `float` (default: `None`) Label font size. figsize: tuple (default: `(7,5)`) Figure size. dpi: `int` (default: 80) Figure dpi. show: `bool`, optional (default: `None`) Show the plot, do not return axis. save: `bool` or `str`, optional (default: `None`) If `True` or a `str`, save the figure. A string is appended to the default filename. Infer the filetype if ending on {'.pdf', '.png', '.svg'}. ax: `matplotlib.Axes`, optional (default: `None`) A matplotlib axes object. Only works if plotting a single component. """ var_names = [var_names] if isinstance(var_names, str) else var_names var_names = [var for var in var_names if var in adata.var_names[adata.var.velocity_genes]] \ if isinstance(var_names, list) else adata.var_names[(adata.layers['spliced'] > 0).sum(0).argsort()[::-1][:4]] layers = ['velocity', 'Ms', 'variance_velocity'] if layers == 'all' else layers layers = [layer for layer in layers if layer in adata.layers.keys()] fits = adata.layers.keys() if fits == 'all' else fits fits = [fit for fit in fits if all(['velocity' in fit, fit + '_gamma' in adata.var.keys()])] n_row, n_col = len(var_names), (1 + len(layers) + (mode == 'stochastic')*2) ax = pl.figure(figsize=(figsize[0]*n_col, figsize[1]*n_row), dpi=dpi) if ax is None else ax gs = pl.GridSpec(n_row, n_col, wspace=0.3, hspace=0.5) for v, var in enumerate(var_names): ix = np.where(adata.var_names == var)[0][0] s, u = adata.layers['Ms'][:, ix], adata.layers['Mu'][:, ix] # spliced/unspliced phase portrait with steady-state estimate ax = pl.subplot(gs[v * n_col]) scatter(adata, x=s, y=u, color=color, frameon=True, title=var, xlabel='spliced', ylabel='unspliced', show=False, save=False, ax=ax, fontsize=fontsize, size=size, alpha=alpha, **kwargs) xnew = np.linspace(0, s.max() * 1.02) for fit in fits: linestyle = '--' if 'stochastic' in fit else '-' pl.plot(xnew, adata.var[fit + '_gamma'][ix] / adata.var[fit + '_beta'][ix] * xnew + adata.var[fit + '_offset'][ix] / adata.var[fit + '_beta'][ix], c='k', linestyle=linestyle) if v == len(var_names)-1: pl.legend(fits, loc='lower right', prop={'size': .5*fontsize}) ax.xaxis.set_major_locator(MaxNLocator(nbins=3)) ax.yaxis.set_major_locator(MaxNLocator(nbins=3)) ax.tick_params(axis='both', which='major', labelsize=.7*fontsize) # velocity and expression plots for l, layer in enumerate(layers): ax = pl.subplot(gs[v*n_col + l + 1]) title = 'expression' if layer == 'Ms' else layer scatter(adata, basis=basis, color=var, layer=layer, color_map=color_map, title=title, perc=perc, fontsize=fontsize, size=size, alpha=alpha, show=False, ax=ax, save=False, **kwargs) if mode == 'stochastic': ss, us = second_order_moments(adata) ax = pl.subplot(gs[v*n_col + len(layers) + 1]) x = 2 * (ss - s**2) - s y = 2 * (us - u * s) + u + 2 * s * \ adata.var['stochastic_velocity_offset'][ix] / adata.var['stochastic_velocity_beta'][ix] scatter(adata, x=x, y=y, color=color, title=var, fontsize=40/n_col, show=False, ax=ax, save=False, perc=perc, xlabel=r'2 $\Sigma_s - \langle s \rangle$', ylabel=r'2 $\Sigma_{us} + \langle u \rangle$', **kwargs) xnew = np.linspace(x.min(), x.max() * 1.02) fits = adata.layers.keys() if fits == 'all' else fits fits = [fit for fit in fits if 'velocity' in fit] for fit in fits: linestyle = '--' if 'stochastic' in fit else '-' pl.plot(xnew, adata.var[fit + '_gamma'][ix] / adata.var[fit + '_beta'][ix] * xnew + adata.var[fit + '_offset2'][ix] / adata.var[fit + '_beta'][ix], c='k', linestyle=linestyle) if v == len(var_names) - 1: pl.legend(fits, loc='lower right', prop={'size': 34/n_col}) if isinstance(save, str): savefig('', dpi=dpi, save=save, show=show) if show: pl.show() else: return ax PK:M:%scvelo/plotting/velocity_embedding.pyfrom ..tools.velocity_embedding import velocity_embedding as tl_velocity_embedding from .utils import interpret_colorkey, get_components, savefig from .scatter import scatter from .docs import doc_scatter, doc_params from matplotlib.colors import is_color_like import matplotlib.pyplot as pl import numpy as np @doc_params(scatter=doc_scatter) def velocity_embedding(adata, basis=None, vkey='velocity', density=1, scale=1, color=None, use_raw=None, layer=None, color_map=None, colorbar=False, palette=None, size=None, alpha=.2, perc=None, sort_order=True, groups=None, components=None, projection='2d', legend_loc='none', legend_fontsize=None, legend_fontweight=None, right_margin=None, left_margin=None, xlabel=None, ylabel=None, title=None, fontsize=None, figsize=(14,10), dpi=150, frameon=False, show=True, save=None, ax=None, **kwargs): """\ Scatter plot of velocities on the embedding Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. x: `str`, `np.ndarray` or `None` (default: `None`) x coordinate y: `str`, `np.ndarray` or `None` (default: `None`) y coordinate vkey: `str` or `None` (default: `None`) Key for annotations of observations/cells or variables/genes. density: `float` (default: 1) Amount of velocities to show - 0 none to 1 all scale: `float` (default: 1) Length of velocities in the embedding. {scatter} Returns ------- `matplotlib.Axis` if `show==False` """ if basis is None: basis = [key for key in ['pca', 'tsne', 'umap'] if 'X_' + key in adata.obsm.keys()][-1] colors = color if isinstance(color, (list, tuple)) else [color] layers = layer if isinstance(layer, (list, tuple)) else [layer] vkeys = vkey if isinstance(vkey, (list, tuple)) else [vkey] for key in vkeys: if key + '_' + basis not in adata.obsm_keys(): tl_velocity_embedding(adata, basis=basis, vkey=key) if len(colors) > 1: for i, gs in enumerate(pl.GridSpec(1, len(colors), pl.figure(None, (figsize[0] * len(colors), figsize[1]), dpi=dpi))): velocity_embedding(adata, basis=basis, vkey=vkey, layer=layer, density=density, scale=scale, perc=perc, color=colors[i], use_raw=use_raw, sort_order=sort_order, alpha=alpha, groups=groups, components=components, projection=projection, legend_loc=legend_loc, legend_fontsize=legend_fontsize, legend_fontweight=legend_fontweight, color_map=color_map, palette=palette, frameon=frameon, right_margin=right_margin, left_margin=left_margin, size=size, title=title, show=False, figsize=figsize, dpi=dpi, save=None, ax=pl.subplot(gs), xlabel=xlabel, ylabel=ylabel, colorbar=colorbar, fontsize=fontsize, **kwargs) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax elif len(layers) > 1: for i, gs in enumerate(pl.GridSpec(1, len(layers), pl.figure(None, (figsize[0] * len(layers), figsize[1]), dpi=dpi))): velocity_embedding(adata, basis=basis, vkey=vkey, layer=layers[i], density=density, scale=scale, perc=None, color=color, use_raw=use_raw, sort_order=sort_order, alpha=alpha, groups=groups, components=components, projection=projection, legend_loc=legend_loc, legend_fontsize=legend_fontsize, legend_fontweight=legend_fontweight, color_map=color_map, palette=palette, frameon=frameon, right_margin=right_margin, left_margin=left_margin, size=size, title=title, show=False, figsize=figsize, dpi=dpi, save=None, ax=pl.subplot(gs), xlabel=xlabel, ylabel=ylabel, colorbar=colorbar, fontsize=fontsize, **kwargs) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax elif len(vkeys) > 1: for i, gs in enumerate(pl.GridSpec(1, len(vkeys), pl.figure(None, (figsize[0] * len(vkeys), figsize[1]), dpi=dpi))): velocity_embedding(adata, basis=basis, vkey=vkeys[i], layer=layer, density=density, scale=scale, perc=None, color=color, use_raw=use_raw, sort_order=sort_order, alpha=alpha, groups=groups, components=components, projection=projection, legend_loc=legend_loc, legend_fontsize=legend_fontsize, legend_fontweight=legend_fontweight, color_map=color_map, palette=palette, frameon=frameon, right_margin=right_margin, left_margin=left_margin, size=size, title=title, show=False, figsize=figsize, dpi=dpi, save=None, ax=pl.subplot(gs), xlabel=xlabel, ylabel=ylabel, colorbar=colorbar, fontsize=fontsize, **kwargs) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax else: ix_choice = np.random.choice(adata.n_obs, size=int(density * adata.n_obs), replace=False) X = adata.obsm['X_' + basis][:, get_components(components)][ix_choice] V = adata.obsm[vkey + '_' + basis][ix_choice] if color_map is None: color_map = 'viridis_r' if isinstance(color, str) and (color == 'root' or color == 'end') else 'RdBu_r' if color is None: color = 'clusters' if 'clusters' in adata.obs.keys() else 'louvain' if 'louvain' in adata.obs.keys() else 'grey' _kwargs = {"scale": scale, "cmap": color_map, "angles": 'xy', "scale_units": 'xy', "width": .0005, "edgecolors": 'k', "headwidth": 9, "headlength": 10, "headaxislength": 6, "linewidth": .25} _kwargs.update(kwargs) if ax is None: ax = pl.figure(None, figsize, dpi=dpi).gca() C = interpret_colorkey(adata, color, layer, perc) if len(C) == adata.n_obs: C = C[ix_choice] if is_color_like(C[0]): pl.quiver(X[:, 0], X[:, 1], V[:, 0], V[:, 1], color=C, zorder=1, **_kwargs) else: pl.quiver(X[:, 0], X[:, 1], V[:, 0], V[:, 1], C, zorder=1, **_kwargs) ax = scatter(adata, basis=basis, layer=layer, color=color, xlabel=xlabel, ylabel=ylabel, color_map=color_map, perc=perc, size=size, alpha=alpha, fontsize=fontsize, frameon=frameon, title=title, show=False, colorbar=colorbar, components=components, figsize=figsize, dpi=dpi, save=None, ax=ax, use_raw=use_raw, sort_order=sort_order, groups=groups, projection=projection, legend_loc=legend_loc, legend_fontsize=legend_fontsize, legend_fontweight=legend_fontweight, palette=palette, right_margin=right_margin, left_margin=left_margin, **kwargs) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax PK:M%%*scvelo/plotting/velocity_embedding_grid.pyfrom ..tools.velocity_embedding import quiver_autoscale, velocity_embedding from .utils import get_components, savefig from .scatter import scatter from .docs import doc_scatter, doc_params from sklearn.neighbors import NearestNeighbors from scipy.stats import norm as normal import matplotlib.pyplot as pl import numpy as np def compute_velocity_on_grid(X_emb, V_emb, density=1, smooth=0.5, n_neighbors=None, min_mass=.5): # prepare grid n_obs, n_dim = X_emb.shape grs = [] for dim_i in range(n_dim): m, M = np.min(X_emb[:, dim_i]), np.max(X_emb[:, dim_i]) m = m - .01 * np.abs(M - m) M = M + .01 * np.abs(M - m) gr = np.linspace(m, M, 50 * density) grs.append(gr) meshes_tuple = np.meshgrid(*grs) X_grid = np.vstack([i.flat for i in meshes_tuple]).T # estimate grid velocities if n_neighbors is None: n_neighbors = int(n_obs/50) nn = NearestNeighbors(n_neighbors=n_neighbors, n_jobs=-1) nn.fit(X_emb) dists, neighs = nn.kneighbors(X_grid) std = np.mean([(g[1] - g[0]) for g in grs]) weight = normal.pdf(loc=0, scale=smooth * std, x=dists) p_mass = weight.sum(1) V_grid = (V_emb[neighs] * weight[:, :, None]).sum(1) / np.maximum(1, p_mass)[:, None] X_grid, V_grid = X_grid[p_mass > min_mass], V_grid[p_mass > min_mass] V_grid /= 3 * quiver_autoscale(X_grid[:, 0], X_grid[:, 1], V_grid[:, 0], V_grid[:, 1]) return X_grid, V_grid @doc_params(scatter=doc_scatter) def velocity_embedding_grid(adata, basis=None, vkey='velocity', density=1, scale=1, min_mass=.5, smooth=.5, n_neighbors=None, X=None, V=None, principal_curve=False, color=None, use_raw=None, layer=None, color_map=None, colorbar=False, palette=None, size=None, alpha=.2, perc=None, sort_order=True, groups=None, components=None, projection='2d', legend_loc='none', legend_fontsize=None, legend_fontweight=None, right_margin=None, left_margin=None, xlabel=None, ylabel=None, title=None, fontsize=None, figsize=(7,5), dpi=100, frameon=False, show=True, save=None, ax=None, **kwargs): """\ Scatter plot of velocities for the grid points on the embedding Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. x: `str`, `np.ndarray` or `None` (default: `None`) x coordinate y: `str`, `np.ndarray` or `None` (default: `None`) y coordinate vkey: `str` or `None` (default: `None`) Key for annotations of observations/cells or variables/genes. density: `float` (default: 1) Amount of velocities to show - 0 none to 1 all scale: `float` (default: 1) Length of velocities in the embedding. min_mass: `float` (default: 0.5) Minimum threshold for mass to be shown. smooth: `float` (default: 0.5) Multiplication factor for scale in Gaussian kernel around grid point. n_neighbors: `int` (default: None) Number of neighbors to consider around grid point. X: `np.ndarray` (default: None) embedding grid point coordinates V: `np.ndarray` (default: None) embedding grid velocity coordinates {scatter} Returns ------- `matplotlib.Axis` if `show==False` """ if basis is None: basis = [key for key in ['pca', 'tsne', 'umap'] if 'X_' + key in adata.obsm.keys()][-1] colors = color if isinstance(color, (list, tuple)) else [color] layers = layer if isinstance(layer, (list, tuple)) else [layer] vkeys = vkey if isinstance(vkey, (list, tuple)) else [vkey] for key in vkeys: if key + '_' + basis not in adata.obsm_keys(): velocity_embedding(adata, basis=basis, vkey=key) if X is None and V is None: X, V = compute_velocity_on_grid(X_emb=adata.obsm['X_' + basis][:, get_components(components)], V_emb=adata.obsm[vkeys[0] + '_' + basis], density=density, smooth=smooth, n_neighbors=n_neighbors, min_mass=min_mass) if len(colors) > 1: for i, gs in enumerate(pl.GridSpec(1, len(colors), pl.figure(None, (figsize[0] * len(colors), figsize[1]), dpi=dpi))): velocity_embedding_grid(adata, basis=basis, vkey=vkey, layer=layer, density=density, scale=scale, X=X, V=V, perc=perc, color=colors[i], min_mass=min_mass, smooth=smooth, n_neighbors=n_neighbors, principal_curve=principal_curve, use_raw=use_raw, sort_order=sort_order, alpha=alpha, groups=groups, components=components, projection=projection, legend_loc=legend_loc, legend_fontsize=legend_fontsize, legend_fontweight=legend_fontweight, color_map=color_map, palette=palette, frameon=frameon, right_margin=right_margin, left_margin=left_margin, size=size, title=title, show=False, figsize=figsize, dpi=dpi, save=None, ax=pl.subplot(gs), xlabel=xlabel, ylabel=ylabel, colorbar=colorbar, fontsize=fontsize, **kwargs) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax elif len(layers) > 1: for i, gs in enumerate(pl.GridSpec(1, len(layers), pl.figure(None, (figsize[0] * len(layers), figsize[1]), dpi=dpi))): velocity_embedding_grid(adata, basis=basis, vkey=vkey, layer=layers[i], density=density, scale=scale, X=X, V=V, perc=perc, color=color, min_mass=min_mass, smooth=smooth, n_neighbors=n_neighbors, principal_curve=principal_curve, use_raw=use_raw, sort_order=sort_order, alpha=alpha, groups=groups, components=components, projection=projection, legend_loc=legend_loc, legend_fontsize=legend_fontsize, legend_fontweight=legend_fontweight, color_map=color_map, palette=palette, frameon=frameon, right_margin=right_margin, left_margin=left_margin, size=size, title=title, show=False, figsize=figsize, dpi=dpi, save=None, ax=pl.subplot(gs), xlabel=xlabel, ylabel=ylabel, colorbar=colorbar, fontsize=fontsize, **kwargs) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax elif len(vkeys) > 1: for i, gs in enumerate(pl.GridSpec(1, len(vkeys), pl.figure(None, (figsize[0] * len(vkeys), figsize[1]), dpi=dpi))): velocity_embedding_grid(adata, basis=basis, vkey=vkeys[i], layer=layer, density=density, scale=scale, X=X, V=V, perc=perc, color=color, min_mass=min_mass, smooth=smooth, n_neighbors=n_neighbors, principal_curve=principal_curve, use_raw=use_raw, sort_order=sort_order, alpha=alpha, groups=groups, components=components, projection=projection, legend_loc=legend_loc, legend_fontsize=legend_fontsize, legend_fontweight=legend_fontweight, color_map=color_map, palette=palette, frameon=frameon, right_margin=right_margin, left_margin=left_margin, size=size, title=title, show=False, figsize=figsize, dpi=dpi, save=None, ax=pl.subplot(gs), xlabel=xlabel, ylabel=ylabel, colorbar=colorbar, fontsize=fontsize, **kwargs) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax else: _kwargs = {"scale": scale, "angles": 'xy', "scale_units": 'xy', "width": .001, "color": 'black', "edgecolors": 'k', "headwidth": 4.5, "headlength": 5, "headaxislength": 3, "linewidth": .2} _kwargs.update(kwargs) if color_map is None: color_map = 'viridis_r' if isinstance(color, str) and (color == 'root' or color == 'end') else 'RdBu_r' if ax is None: ax = pl.figure(None, figsize, dpi=dpi).gca() pl.quiver(X[:, 0], X[:, 1], V[:, 0], V[:, 1], **_kwargs, zorder=1) if principal_curve: curve = adata.uns['principal_curve']['projections'] pl.plot(curve[:, 0], curve[:, 1], c="w", lw=6, zorder=2) pl.plot(curve[:, 0], curve[:, 1], c="k", lw=3, zorder=3) ax = scatter(adata, basis=basis, layer=layer, color=color, xlabel=xlabel, ylabel=ylabel, color_map=color_map, perc=perc, size=size, alpha=alpha, fontsize=fontsize, frameon=frameon, title=title, show=False, colorbar=colorbar, components=components, figsize=(7, 5), dpi=80, save=None, ax=ax, use_raw=use_raw, sort_order=sort_order, groups=groups, projection=projection, legend_loc=legend_loc, legend_fontsize=legend_fontsize, legend_fontweight=legend_fontweight, palette=palette, right_margin=right_margin, left_margin=left_margin, **kwargs) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax PKk(MφFiqq scvelo/preprocessing/__init__.pyfrom .utils import show_proportions, cleanup, filter_and_normalize, recipe_velocity from .moments import moments PKC~:Mscvelo/preprocessing/moments.pyfrom scipy.sparse import csr_matrix from ..logging import logg, settings def normalize_layers(adata, layers={'spliced', 'unspliced'}, copy=False): """Normalize by total counts to median """ from scanpy.api.pp import normalize_per_cell, filter_cells for layer in layers: subset, counts = filter_cells(adata.layers[layer], min_counts=1) adata.layers[layer] = normalize_per_cell(adata.layers[layer], None, counts, copy=True) return adata if copy else None def get_connectivities(adata, mode): connectivities = adata.uns['neighbors'][mode] connectivities.setdiag(1) connectivities = connectivities.multiply(1. / connectivities.sum(1)) return connectivities def moments(adata, n_neighbors=30, n_pcs=30, mode='connectivities', renormalize=False, copy=False): """Computes first order moments for velocity estimation. Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. n_neighbors: `int` (default: 30) Number of neighbors to use. n_pcs: `int` (default: 30) Number of principal components to use. mode: `'connectivities'` or `'distances'` (default: `'connectivities'`) Distance metric to use for moment computation. renormalize: `bool` (default: `False`) Renormalize the moments by total counts per cell to its median. copy: `bool` (default: `False`) Return a copy instead of writing to adata. Returns ------- Returns or updates `adata` with the attributes Ms: `.layers` dense matrix with first order moments of spliced counts. Mu: `.layers` dense matrix with first order moments of unspliced counts. """ if 'neighbors' not in adata.uns.keys() or n_neighbors > adata.uns['neighbors']['params']['n_neighbors']: from scanpy.api.pp import neighbors, pca if 'X_pca' not in adata.obsm.keys() or n_pcs > adata.obsm['X_pca'].shape[1]: pca(adata, n_comps=n_pcs, svd_solver='arpack') neighbors(adata, n_neighbors=n_neighbors, use_rep='X_pca') if mode not in adata.uns['neighbors']: raise ValueError('mode can only be \'connectivities\' or \'distances\'') logg.info('computing moments', r=True) normalize_layers(adata) connectivities = get_connectivities(adata, mode) #connectivities += connectivities.dot(connectivities*.5) adata.layers['Ms'] = csr_matrix.dot(connectivities, csr_matrix(adata.layers['spliced'])).A adata.layers['Mu'] = csr_matrix.dot(connectivities, csr_matrix(adata.layers['unspliced'])).A if renormalize: normalize_layers(adata, layers={'Ms', 'Mu'}) logg.info(' finished', time=True, end=' ' if settings.verbosity > 2 else '\n') logg.hint( 'added to `.layers`\n' ' \'Ms\', moments of spliced abundances\n' ' \'Mu\', moments of unspliced abundances') return adata if copy else None def second_order_moments(adata): """Computes second order moments for stochastic velocity estimation. Arguments --------- adata: `AnnData` Annotated data matrix. Returns ------- Mss: Second order moments for spliced abundances Mus: Second order moments for spliced with unspliced abundances """ if 'neighbors' not in adata.uns: raise ValueError('You need to run `pp.neighbors` first to compute a neighborhood graph.') connectivities = get_connectivities(adata, 'connectivities') s, u = csr_matrix(adata.layers['spliced']), csr_matrix(adata.layers['unspliced']) Mss = csr_matrix.dot(connectivities, s.multiply(s)).A Mus = csr_matrix.dot(connectivities, s.multiply(u)).A return Mss, Mus def second_order_moments_u(adata): """Computes second order moments for stochastic velocity estimation. Arguments --------- adata: `AnnData` Annotated data matrix. Returns ------- Muu: Second order moments for unspliced abundances """ if 'neighbors' not in adata.uns: raise ValueError('You need to run `pp.neighbors` first to compute a neighborhood graph.') connectivities = get_connectivities(adata, 'connectivities') u = csr_matrix(adata.layers['unspliced']) Muu = csr_matrix.dot(connectivities, u.multiply(u)).A return MuuPKt.MZU||scvelo/preprocessing/utils.pyimport numpy as np def show_proportions(adata, copy=False): """Fraction of spliced/unspliced/ambiguous abundances Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. copy: `bool` (default: `False`) Return a copy instead of writing to adata. Returns ------- Prints the fractions of abundances. """ layers_keys = [key for key in ['spliced', 'unspliced', 'ambiguous'] if key in adata.layers.keys()] tot_mol_cell_layers = [adata.layers[key].sum(1) for key in layers_keys] mean_abundances = np.round( [np.mean(tot_mol_cell / np.sum(tot_mol_cell_layers, 0)) for tot_mol_cell in tot_mol_cell_layers], 2) print('abundance of ' + str(layers_keys) + ': ' + str(mean_abundances)) return adata if copy else None def cleanup(adata, clean='layers', keep={'spliced', 'unspliced'}, copy=False): """Deletes attributes not needed. Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. cleanup: `str` or list of `str` (default: `layers`) Which attributes to consider for freeing memory. keep: `str` or list of `str` (default: `['spliced', unspliced']`) Which attributes to keep. copy: `bool` (default: `False`) Return a copy instead of writing to adata. Returns ------- Returns or updates `adata` with selection of attributes kept. """ if any(['obs' in clean, 'all' in clean]): for key in list(adata.obs.keys()): if key not in keep: del adata.obs[key] if any(['var' in clean, 'all' in clean]): for key in list(adata.var.keys()): if key not in keep: del adata.var[key] # obsm and varm cannot be deleted when only 1 key left -> PR for anndata #if any(['obsm' in clean, 'all' in clean]): # for key in list(adata.obsm.keys()): # if key not in keep: del adata.obsm[key] #if any(['varm' in clean, 'all' in clean]): # for key in list(adata.varm.keys()): # if key not in keep: del adata.varm[key] if any(['uns' in clean, 'all' in clean]): for key in list(adata.uns.keys()): if key not in keep: del adata.uns[key] if any(['layers' in clean, 'all' in clean]): for key in list(adata.layers.keys()): # remove layers that are not needed if key not in keep: del adata.layers[key] return adata if copy else None def filter_and_normalize(adata, min_counts=10, n_top_genes=4000, log=True, copy=False): """Filtering, normalization and log transform Expects non-logarithmized data. If using logarithmized data, pass `log=False`. Runs the following steps .. code:: python sc.pp.filter_genes(adata, min_counts=10) sc.pp.normalize_per_cell(adata) sc.pp.filter_genes_dispersion(adata, n_top_genes=10000) sc.pp.normalize_per_cell(adata) if log: sc.pp.log1p(adata) Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. min_counts: `int` (default: 10) Minimum number of gene counts per cell. n_top_genes: `int` (default: 10000) Number of genes to keep. log: `bool` (default: `True`) Take logarithm. copy: `bool` (default: `False`) Return a copy of `adata` instead of updating it. Returns ------- Returns or updates `adata` depending on `copy`. """ from scanpy.api.pp import filter_genes, filter_genes_dispersion, normalize_per_cell, log1p filter_genes(adata, min_counts=min_counts) if n_top_genes is None or n_top_genes < adata.shape[1]: normalize_per_cell(adata) filter_genes_dispersion(adata, n_top_genes=n_top_genes) normalize_per_cell(adata) if log: log1p(adata) return adata if copy else None def recipe_velocity(adata, min_counts=10, n_top_genes=None, n_pcs=30, n_neighbors=30, log=True, copy=False): """Runs pp.filter_and_normalize() and pp.moments() """ from .moments import moments filter_and_normalize(adata, min_counts=min_counts, n_top_genes=n_top_genes, log=log) moments(adata, n_neighbors=n_neighbors, n_pcs=n_pcs) return adata if copy else None PK|8Më866scvelo/tools/__init__.pyfrom .velocity import velocity from .velocity_graph import velocity_graph from .transition_matrix import transition_matrix from .velocity_embedding import velocity_embedding from .velocity_confidence import velocity_confidence, velocity_confidence_transition from .terminal_states import eigs, terminal_states PK~5MH;scvelo/tools/solver.pyfrom .utils import prod_sum_obs from scipy.optimize import minimize import numpy as np import warnings def solve_cov(x, y, fit_offset=False): """Solution to least squares: gamma = cov(X,Y) / var(X) """ n_obs, n_var = x.shape with warnings.catch_warnings(): warnings.simplefilter("ignore") if fit_offset: cov_xy, cov_xx = prod_sum_obs(x, y) / n_obs, prod_sum_obs(x, x) / n_obs mean_x, mean_y = x.mean(0), y.mean(0) numerator_offset = cov_xx * mean_y - cov_xy * mean_x numerator_gamma = cov_xy - mean_x * mean_y offset, gamma = (numerator_offset, numerator_gamma) / (cov_xx - mean_x * mean_x) else: offset, gamma = np.zeros(n_var), prod_sum_obs(x, y) / prod_sum_obs(x, x) return offset, gamma def solve2_inv(x, y, x2, y2, res_std=None, res2_std=None, fit_offset=False, fit_offset2=False): """Solution to the 2-dim generalized least squares: gamma = inv(X'QX)X'QY """ n_obs, n_var = x.shape offset, offset_ss = np.zeros(n_var, dtype="float32"), np.zeros(n_var, dtype="float32") gamma = np.ones(n_var, dtype="float32") if (res_std is None) or (res2_std is None): res_std, res2_std = np.ones(n_var), np.ones(n_var) ones, zeros = np.ones(n_obs), np.zeros(n_obs) with warnings.catch_warnings(): warnings.simplefilter("ignore") x, y = np.vstack((x/res_std, x2/res2_std)), np.vstack((y/res_std, y2/res2_std)) if fit_offset and fit_offset2: for i in range(n_var): A = np.c_[np.vstack((np.c_[ones/res_std[i], zeros], np.c_[zeros, ones/res2_std[i]])), x[:, i]] offset[i], offset_ss[i], gamma[i] = np.linalg.pinv(A.T.dot(A)).dot(A.T.dot(y[:, i])) elif fit_offset: for i in range(n_var): A = np.c_[np.hstack((ones/res_std[i], zeros)), x[:, i]] offset[i], gamma[i] = np.linalg.pinv(A.T.dot(A)).dot(A.T.dot(y[:, i])) elif fit_offset2: for i in range(n_var): A = np.c_[np.hstack((zeros, ones/res2_std[i])), x[:, i]] offset_ss[i], gamma[i] = np.linalg.pinv(A.T.dot(A)).dot(A.T.dot(y[:, i])) else: for i in range(n_var): A = np.c_[x[:, i]] gamma[i] = np.linalg.pinv(A.T.dot(A)).dot(A.T.dot(y[:, i])) return offset, offset_ss, gamma def solve2_mle(Ms, Mu, Mus, Mss, fit_offset=False, fit_offset2=False): """Maximizing the log likelihood using weights according to empirical bayes """ n_obs, n_var = Ms.shape offset, offset_ss = np.zeros(n_var, dtype="float32"), np.zeros(n_var, dtype="float32") gamma = np.ones(n_var, dtype="float32") def sse(A, data, b): sigma = (A.dot(data) - b).std(1) return np.log(sigma).sum() # np.log(np.sqrt(2*np.pi) * sigma).sum() + (.5 * (res/sigma[:, None])**2).sum() if fit_offset and fit_offset2: for i in range(n_var): data = np.vstack((Mu[:, i], Ms[:, i], Mus[:, i], Mss[:, i])) offset[i], offset_ss[i], gamma[i] = \ minimize(lambda m: sse(np.array([[1, -m[2], 0, 0], [1, m[2], 2, -2 * m[2]]]), data, b=np.array(m[0], m[1])), x0=(1e-4, 1e-4, 1), method="L-BFGS-B").x elif fit_offset: for i in range(n_var): data = np.vstack((Mu[:, i], Ms[:, i], Mus[:, i], Mss[:, i])) offset[i], gamma[i] = \ minimize(lambda m: sse(np.array([[1, -m[1], 0, 0], [1, m[1], 2, -2 * m[1]]]), data, b=np.array(m[0], 0)), x0=(1e-4, 1), method="L-BFGS-B").x elif fit_offset2: for i in range(n_var): data = np.vstack((Mu[:, i], Ms[:, i], Mus[:, i], Mss[:, i])) offset_ss[i], gamma[i] = \ minimize(lambda m: sse(np.array([[1, -m[1], 0, 0], [1, m[1], 2, -2 * m[1]]]), data, b=np.array(0, m[0])), x0=(1e-4, 1), method="L-BFGS-B").x else: for i in range(n_var): data = np.vstack((Mu[:, i], Ms[:, i], Mus[:, i], Mss[:, i])) gamma[i] = \ minimize(lambda m: sse(np.array([[1, -m, 0, 0], [1, m, 2, -2 * m]]), data, b=0), x0=gamma[i], method="L-BFGS-B").x return offset, offset_ss, gamma PK:Mco scvelo/tools/terminal_states.pyfrom ..logging import logg, settings from ..preprocessing.moments import get_connectivities from .transition_matrix import transition_matrix from .utils import scale from scipy.sparse import linalg, csr_matrix import numpy as np def eigs(T, k=10, eps=1e-3, perc=None): eigvals, eigvecs = linalg.eigs(T.T, k=k, which='LR') # find k eigs with largest real part p = np.argsort(eigvals)[::-1] # sort in descending order of eigenvalues eigvals = eigvals.real[p] eigvecs = eigvecs.real[:, p] idx = (eigvals >= 1 - eps) # select eigenvectors with eigenvalue of 1 eigvals = eigvals[idx] eigvecs = np.absolute(eigvecs[:, idx]) if perc is not None: lbs, ubs = np.percentile(eigvecs, perc, axis=0) eigvecs[eigvecs < lbs] = 0 eigvecs = np.clip(eigvecs, 0, ubs) eigvecs /= eigvecs.max(0) return eigvals, eigvecs def terminal_states(adata, vkey='velocity', self_transitions=False, basis=None, weight_diffusion=0, scale_diffusion=1, eps=1e-3, copy=False): connectivities = get_connectivities(adata, 'distances') logg.info('computing root cells', r=True, end=' ') T = transition_matrix(adata, vkey=vkey, basis=basis, weight_diffusion=weight_diffusion, scale_diffusion=scale_diffusion, self_transitions=self_transitions, backward=True) eigvecs = eigs(T, eps=eps, perc=[2, 98])[1] eigvec = csr_matrix.dot(connectivities, eigvecs).sum(1) eigvec = np.clip(eigvec, 0, np.percentile(eigvec, 98)) adata.obs['root'] = scale(eigvec) logg.info('using ' + str(eigvecs.shape[1]) + ' eigenvectors with eigenvalue 1.') logg.info('computing end points', end=' ') T = transition_matrix(adata, vkey=vkey, basis=basis, weight_diffusion=weight_diffusion, scale_diffusion=scale_diffusion, self_transitions=self_transitions, backward=False) eigvecs = eigs(T, eps=eps, perc=[2, 98])[1] eigvec = csr_matrix.dot(connectivities, eigvecs).sum(1) eigvec = np.clip(eigvec, 0, np.percentile(eigvec, 98)) adata.obs['end'] = scale(eigvec) logg.info('using ' + str(eigvecs.shape[1]) + ' eigenvectors with eigenvalue 1.') logg.info(' finished', time=True, end=' ' if settings.verbosity > 2 else '\n') logg.hint( 'added to `.obs`\n' ' \'root\', root cells of Markov diffusion process\n' ' \'end\', end points of Markov diffusion process') return adata if copy else None PKf:Ms !scvelo/tools/transition_matrix.pyfrom scipy.spatial.distance import pdist, squareform from .utils import normalize_sparse import numpy as np def transition_matrix(adata, vkey='velocity', basis=None, backward=False, self_transitions=True, scale=10, use_negative_cosines=False, weight_diffusion=0, scale_diffusion=1): """Computes transition probabilities by applying Gaussian kernel to cosine similarities x scale Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. vkey: `str` (default: `'velocity'`) Name of velocity estimates to be used. basis: `str` or `None` (default: `None`) Restrict transition to embedding if specified backward: `bool` (default: `False`) Whether to use the transition matrix to push forward (`False`) or to pull backward (`True`) scale: `float` (default: 10) Scale parameter of gaussian kernel. weight_diffusion: `float` (default: 0) Relative weight to be given to diffusion kernel (Brownian motion) scale_diffusion: `float` (default: 1) Scale of diffusion kernel. Returns ------- Returns sparse matrix with transition probabilities. """ if vkey+'_graph' not in adata.uns: raise ValueError('You need to run `tl.velocity_graph` first to compute cosine correlations.') graph = adata.uns[vkey + '_graph'].copy() if self_transitions: confidence = graph.max(1).A.flatten() ub = np.percentile(confidence, 98) self_prob = np.clip(ub - confidence, 0, 1) graph.setdiag(self_prob) T = np.expm1(graph * scale) # equivalent to np.exp(adata.uns[vkey + '_graph'].A * scale) - 1 T = T - np.expm1(-adata.uns[vkey + '_graph_neg'] * scale) if use_negative_cosines \ else T + (adata.uns[vkey + '_graph_neg'] < 0) * 1e-10 # weight direct neighbors with 1 and indirect (recurse) neighbors with 0.5 direct_neighbors = adata.uns['neighbors']['distances'] > 0 direct_neighbors.setdiag(1) T = .5 * T + .5 * direct_neighbors.multiply(T) if backward: T = T.T T = normalize_sparse(T) if 'X_' + str(basis) in adata.obsm.keys(): dists_emb = (T > 0).multiply(squareform(pdist(adata.obsm['X_' + basis]))) scale_diffusion *= dists_emb.data.mean() diffusion_kernel = dists_emb.copy() diffusion_kernel.data = np.exp(-.5 * dists_emb.data ** 2 / scale_diffusion ** 2) T = T.multiply(diffusion_kernel) # combine velocity based kernel with diffusion based kernel if 0 < weight_diffusion < 1: # add another diffusion kernel (Brownian motion - like) diffusion_kernel.data = np.exp(-.5 * dists_emb.data ** 2 / (scale_diffusion/2) ** 2) T = (1-weight_diffusion) * T + weight_diffusion * diffusion_kernel T = normalize_sparse(T) return TPKS`9Mؼf~~scvelo/tools/utils.pyfrom scipy.sparse import csr_matrix import numpy as np import warnings def prod_sum_obs(A, B): """dot product and sum over axis 0 (obs) equivalent to np.sum(A * B, 0) """ return np.einsum('ij, ij -> j', A, B) def prod_sum_var(A, B): """dot product and sum over axis 1 (var) equivalent to np.sum(A * B, 1) """ return np.einsum('ij, ij -> i', A, B) def norm(A): """computes the L2-norm along axis 1 (e.g. genes or embedding dimensions) equivalent to np.linalg.norm(A, axis=1) """ return np.sqrt(np.einsum('ij, ij -> i', A, A)) def cosine(dX, v, v_norm): dX -= dX.mean(-1)[:, None] return np.einsum('ij, j', dX, v) / (norm(dX) * v_norm)[None, :] def normalize_sparse(X): with warnings.catch_warnings(): warnings.simplefilter("ignore") X = X.multiply(csr_matrix(1. / np.abs(X).sum(1))) return X def normalize_dense(X): with warnings.catch_warnings(): warnings.simplefilter("ignore") X = X / X.sum(1) return X def scale(X, min=0, max=1): X = X - X.min() + min X = X / X.max() * max return X def get_indices(dist): n_neighbors = (dist > 0).sum(1).min() rows_idx = np.where((dist > 0).sum(1) > n_neighbors)[0] for row_idx in rows_idx: col_idx = dist[row_idx].indices[n_neighbors:] dist[row_idx, col_idx] = 0 dist.eliminate_zeros() indices = dist.indices.reshape((-1, n_neighbors)) return indices, dist def get_iterative_indices(indices, index, n_recurse_neighbors): return indices[get_iterative_indices(indices, index, n_recurse_neighbors-1)] \ if n_recurse_neighbors > 1 else indices[index] PK|8Mƽbbscvelo/tools/velocity.pyfrom ..logging import logg, settings from ..preprocessing.moments import moments, second_order_moments from .solver import solve_cov, solve2_inv, solve2_mle from .utils import prod_sum_obs from .velocity_confidence import velocity_confidence import numpy as np import warnings warnings.simplefilter(action='ignore', category=FutureWarning) def velocity(adata, vkey='velocity', mode='stochastic', fit_offset=False, fit_offset2=False, filter_genes=False, copy=False): """Estimates velocities in a gene-specific manner Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. vkey: `str` (default: `'velocity'`) Name under which to refer to the computed velocities for `velocity_graph` and `velocity_embedding`. mode: `'deterministic'`, `'stochastic'` or `'bayes'` (default: `'stochastic'`) Whether to run the estimation using the deterministic or stochastic model of transcriptional dynamics. `'bayes'` solves the stochastic model and accounts for heteroscedasticity, but is slower than `'stochastic'`. fit_offset: `bool` (default: `False`) Whether to fit with offset for first order moment dynamics. fit_offset2: `bool`, (default: `False`) Whether to fit with offset for second order moment dynamics. filter_genes: `bool` (default: `True`) Whether to remove genes that are not used for further velocity analysis. copy: `bool` (default: `False`) Return a copy instead of writing to `adata`. Returns ------- Returns or updates `adata` with the attributes velocity: `.layers` velocity vectors for each individual cell variance_velocity: `.layers` velocity vectors for the cell variances velocity_offset, velocity_beta, velocity_gamma, velocity_r2: `.var` parameters """ if 'Ms' not in adata.layers.keys(): moments(adata) Ms, Mu = adata.layers['Ms'], adata.layers['Mu'] logg.info('computing velocities', r=True) beta = np.ones(adata.n_vars, dtype="float32") # estimate all rates in units of the splicing rate offset, gamma = solve_cov(Ms, Mu, fit_offset) stochastic = any([mode in item for item in ['stochastic', 'bayes', 'alpha']]) if stochastic: Mss, Mus = second_order_moments(adata) offset2, gamma2 = solve_cov(2 * Mss - Ms, 2 * Mus + Mu, fit_offset2) # initialize covariance matrix res_std = (Mu - gamma[None, :] * Ms - offset[None, :]).std(0) res2_std = (2 * Mus + Mu - gamma2[None, :] * (2 * Mss - Ms) - offset2[None, :]).std(0) # solve multiple regression offset, offset2, gamma = solve2_mle(Ms, Mu, Mus, Mss, fit_offset, fit_offset2) if mode == 'bayes' \ else solve2_inv(Ms, Mu, 2 * Mss - Ms, 2 * Mus + Mu, res_std, res2_std, fit_offset, fit_offset2) adata.layers['variance_velocity'] = beta[None, :] * (2 * Mus - 2 * Ms * Mu + Mu) - \ gamma[None, :] * (2 * Mss - 2 * Ms ** 2 - Ms) - \ offset2[None, :] + 2 * offset[None, :] * Ms # if mode == 'alpha': # Muu = second_order_moments_u(adata) # offset2u, alpha = solve_cov(np.ones(Mu.shape) + 2 * Mu, 2 * Muu - Mu) # pars.extend([offset2u, alpha]) # pars_str.extend(['_offset2u', '_alpha']) res, tot = Mu - gamma[None, :] * Ms - offset[None, :], Mu - Mu.mean(0) with warnings.catch_warnings(): warnings.simplefilter("ignore") r2 = np.ones(adata.n_vars) - prod_sum_obs(res, res) / prod_sum_obs(tot, tot) gamma[np.isnan(gamma)], r2[np.isnan(r2)], res[np.isnan(res)] = 0, 0, 0 pars = [offset, offset2, beta, gamma, r2] if stochastic else [offset, beta, gamma, r2] pars_str = ['_offset', '_offset2', '_beta', '_gamma', '_r2'] if stochastic else ['_offset', '_beta', '_gamma', '_r2'] for i, par in enumerate(pars_str): adata.var[vkey + par] = pars[i] adata.layers[vkey] = Mu - gamma[None, :] * Ms - offset[None, :] adata.var['velocity_genes'] = (r2 > .01) & (gamma > .01) if filter_genes: adata._inplace_subset_var(adata.var['velocity_genes']) logg.info(' finished', time=True, end=' ' if settings.verbosity > 2 else '\n') logg.hint( 'added to `.layers`\n' ' \'' + vkey + '\', velocity vectors for each individual cell') velocity_confidence(adata, vkey) return adata if copy else None PK΃:MǠ #scvelo/tools/velocity_confidence.pyfrom ..logging import logg from .utils import prod_sum_var, norm, get_indices from ..preprocessing.moments import moments from .transition_matrix import transition_matrix from scanpy.api.pp import neighbors import numpy as np def velocity_confidence(adata, vkey='velocity', copy=False): if vkey not in adata.layers.keys(): raise ValueError( 'You need to run `tl.velocity` first.') idx = adata.var['velocity_genes'] X, V = adata.layers['Ms'][:, idx].copy(), adata.layers[vkey][:, idx].copy() indices = get_indices(dist=adata.uns['neighbors']['distances'])[0] V -= V.mean(1)[:, None] V_norm = norm(V) R = np.zeros(adata.n_obs) for i in range(adata.n_obs): Vi_neighs = V[indices[i]] Vi_neighs -= Vi_neighs.mean(1)[:, None] R[i] = np.mean(np.einsum('ij, j', Vi_neighs, V[i]) / (norm(Vi_neighs) * V_norm[i])[None, :]) adata.obs[vkey + '_length'] = V_norm adata.obs[vkey + '_confidence'] = R logg.hint( 'added to `.obs`\n' ' ' + vkey + '_confidence') return adata if copy else None def velocity_confidence_transition(adata, vkey='velocity', scale=10, copy=False): if vkey not in adata.layers.keys(): raise ValueError( 'You need to run `tl.velocity` first.') idx = adata.var['velocity_genes'] T = transition_matrix(adata, vkey=vkey, scale=scale) dX = T.dot(adata.layers['Ms'][:, idx]) - adata.layers['Ms'][:, idx] dX -= dX.mean(1)[:, None] V = adata.layers[vkey][:, idx].copy() V -= V.mean(1)[:, None] adata.obs[vkey + '_confidence_transition'] = prod_sum_var(dX, V) / (norm(dX) * norm(V)) logg.hint( 'added to `.obs`\n' ' ' + vkey + '_confidence_transition') return adata if copy else None def random_subsample(adata, frac=.5): subset = np.random.choice([True, False], size=adata.n_obs, p=[frac, 1-frac]).sum() adata.obs['subset'] = subset adata_subset = adata[subset].copy() neighbors(adata_subset) moments(adata_subset) return adata_subset def score_robustness(adata, adata_subset=None, vkey='velocity', copy=False): if adata_subset is None: adata_subset = random_subsample(adata) V = adata[adata.obs['subset']].layers[vkey] V_subset = adata_subset.layers[vkey] adata_subset.obs[vkey + '_score_robustness'] = prod_sum_var(V, V_subset) / (norm(V) * norm(V_subset)) return adata_subset if copy else None PK:Mݤ  "scvelo/tools/velocity_embedding.pyfrom ..logging import logg, settings from .utils import norm from .transition_matrix import transition_matrix from scipy.sparse import issparse import numpy as np def quiver_autoscale(X, Y, U, V): import matplotlib.pyplot as pl Q = pl.quiver(X, Y, U, V, angles='xy', scale_units='xy', scale=None) Q._init() pl.clf() return Q.scale def velocity_embedding(adata, basis='tsne', vkey='velocity', scale=10, self_transitions=True, use_negative_cosines=True, retain_scale=False, copy=False): """Computes the single cell velocities in the embedding Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. basis: `str` (default: `'tsne'`) Which embedding to use. vkey: `str` (default: `'velocity'`) Name of velocity estimates to be used. scale: `int` (default: 10) Scale parameter of gaussian kernel for transition matrix. retain_scale: `bool` (default: `False`) Whether to retain scale from high dimensional space in embedding. Returns ------- Returns or updates `adata` with the attributes velocity_umap: `.obsm` coordinates of velocity projection on embedding """ T = transition_matrix(adata, vkey=vkey, scale=scale, self_transitions=self_transitions, use_negative_cosines=use_negative_cosines) T.setdiag(0) T.eliminate_zeros() logg.info('computing velocity embedding', r=True) if 'X_' + basis not in adata.obsm_keys(): raise ValueError('You need compute the embedding first.') X_emb = adata.obsm['X_' + basis][:, :2] V_emb = np.zeros((adata.n_obs, 2)) TA = T.A if adata.n_obs < 8192 else None sparse = False if adata.n_obs < 8192 else True for i in range(adata.n_obs): indices = T[i].indices dX = X_emb[indices] - X_emb[i, None] # shape (n_neighbors, 2) if not retain_scale: dX /= norm(dX)[:, None] dX[np.isnan(dX)] = 0 # zero diff in a steady-state probs = T[i].data if sparse else TA[i, indices] V_emb[i] = probs.dot(dX) - probs.mean() * dX.sum(0) # probs.sum() / len(indices) if retain_scale: delta = T.dot(adata.X) - adata.X if issparse(delta): delta = delta.A cos_proj = (adata.layers[vkey] * delta).sum(1) / norm(delta) V_emb *= np.clip(cos_proj[:, None] * 10, 0, 1) V_emb /= (3 * quiver_autoscale(X_emb[:, 0], X_emb[:, 1], V_emb[:, 0], V_emb[:, 1])) vkey += '_' + basis adata.obsm[vkey] = V_emb logg.info(' finished', time=True, end=' ' if settings.verbosity > 2 else '\n') logg.hint( 'added to `.obsm`\n' ' \'' + vkey + '\', embedded velocity vectors') return adata if copy else None PKΆ9M<<scvelo/tools/velocity_graph.pyfrom ..logging import logg, settings from .utils import norm, cosine, get_indices, get_iterative_indices from .velocity import velocity from scanpy.api import Neighbors from scipy.sparse import coo_matrix import numpy as np import warnings class Cosines: def __init__(self, X, V, indices, n_recurse_neighbors, sqrt_transform=False): self.X = X.copy() self.indices = indices self.n_recurse_neighbors = n_recurse_neighbors self.sqrt_transform = sqrt_transform self.V = V.copy() if sqrt_transform: self.V = np.sqrt(np.abs(self.V)) * np.sign(self.V) self.V = self.V - self.V.mean(1)[:, None] self.V_norm = norm(self.V) def compute(self, ixs): vals, rows, cols, async_id = [], [], [], [] if self.sqrt_transform: for i in ixs: knn_ixs = np.unique(get_iterative_indices(self.indices, i, self.n_recurse_neighbors)) dX = self.X[knn_ixs] - self.X[i, None] dX = np.sqrt(np.abs(dX)) * np.sign(dX) vals.extend(cosine(dX, self.V[i], self.V_norm[i])) rows.extend(np.ones(len(knn_ixs)) * i) cols.extend(knn_ixs) else: for i in ixs: knn_ixs = np.unique(get_iterative_indices(self.indices, i, self.n_recurse_neighbors)) dX = self.X[knn_ixs] - self.X[i, None] vals.extend(cosine(dX, self.V[i], self.V_norm[i])) rows.extend(np.ones(len(knn_ixs)) * i) cols.extend(knn_ixs) async_id = int(ixs[0] / len(ixs)) # keep track of order vals = np.hstack(vals) vals[np.isnan(vals)] = 1e-10 # actually zero; just to store these entries in sparse matrix. return [async_id, vals, rows, cols] def velocity_graph(adata, vkey='velocity', n_recurse_neighbors=2, n_neighbors=None, sqrt_transform=False, copy=False): """Computes a velocity graph based on cosine similarities. The cosine similarities are computed between velocities and potential cell state transitions. Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. vkey: `str` (default: `'velocity'`) Name of velocity estimates to be used. n_neighbors: `int` or `None` (default: None) Use fixed number of neighbors or do recursive neighbor search (if `None`). n_recurse_neighbors: `int` (default: 2) Number of recursions to be done for neighbors search. sqrt_transform: `bool` (default: `False`) Whether to variance-transform the cell states and velocities before computing cosine similarities. copy: `bool` (default: `False`) Return a copy instead of writing to adata. Returns ------- Returns or updates `adata` with the attributes velocity_graph: `.uns` sparse matrix with transition probabilities """ if vkey not in adata.layers.keys(): velocity(adata) logg.info('computing velocity graph', r=True) if n_neighbors is not None: keys = [key for key in ['X_pca', 'X_tsne', 'X_umap'] if key in adata.obsm.keys()] neighs = Neighbors(adata) neighs.compute_neighbors(n_neighbors=n_neighbors, use_rep=keys[-1], n_pcs=10) indices = get_indices(dist=neighs.distances)[0] n_recurse_neighbors = 1 else: indices = get_indices(dist=adata.uns['neighbors']['distances'])[0] cos = Cosines(adata.layers['Ms'][:, adata.var['velocity_genes']], adata.layers[vkey][:, adata.var['velocity_genes']], indices, n_recurse_neighbors, sqrt_transform) with warnings.catch_warnings(): warnings.simplefilter("ignore") if True: _, vals, rows, cols = cos.compute(range(adata.n_obs)) # list(map(cos.compute, range(adata.n_obs))) else: from multiprocessing import Pool pool = Pool(n_jobs) ranges = np.array_split(range(adata.n_obs), n_jobs) result = pool.map_async(cos.compute, ranges).get() # async will probably destroy order result.sort(key=lambda tup: tup[0]) # restore order vals, rows, cols = [], [], [] for r in result: vals.extend(r[1]) rows.extend(r[2]) cols.extend(r[3]) graph = coo_matrix((vals, (rows, cols)), shape=(adata.n_obs, adata.n_obs)) graph_neg = graph.copy() graph.data = np.clip(graph.data, 0, 1) graph_neg.data = np.clip(graph_neg.data, -1, 0) graph.eliminate_zeros() graph_neg.eliminate_zeros() adata.uns[vkey+'_graph'] = graph.tocsr() adata.uns[vkey+'_graph_neg'] = graph_neg.tocsr() logg.info(' finished', time=True, end=' ' if settings.verbosity > 2 else '\n') logg.hint( 'added to `.uns`\n' ' \'' + vkey + '_graph\', sparse matrix with cosine correlations') return adata if copy else None PKMI #scvelo/tools/velocity_pseudotime.pyimport numpy as np def principal_curve(adata, basis='pca', n_comps=4, clusters_list=None, copy=False): """Computes the principal curve Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. basis: `str` (default: `'pca'`) Basis to use for computing the principal curve. n_comps: `int` (default: 4) Number of pricipal components to be used. copy: `bool`, (default: `False`) Return a copy instead of writing to adata. Returns ------- Returns or updates `adata` with the attributes principal_curve: `.uns` dictionary containing `projections`, `ixsort` and `arclength` """ import rpy2.robjects as robjects from rpy2.robjects.packages import importr if clusters_list is not None: cell_subset = np.array([label in clusters_list for label in adata.obs['clusters']]) X_emb = adata[cell_subset].obsm['X_' + basis][:, :n_comps] else: cell_subset = None X_emb = adata.obsm['X_' + basis][:, :n_comps] n_obs, n_dim = X_emb.shape # convert array to R matrix xvec = robjects.FloatVector(X_emb.T.reshape((X_emb.size))) X_R = robjects.r.matrix(xvec, nrow=n_obs, ncol=n_dim) fit = importr("princurve").principal_curve(X_R) adata.uns['principal_curve'] = dict() adata.uns['principal_curve']['ixsort'] = ixsort = np.array(fit[1])-1 adata.uns['principal_curve']['projections'] = np.array(fit[0])[ixsort] adata.uns['principal_curve']['arclength'] = np.array(fit[2]) adata.uns['principal_curve']['cell_subset'] = cell_subset return adata if copy else None # def pseudotime(adata, iroot=229, clusters_list=None, copy=False): # # iroot = adata.obsm['X_umap'][adata.obs['clusters']=='neuroblast 1'][:,1].argmax() # root_cell = adata.obs_names[iroot] # if clusters_list is not None: # ['granule mature', 'neuroblast 1', 'neuroblast 2', 'granule immature'] # cell_subset = np.array([label in clusters_list for label in adata.obs['clusters']]) # adata_subset = adata.copy() # adata_subset._inplace_subset_obs(cell_subset) # adata_subset.uns['iroot'] = np.where(adata_subset.obs_names == root_cell)[0][0] # dpt(adata_subset, n_branchings=0) # # adata.obs['dpt_pseudotime'] = np.zeros(adata.n_obs) # adata.obs['dpt_pseudotime'][cell_subset] = adata_subset.obs['dpt_pseudotime'] # adata.obs['dpt_pseudotime'][~cell_subset] = -.5 # # return adata if copy else None PKwMm9}/scvelo-0.1.9.dist-info/LICENSEBSD 3-Clause License Copyright (c) 2018, Theis Lab All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PK!HSmPOscvelo-0.1.9.dist-info/WHEEL HM K-*ϳR03rOK-J,/RH,rzd&Y)r$[)T&UrPK!HBpw@scvelo-0.1.9.dist-info/METADATAWRFO?1%(q㦤4LHҟx-[$rd}>I1zA֞gUޓuڔckUXdIw'x?uQ(t2Λ$SD:].r |XD}#~6EZl}ƃA[R(5tBˋ 5Mݘ9oxHoU+o 8q\ر|o+%Jc g,}Q^d&7QQSdT)7֖\t@<ُ P{Sz*}k?\9*99_/NqUY# qvU+_;>@ϭI넝p0r8)lשF&EoȑIvO5[W\@~@/ PܓmkO_*Znj5Px .,[risc /T]r}|kxOLlUV(G/l~<Ƈh}S^n{`| nm׹F9VD~w_mU'76bbSw,)Īߓ/q{ޜ4=R[!vw[R;-fWK.fԁ*SoqnĦI]nFb!6g_@fI֗$\,f5023@TаY=y]*GWYJjݑsk :;v~ޑ]">B-IK<)d9am^*RT> U,B3%]& K+/S4#by̋6Em+%@x#K}7O] Fph#Dç@B3b VaR9F|H+mJsa`mg .M".X۹.1n6S^USr{ٯX, UW voh` ^,_!f ǜ?"^m Zmb:&٤>mݝ*A{TH{嬭.@zM,o%`;l~|y!vkuK4h1/PK!Hlr scvelo-0.1.9.dist-info/RECORDɲH}= T&^" 2ly["zɂ/N>`QJ<> Mϕy"bл`ك>P0#+Ep_=iXRlI;55Ku/vcbtD#8 6oT ]Ջ&M #Hr2"-C'}+}-+@I2oZQqZ/i]&~6֥󘼫Y;0[BvXa?eu0jG3nqD؟N'!)܊vUyԕuـ7)axU']txՠtɸ7o0ʜWu &ff? B2Ek>$'8 ;S{֣&Lj#z`A`0#g]ؾ}ikT|8p[2Y 6"|fwCV f'ЎH>E;l/򸨈UCWGp|[%>:)"z ]PMՁKqwoB|~7<.@-0 ȈX$-c~ğh u]_M.7[{#wbnBoJjo@}ލ~Q{`2iCXfO5]dVf[n?޾cUUP VOs:@L2JA >[/K921/~;r9$L6;΄d﷐*:1BЀ~/fn޴F o%8ɋ̈́S^blm+TA.=>Ȼc/vúUsLir.&Q Nsmˡ%'wk؞X$>G6@m9B BCSq!I^#qPd<6 6fc6rgO=ˇ!sQ+{<[^<( 3>VU9Z'@A Q'սJ;^;o056G$Rs=<^ʖޑRX%Gpccm7^gI]Gb VjrWz5l%Y}Slf.aBs ZADNwN,0nPK09ME77scvelo/__init__.pyPK:MMe33gscvelo/datasets.pyPKL+Mbxscvelo/get_version.pyPK",M,scvelo/logging.pyPK8b:MB scvelo/read_load.pyPKN&MYm"scvelo/plotting/__init__.pyPK,MS* * R#scvelo/plotting/docs.pyPK Mv77/scvelo/plotting/pseudotime.pyPK:M