PKR$M~ōscvelo/__init__.py"""scvelo - stochastic single cell RNA velocity""" from get_version import get_version __version__ = get_version(__file__) del get_version from . import preprocessing as pp from . import tools as tl from . import plotting as pl from . import datasets from . import logging PKϼMscvelo/datasets.py"""Builtin Datasets. """ from scanpy.api import read import numpy as np def dentategyrus(): """Dentate Gyrus dataset from Hochgerner et al. (2018). Dentate gyrus is part of the hippocampus involved in learning, episodic memory formation and spatial coding. It is measured using 10X Genomics Chromium and described in Hochgerner et al. (2018). The data consists of 25,919 genes across 3,396 cells and provides several interesting characteristics. """ filename = 'data/DentateGyrus/10X43_1.loom' url = 'http://pklab.med.harvard.edu/velocyto/DG1/10X43_1.loom' adata = read(filename, backup_url=url, cleanup=True, sparse=True, cache=True) return adata def toy_data(n_obs): """Random samples from Dentate Gyrus. """ adata = dentategyrus() indices = np.random.choice(adata.n_obs, n_obs) adata = adata[indices] adata.obs_names = np.array(range(adata.n_obs), dtype='str') return adata PK-SMkscvelo/logging.pyfrom scanpy import settings from scanpy import logging as logg from datetime import datetime from time import time def get_passed_time(): now = time() elapsed = now - settings._previous_time settings._previous_time = now return elapsed def get_date_string(): return datetime.now().strftime("%Y-%m-%d %H:%M") def print_version_and_date(): from . import __version__ logg._write_log('Running scvelo', __version__, 'on {}.'.format(get_date_string())) PKMdӂscvelo/plotting/__init__.pyfrom .utils import scatter, phase from .velocity import velocity from .velocity_embedding import velocity_embedding from .velocity_embedding_grid import velocity_embedding_grid PK Mv77scvelo/plotting/pseudotime.pyfrom .utils import * def principal_curve(adata): X_curve = adata.uns['principal_curve']['projections'] ixsort = adata.uns['principal_curve']['ixsort'] pl.plot(X_curve[ixsort, 0], X_curve[ixsort, 1], c="k", lw=3, zorder=2000000) def pseudotime(adata, gene_list, ckey='velocity', reverse=False): ixsort = adata.uns['principal_curve']['ixsort'] arclength = adata.uns['principal_curve']['arclength'] if reverse: arclength /= np.max(arclength) else: arclength = (np.max(arclength) - arclength) / np.max(arclength) cell_subset = adata.uns['principal_curve']['cell_subset'] adata_subset = adata[cell_subset].copy() gs = pl.GridSpec(1, len(gene_list)) for n, gene in enumerate(gene_list): i = np.where(adata_subset.var_names == gene)[0][0] ax = pl.subplot(gs[n]) lb, ub = np.percentile(adata_subset.obsm[ckey][:, i], [.5, 99.5]) c = np.clip(adata_subset.obsm[ckey][ixsort, i], lb, ub) # pl.scatter(arclength[ixsort], adata2.obsm['Mu'][ixsort, i], alpha=0.7, c='b', s=5, label="unspliced") pl.scatter(arclength[ixsort], adata_subset.obsm['Ms'][ixsort, i] * adata_subset.uns['velocity_pars']['gamma'][i], c=c, cmap='coolwarm', alpha=1, s=1, label="spliced") c = c / abs(c).max() * (adata_subset.obsm['Ms'][ixsort, i] * adata_subset.uns['velocity_pars']['gamma'][i]).max() z = np.ma.polyfit(arclength[ixsort], c, 8) fun = np.poly1d(z) pl.plot(arclength[ixsort], fun(arclength[ixsort]), label=ckey) # Hide the right and top spines ax.spines['right'].set_visible(False) ax.spines['top'].set_visible(False) ax.xaxis.set_major_locator(MaxNLocator(nbins=3)) ax.yaxis.set_major_locator(MaxNLocator(nbins=3)) pl.ylabel(gene) pl.title('Colored by ' + ckey) PK$Mwscvelo/plotting/utils.pyimport numpy as np import matplotlib.pyplot as pl from matplotlib.colors import rgb2hex from matplotlib.ticker import MaxNLocator from mpl_toolkits.axes_grid1.inset_locator import inset_axes def get_colors(adata, basis): cluster_ix = adata.obs[basis].cat.codes if basis+'_colors' in adata.uns.keys(): clusters_colors = adata.uns[basis + '_colors'] else: colormap = np.vstack((pl.cm.tab20b(np.linspace(0., 1, 20))[::2], pl.cm.tab20c(np.linspace(0, 1, 20))[1::2], pl.cm.tab20b(np.linspace(0., 1, 20))[1::2], pl.cm.tab20c(np.linspace(0, 1, 20))[0::2])) clusters_colors = [rgb2hex(c) for c in colormap[:len(adata.obs[basis])]] return np.array([clusters_colors[cluster_ix[i]] for i in range(adata.X.shape[0])]) def interpret_colorkey(adata, c=None, layer=None, perc=None): if perc is None: perc = [2, 98] if isinstance(c, str): if c in adata.obs.keys(): # color by observation key if adata.obs[c].dtype.name == 'category': c = get_colors(adata, basis=c) else: c = adata.obs[c] lb, ub = np.percentile(c, perc) c = np.clip(c, lb, ub) elif c in adata.var_names: # color by var in specific layer c = adata[:, c].layers[layer] if layer in adata.layers.keys() else adata[:, c].X lb, ub = np.percentile(c, perc) c = np.clip(c, lb, ub) elif c is None: # color by cluster or louvain or grey if no color is specified c = get_colors(adata, 'clusters') if 'clusters' in adata.obs.keys() \ else get_colors(adata, 'louvain') if 'louvain' in adata.obs.keys() else 'grey' elif isinstance(c, np.ndarray): # continuous coloring lb, ub = np.percentile(c, perc) c = np.clip(c, lb, ub) else: raise ValueError('color key is invalid! pass valid observation annotation or a gene name') return c def scatter(adata, x=None, y=None, basis='umap', layer=None, color=None, xlabel=None, ylabel=None, color_map=None, size=None, alpha=1, fontsize=None, frameon=False, title=None, show=True, colorbar=False, save=None, ax=None, **kwargs): """Scatter plot along observations or variables axes. Color the plot using annotations of observations (`.obs`), variables (`.var`) or expression of genes (`.var_names`). Arguments --------- adata: `AnnData` Annotated data matrix. basis: `str` (default='tsne') plots embedding obsm['X_' + basis] x: `str`, `np.ndarray` or `None` (default: `None`) x coordinate y: `str`, `np.ndarray` or `None` (default: `None`) y coordinate color : `str` or `None` (default: `None`) Key for annotations of observations/cells or variables/genes Returns ------- If `show==False` a `matplotlib.Axis` """ if (x is None) | (y is None): X_emb = adata.obsm['X_' + basis][:, :2] x, y = X_emb[:, 0], X_emb[:, 1] pl.scatter(x, y, c=interpret_colorkey(adata, color, layer), cmap=color_map, s=size, alpha=alpha, **kwargs) if isinstance(title, str): pl.title(title, fontsize=fontsize) if isinstance(xlabel, str) or isinstance(ylabel, str): pl.xlabel(xlabel, fontsize=fontsize) pl.ylabel(ylabel, fontsize=fontsize) if not frameon: pl.axis('off') if isinstance(save, str): pl.savefig(save) if ax is not None: ax.xaxis.set_major_locator(MaxNLocator(nbins=3)) ax.yaxis.set_major_locator(MaxNLocator(nbins=3)) labelsize = int(fontsize * .75) if fontsize is not None else None ax.tick_params(axis='both', which='major', labelsize=labelsize) if colorbar and (ax is not None): cb = pl.colorbar(orientation='vertical', cax= inset_axes(ax, width="2%", height="30%", loc=4, borderpad=0)) cb.locator = (MaxNLocator(nbins=3)) cb.update_ticks() if show: pl.show() else: return ax def phase(adata, var=None, x=None, y=None, color='louvain', fits='all', xlabel='spliced', ylabel='unspliced', fontsize=None, show=True, ax=None, **kwargs): if isinstance(var, str) and (var in adata.var_names): if (x is None) or (y is None): ix = np.where(adata.var_names == var)[0][0] x, y = adata.layers['Ms'][:, ix], adata.layers['Mu'][:, ix] else: ValueError('var not found in adata.var_names.') ax = scatter(adata, x=x, y=y, color=color, frameon=True, title=var, xlabel=xlabel, ylabel=ylabel, ax=ax, **kwargs) xnew = np.linspace(0, x.max() * 1.02) fits = adata.layers.keys() if fits == 'all' else fits fits = [fit for fit in fits if 'velocity' in fit] for fit in fits: linestyle = '--' if 'stochastic' in fit else '-' pl.plot(xnew, adata.var[fit+'_gamma'][ix] / adata.var[fit+'_beta'][ix] * xnew + adata.var[fit+'_offset'][ix] / adata.var[fit+'_beta'][ix], c='k', linestyle=linestyle) if show: pl.show() else: return axPKMM^ppscvelo/plotting/velocity.pyfrom .utils import * from ..preprocessing.moments import second_order_moments def velocity(adata, var_names=None, basis='umap', mode='deterministic', fits='all', layers='all', color=None, fontsize=8, color_map='RdBu_r', size=.2, alpha=.5, ax=None, **kwargs): """Phase and velocity plot for set of genes. The phase plot shows pliced against unspliced expressions with steady-state fit. Further the embedding is shown colored by velocity and expression. Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. var_names: `str` or list of `str` (default: `None`) Which variables to show. basis: `str` (default: `'umap'`) Key for embedding coordinates. """ var_names = [var_names] if isinstance(var_names, str) else var_names var_names = [var for var in var_names if var in adata.var_names[adata.var.velocity_genes]] \ if isinstance(var_names, list) else adata.var_names[(adata.layers['spliced'] > 0).sum(0).argsort()[::-1][:4]] layers = ['velocity', 'Ms', 'variance_velocity'] if layers == 'all' else layers layers = [layer for layer in layers if layer in adata.layers.keys()] fits = adata.layers.keys() if fits == 'all' else fits fits = [fit for fit in fits if all(['velocity' in fit, fit + '_gamma' in adata.var.keys()])] n_row, n_col = len(var_names), (1 + len(layers) + (mode == 'stochastic')*2) ax = pl.figure(figsize=(3*n_col, 2*n_row), dpi=120) if ax is None else ax gs = pl.GridSpec(n_row, n_col, wspace=0.3, hspace=0.5) for v, var in enumerate(var_names): ix = np.where(adata.var_names == var)[0][0] s, u = adata.layers['Ms'][:, ix], adata.layers['Mu'][:, ix] # spliced/unspliced phase portrait with steady-state estimate ax = pl.subplot(gs[v * n_col]) scatter(adata, x=s, y=u, color=color, frameon=True, title=var, xlabel='spliced', ylabel='unspliced', show=False, ax=ax, fontsize=fontsize, size=size, alpha=alpha, **kwargs) xnew = np.linspace(0, s.max() * 1.02) for fit in fits: linestyle = '--' if 'stochastic' in fit else '-' pl.plot(xnew, adata.var[fit + '_gamma'][ix] / adata.var[fit + '_beta'][ix] * xnew + adata.var[fit + '_offset'][ix] / adata.var[fit + '_beta'][ix], c='k', linestyle=linestyle) if v == len(var_names)-1: pl.legend(fits, loc='lower right', prop={'size': .5*fontsize}) ax.xaxis.set_major_locator(MaxNLocator(nbins=3)) ax.yaxis.set_major_locator(MaxNLocator(nbins=3)) ax.tick_params(axis='both', which='major', labelsize=.7*fontsize) # velocity and expression plots for l, layer in enumerate(layers): ax = pl.subplot(gs[v*n_col + l + 1]) title = 'expression' if layer == 'Ms' else layer scatter(adata, basis=basis, color=var, layer=layer, color_map=color_map, title=title, fontsize=fontsize, size=size, alpha=alpha, show=False, ax=ax, **kwargs) if mode == 'stochastic' is not None: ss, us = second_order_moments(adata) ax = pl.subplot(gs[v*n_col + len(layers) + 1]) x = 2 * (ss - s**2) - s y = 2 * (us - u * s) + u + 2 * s * \ adata.var['stochastic_velocity_offset'][ix] / adata.var['stochastic_velocity_beta'][ix] scatter(adata, x=x, y=y, color=color, title=var, fontsize=40/n_col, show=False, ax=ax, xlabel=r'2 $\Sigma_s - \langle s \rangle$', ylabel=r'2 $\Sigma_{us} + \langle u \rangle$', **kwargs) xnew = np.linspace(x.min(), x.max() * 1.02) fits = adata.layers.keys() if fits == 'all' else fits fits = [fit for fit in fits if 'velocity' in fit] for fit in fits: linestyle = '--' if 'stochastic' in fit else '-' pl.plot(xnew, adata.var[fit + '_gamma'][ix] / adata.var[fit + '_beta'][ix] * xnew + adata.var[fit + '_offset2'][ix] / adata.var[fit + '_beta'][ix], c='k', linestyle=linestyle) if v == len(var_names) - 1: pl.legend(fits, loc='lower right', prop={'size': 34/n_col})PKM`| %scvelo/plotting/velocity_embedding.pyfrom .utils import * from scanpy.api.pl import scatter from matplotlib.colors import is_color_like def velocity_embedding(adata, basis='umap', vbasis='velocity', layer=None, density=1, color=None, use_raw=True, sort_order=True, alpha=.2, groups=None, components=None, projection='2d', legend_loc='right margin', legend_fontsize=None, legend_fontweight=None, color_map=None, palette=None, frameon=False, right_margin=None, left_margin=None, size=None, title=None, show=True, save=None, ax=None, **kwargs): """Scatter plot with velocities along `.obs` or `.var` axes. Color the plot using annotations of observations (`.obs`), variables (`.var`) or expression of genes (`.var_names`). Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. basis: `str` (default: `'umap'`) Key for embedding coordinates. vbasis: `str` (default: `'velocity'`) Key for velocity embedding coordinates. color : `str` or `None` (default: `None`) Key for annotations of observations/cells or variables/genes. Returns ------- `matplotlib.Axis` if `show==False` """ if ax is None: ax = pl.figure(None, (14, 10), dpi=160).gca() scatter(adata, color=color, use_raw=use_raw, sort_order=sort_order, alpha=alpha, basis=basis, groups=groups, components=components, projection=projection, legend_loc=legend_loc, legend_fontsize=legend_fontsize, legend_fontweight=legend_fontweight, color_map=color_map, palette=palette, frameon=frameon, right_margin=right_margin, left_margin=left_margin, size=size, title=title, show=False, save=save, ax=ax) vbasis += '_' + basis if vbasis not in adata.obsm_keys(): raise ValueError( 'You need to run `tl.velocity_embedding` first to compute embedded velocity vectors.') X_emb = adata.obsm['X_' + basis][:, :2] V_emb = adata.obsm[vbasis] _kwargs = {"scale": 1, "width": .0005, "edgecolors": 'k', "headwidth": 9, "headlength": 10, "headaxislength": 6, "linewidth": .25} _kwargs.update(kwargs) ix_choice = np.random.choice(adata.n_obs, size=int(density * adata.n_obs), replace=False) X, V, C = X_emb[ix_choice], V_emb[ix_choice], interpret_colorkey(adata, color, layer)[ix_choice] if is_color_like(C[0]): pl.quiver(X[:, 0], X[:, 1], V[:, 0], V[:, 1], color=C, angles='xy', scale_units='xy', cmap=color_map, **_kwargs) else: pl.quiver(X[:, 0], X[:, 1], V[:, 0], V[:, 1], C, angles='xy', scale_units='xy', cmap=color_map, **_kwargs) if show: pl.show() else: return axPKHMa *scvelo/plotting/velocity_embedding_grid.pyfrom scanpy.api.pl import scatter from sklearn.neighbors import NearestNeighbors from scipy.stats import norm as normal import numpy as np import matplotlib.pyplot as pl def compute_velocity_on_grid(X_emb, V_emb, density=1, smooth=0.5, n_neighbors=None, min_mass=.5): """Computes the velocities for the grid points on the embedding Arguments --------- X_emb: np.ndarray embedding coordinates V_emb: np.ndarray embedded single cell velocity (obtained with 'velocity_embedding') density: float, default=1 smooth: float, default=.5 n_neighbors: int min_mass: float, default=.5 Returns ------- grid_coord: np.array grid point coordinates grid_velocity: np.array velocity for each grid point in the embedding """ # prepare grid n_obs, n_dim = X_emb.shape steps = (int(np.sqrt(n_obs) * density), int(np.sqrt(n_obs) * density)) grs = [] for dim_i in range(n_dim): m, M = np.min(X_emb[:, dim_i]), np.max(X_emb[:, dim_i]) m = m - .01 * np.abs(M - m) M = M + .01 * np.abs(M - m) gr = np.linspace(m, M, steps[dim_i]) grs.append(gr) meshes_tuple = np.meshgrid(*grs) grid_coord = np.vstack([i.flat for i in meshes_tuple]).T # estimate grid velocities if n_neighbors is None: n_neighbors = int(n_obs/50) nn = NearestNeighbors(n_neighbors=n_neighbors, n_jobs=-1) nn.fit(X_emb) dists, neighs = nn.kneighbors(grid_coord) std = np.mean([(g[1] - g[0]) for g in grs]) weight = normal.pdf(loc=0, scale=smooth * std, x=dists) p_mass = weight.sum(1) grid_velocity = (V_emb[neighs] * weight[:, :, None]).sum(1) / np.maximum(1, p_mass)[:, None] grid_coord, grid_velocity = grid_coord[p_mass > min_mass], grid_velocity[p_mass > min_mass] return grid_coord, grid_velocity def velocity_embedding_grid(adata, basis='umap', vbasis='velocity', density=1, color=None, min_mass=.5, smooth=.5, n_neighbors=None, principal_curve=False, use_raw=True, sort_order=True, alpha=.2, groups=None, components=None, projection='2d', legend_loc='right margin', legend_fontsize=None, legend_fontweight=None, color_map=None, palette=None, frameon=False, right_margin=None, left_margin=None, size=None, title=None, show=True, save=None, ax=None, **kwargs): """Scatter plot with grid velocities along `.obs` or `.var` axes. Color the plot using annotations of observations (`.obs`), variables (`.var`) or expression of genes (`.var_names`). Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. basis: `str` (default: `'umap'`) Key for embedding coordinates. vbasis: `str` (default: `'velocity'`) Key for velocity embedding coordinates. color : `str` or `None` (default: `None`) Key for annotations of observations/cells or variables/genes. Returns ------- `matplotlib.Axis` if `show==False` """ if ax is None: ax = pl.figure(None, (14, 10), dpi=120).gca() scatter(adata, color=color, use_raw=use_raw, sort_order=sort_order, alpha=alpha, basis=basis, groups=groups, components=components, projection=projection, legend_loc=legend_loc, legend_fontsize=legend_fontsize, legend_fontweight=legend_fontweight, color_map=color_map, palette=palette, frameon=frameon, right_margin=right_margin, left_margin=left_margin, size=size, title=title, show=False, save=save, ax=ax) vbasis += '_' + basis if vbasis not in adata.obsm_keys(): raise ValueError( 'You need to run `tl.velocity_embedding` first to compute embedded velocity vectors.') X_emb = adata.obsm['X_' + basis][:, :2] V_emb = adata.obsm[vbasis] _kwargs = {"scale": .5, "width": .001, "color": 'black', "edgecolors": 'k', "headwidth": 4.5, "headlength": 5, "headaxislength": 3, "linewidth": .2} _kwargs.update(kwargs) X, V = compute_velocity_on_grid(X_emb, V_emb, density, smooth, n_neighbors, min_mass) pl.quiver(X[:, 0], X[:, 1], V[:, 0], V[:, 1], angles='xy', scale_units='xy', **_kwargs) if principal_curve: curve = adata.uns['principal_curve']['projections'] pl.plot(curve[:, 0], curve[:, 1], c="w", lw=6, zorder=1000000) pl.plot(curve[:, 0], curve[:, 1], c="k", lw=3, zorder=2000000) if show: pl.show() else: return axPK.`Mvqoo scvelo/preprocessing/__init__.pyfrom .utils import show_proportions, cleanup from .moments import moments from .recipes import recipe_velocity PKd$M nnscvelo/preprocessing/moments.pyfrom scipy.sparse import csr_matrix import numpy as np from ..logging import * def moments(adata, renormalize=False, mode='connectivities', copy=False): """Computes first order moments for velocity estimation. Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. renormalize: `bool` (default: `True`) Renormalize the moments by total counts per cell to its median. mode: `'connectivities'` or `'distances'` (default: `'connectivities'`) Distance metric to use for moment computation. copy: `bool` (default: `False`) Return a copy instead of writing to adata. Returns ------- Returns or updates `adata` with the attributes Ms: `.layers` dense matrix with first order moments of spliced counts. Mu: `.layers` dense matrix with first order moments of unspliced counts. """ if 'neighbors' not in adata.uns: raise ValueError('You need to run `pp.neighbors` first to compute a neighborhood graph.') logg.info('computing moments', r=True) if mode in adata.uns['neighbors']: connectivities = adata.uns['neighbors'][mode] else: raise ValueError('mode can only be \'connectivities\' or \'distances\'') connectivities.setdiag(1) connectivities = connectivities.multiply(1. / connectivities.sum(1)) connectivities += connectivities.dot(connectivities*.5) adata.layers['Ms'] = csr_matrix.dot(connectivities, csr_matrix(adata.layers['spliced'])).A adata.layers['Mu'] = csr_matrix.dot(connectivities, csr_matrix(adata.layers['unspliced'])).A if renormalize: adata.layers['Ms'] = adata.layers['Ms'] / adata.layers['Ms'].sum(1)[:, None] * np.median(adata.layers['Ms'].sum(1)) adata.layers['Mu'] = adata.layers['Mu'] / adata.layers['Mu'].sum(1)[:, None] * np.median(adata.layers['Mu'].sum(1)) logg.info(' finished', time=True, end=' ' if settings.verbosity > 2 else '\n') logg.hint( 'added to `.layers`\n' ' \'Ms\', moments of spliced abundances\n' ' \'Mu\', moments of unspliced abundances') return adata if copy else None def second_order_moments(adata): """Computes second order moments for stochastic velocity estimation. Arguments --------- adata: `AnnData` Annotated data matrix. Returns ------- Mss: Second order moments for spliced abundances Mus: Second order moments for spliced with unspliced abundances """ if 'neighbors' not in adata.uns: raise ValueError( 'You need to run `pp.neighbors` first to compute a neighborhood graph.') connectivities = adata.uns['neighbors']['connectivities'] > 0 connectivities = connectivities.multiply(1. / connectivities.sum(1)) s, u = csr_matrix(adata.layers['spliced']), csr_matrix(adata.layers['unspliced']) Mss = csr_matrix.dot(connectivities, s.multiply(s)).A Mus = csr_matrix.dot(connectivities, s.multiply(u)).A return Mss, Mus def second_order_moments_u(adata): """Computes second order moments for stochastic velocity estimation. Arguments --------- adata: `AnnData` Annotated data matrix. Returns ------- Muu: Second order moments for unspliced abundances """ if 'neighbors' not in adata.uns: raise ValueError( 'You need to run `pp.neighbors` first to compute a neighborhood graph.') connectivities = adata.uns['neighbors']['connectivities'] > 0 connectivities = connectivities.multiply(1. / connectivities.sum(1)) u = csr_matrix(adata.layers['unspliced']) Muu = csr_matrix.dot(connectivities, u.multiply(u)).A return MuuPK$ $M  scvelo/preprocessing/recipes.pyfrom .moments import moments def recipe_velocity(adata, min_counts=10, n_top_genes=None, n_pcs=30, n_neighbors=30, log=True, copy=False): """Filtering, normalization, neighbors graph and moments. Expects non-logarithmized data. If using logarithmized data, pass `log=False`. The recipe runs the following steps .. code:: python sc.pp.filter_genes(adata, min_counts=10) sc.pp.normalize_per_cell(adata, key_n_counts='n_counts_all') sc.pp.filter_genes_dispersion(adata, n_top_genes=3000) sc.pp.normalize_per_cell(adata, layers='all) if log: sc.pp.log1p(adata) sc.pp.pca(adata, n_comps=30) sc.pp.neighbors(adata, n_neighbors=30, use_rep='X_pca') scv.pp.moments(adata) Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. n_top_genes: `int` (default: 1000) Number of genes to keep. log: `bool` (default: `True`) Take logarithm. n_pcs: `int` (default: 30) Number of principal components to use. n_neighbors: `int` (default: 30) Number of neighbors to use. copy: `bool` (default: `False`) Return a copy of `adata` instead of updating it. Returns ------- Returns or updates `adata` depending on `copy`. """ from scanpy.api.pp import filter_genes, filter_genes_dispersion, normalize_per_cell, log1p, pca, neighbors filter_genes(adata, min_counts=min_counts) normalize_per_cell(adata, key_n_counts='n_counts_all') filter_genes_dispersion(adata, n_top_genes=n_top_genes) normalize_per_cell(adata, layers='all') if log: log1p(adata) pca(adata, n_comps=n_pcs, svd_solver='arpack') neighbors(adata, n_neighbors=n_neighbors, use_rep='X_pca') moments(adata) return adata if copy else None PKaM6< < scvelo/preprocessing/utils.pyimport numpy as np def show_proportions(adata, copy=False): """Fraction of spliced/unspliced/ambiguous abundances Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. copy: `bool` (default: `False`) Return a copy instead of writing to adata. Returns ------- Prints the fractions of abundances. """ layers_keys = [key for key in ['spliced', 'unspliced', 'ambiguous'] if key in adata.layers.keys()] tot_mol_cell_layers = [adata.layers[key].sum(1) for key in layers_keys] mean_abundances = np.round( [np.mean(tot_mol_cell / np.sum(tot_mol_cell_layers, 0)) for tot_mol_cell in tot_mol_cell_layers], 2) print('abundance of ' + str(layers_keys) + ': ' + str(mean_abundances)) return adata if copy else None def cleanup(adata, clean='layers', keep={'spliced', 'unspliced'}, copy=False): """Deletes attributes not needed. Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. cleanup: `str` or list of `str` (default: `layers`) Which attributes to consider for freeing memory. keep: `str` or list of `str` (default: `['spliced', unspliced']`) Which attributes to keep. copy: `bool` (default: `False`) Return a copy instead of writing to adata. Returns ------- Returns or updates `adata` with selection of attributes kept. """ if any(['obs' in clean, 'all' in clean]): for key in list(adata.obs.keys()): if key not in keep: del adata.obs[key] if any(['var' in clean, 'all' in clean]): for key in list(adata.var.keys()): if key not in keep: del adata.var[key] if any(['obsm' in clean, 'all' in clean]): for key in list(adata.obsm.keys()): if key not in keep: del adata.obsm[key] if any(['varm' in clean, 'all' in clean]): for key in list(adata.varm.keys()): if key not in keep: del adata.varm[key] if any(['uns' in clean, 'all' in clean]): for key in list(adata.uns.keys()): if key not in keep: del adata.uns[key] if any(['layers' in clean, 'all' in clean]): for key in list(adata.layers.keys()): # remove layers that are not needed if key not in keep: del adata.layers[key] return adata if copy else None PK$M=5scvelo/tools/__init__.pyfrom .velocity import velocity from .velocity_graph import velocity_graph from .transition_matrix import transition_matrix from .velocity_embedding import velocity_embedding from .velocity_score import score_smoothness, score_transition, score_robustness PKa\MRUHPPscvelo/tools/solver.pyfrom .utils import prod_sum_obs from scipy.optimize import minimize import numpy as np import warnings def solve_cov(x, y, fit_offset=False): """Solution to least squares: gamma = cov(X,Y) / var(X) """ n_obs, n_var = x.shape if fit_offset: cov_xy, cov_xx = prod_sum_obs(x, y) / n_obs, prod_sum_obs(x, x) / n_obs mean_x, mean_y = x.mean(0), y.mean(0) numerator_offset = cov_xx * mean_y - cov_xy * mean_x numerator_gamma = cov_xy - mean_x * mean_y offset, gamma = (numerator_offset, numerator_gamma) / (cov_xx - mean_x * mean_x) else: offset, gamma = np.zeros(n_var), prod_sum_obs(x, y) / prod_sum_obs(x, x) return offset, gamma def solve2_inv(x, y, x2, y2, res_std=None, res2_std=None, fit_offset=False, fit_offset2=False): """Solution to the 2-dim generalized least squares: gamma = inv(X'QX)X'QY """ n_obs, n_var = x.shape offset, offset_ss = np.zeros(n_var, dtype="float32"), np.zeros(n_var, dtype="float32") gamma = np.ones(n_var, dtype="float32") if (res_std is None) or (res2_std is None): res_std, res2_std = np.ones(n_var), np.ones(n_var) ones, zeros = np.ones(n_obs), np.zeros(n_obs) with warnings.catch_warnings(): warnings.simplefilter("ignore") x, y = np.vstack((x/res_std, x2/res2_std)), np.vstack((y/res_std, y2/res2_std)) if fit_offset and fit_offset2: for i in range(n_var): A = np.c_[np.vstack((np.c_[ones/res_std[i], zeros], np.c_[zeros, ones/res2_std[i]])), x[:, i]] offset[i], offset_ss[i], gamma[i] = np.linalg.pinv(A.T.dot(A)).dot(A.T.dot(y[:, i])) elif fit_offset: for i in range(n_var): A = np.c_[np.hstack((ones/res_std[i], zeros)), x[:, i]] offset[i], gamma[i] = np.linalg.pinv(A.T.dot(A)).dot(A.T.dot(y[:, i])) elif fit_offset2: for i in range(n_var): A = np.c_[np.hstack((zeros, ones/res2_std[i])), x[:, i]] offset_ss[i], gamma[i] = np.linalg.pinv(A.T.dot(A)).dot(A.T.dot(y[:, i])) else: for i in range(n_var): A = np.c_[x[:, i]] gamma[i] = np.linalg.pinv(A.T.dot(A)).dot(A.T.dot(y[:, i])) return offset, offset_ss, gamma def solve2_mle(Ms, Mu, Mus, Mss, fit_offset=False, fit_offset2=False): """Maximizing the log likelihood using weights according to empirical bayes """ n_obs, n_var = Ms.shape offset, offset_ss = np.zeros(n_var, dtype="float32"), np.zeros(n_var, dtype="float32") gamma = np.ones(n_var, dtype="float32") def sse(A, data, b): sigma = (A.dot(data) - b).std(1) return np.log(sigma).sum() # np.log(np.sqrt(2*np.pi) * sigma).sum() + (.5 * (res/sigma[:, None])**2).sum() if fit_offset and fit_offset2: for i in range(n_var): data = np.vstack((Mu[:, i], Ms[:, i], Mus[:, i], Mss[:, i])) offset[i], offset_ss[i], gamma[i] = \ minimize(lambda m: sse(np.array([[1, -m[2], 0, 0], [1, m[2], 2, -2 * m[2]]]), data, b=np.array(m[0], m[1])), x0=(1e-4, 1e-4, 1), method="L-BFGS-B").x elif fit_offset: for i in range(n_var): data = np.vstack((Mu[:, i], Ms[:, i], Mus[:, i], Mss[:, i])) offset[i], gamma[i] = \ minimize(lambda m: sse(np.array([[1, -m[1], 0, 0], [1, m[1], 2, -2 * m[1]]]), data, b=np.array(m[0], 0)), x0=(1e-4, 1), method="L-BFGS-B").x elif fit_offset2: for i in range(n_var): data = np.vstack((Mu[:, i], Ms[:, i], Mus[:, i], Mss[:, i])) offset_ss[i], gamma[i] = \ minimize(lambda m: sse(np.array([[1, -m[1], 0, 0], [1, m[1], 2, -2 * m[1]]]), data, b=np.array(0, m[0])), x0=(1e-4, 1), method="L-BFGS-B").x else: for i in range(n_var): data = np.vstack((Mu[:, i], Ms[:, i], Mus[:, i], Mss[:, i])) gamma[i] = \ minimize(lambda m: sse(np.array([[1, -m, 0, 0], [1, m, 2, -2 * m]]), data, b=0), x0=gamma[i], method="L-BFGS-B").x return offset, offset_ss, gamma PKe$MEz>!scvelo/tools/transition_matrix.pyfrom scipy.spatial.distance import pdist, squareform from scipy.sparse import csr_matrix import numpy as np import warnings def transition_matrix(adata, vkey='velocity', basis=None, direction="forward", scale=10): """Computes transition probabilities by applying Gaussian kernel to cosine similarities x scale Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. vkey: `str` (default: `'velocity'`) Name of velocity estimates to be used. basis: `str` or `None` (default: `None`) Restrict transition to embedding if specified direction: `'forward'` or `'backward'` (default: `'foward'`) Whether to use the transition matrix to push forward or to pull backward scale: `float` (default: 10) Scale parameter of gaussian kernel. Returns ------- Returns sparse matrix with transition probabilities. """ if vkey+'_graph' not in adata.uns: raise ValueError( 'You need to run `tl.velocity_graph` first to compute cosine correlations.') T = np.expm1(adata.uns[vkey + '_graph'] * scale) dists = adata.uns['neighbors']['distances'] T = .5 * T + .5 * (dists > 0).multiply(T) with warnings.catch_warnings(): warnings.simplefilter("ignore") T = T.multiply(csr_matrix(1. / T.sum(1))) if basis in adata.obsm.keys(): if direction == 'backward': T = T.T X_emb = adata.obsm['X_' + basis] dists_emb = (T > 0).multiply(squareform(pdist(X_emb))) kernel = np.expm1(-dists_emb**2 / dists_emb.data.mean()**2 / 2) T = T.multiply(kernel) T = T.multiply(csr_matrix(1. / T.sum(1))) return T PKӸM\scvelo/tools/utils.pyimport numpy as np def prod_sum_obs(A, B): """dot product and sum over axis 0 (obs) equivalent to np.sum(A * B, 0) """ return np.einsum('ij, ij -> j', A, B) def prod_sum_var(A, B): """dot product and sum over axis 1 (var) equivalent to np.sum(A * B, 1) """ return np.einsum('ij, ij -> i', A, B) def norm(A): """computes the L2-norm along axis 1 (e.g. genes or embedding dimensions) equivalent to np.linalg.norm(A, axis=1) """ return np.sqrt(np.einsum('ij, ij -> i', A, A)) PK\M)scvelo/tools/velocity.pyfrom ..logging import logg, settings from ..preprocessing.moments import moments, second_order_moments from .solver import solve_cov, solve2_inv, solve2_mle from .utils import prod_sum_obs import numpy as np import warnings warnings.simplefilter(action='ignore', category=FutureWarning) def velocity(adata, vkey='velocity', mode='stochastic', fit_offset=False, fit_offset2=False, filter_genes=False, copy=False): """Estimates velocities in a gene-specific manner Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. vkey: `str` (default: `'velocity'`) Name under which to refer to the computed velocities for `velocity_graph` and `velocity_embedding`. mode: `'deterministic'`, `'stochastic'` or `'bayes'` (default: `'stochastic'`) Whether to run the estimation using the deterministic or stochastic model of transcriptional dynamics. `'bayes'` solves the stochastic model and accounts for heteroscedasticity, but is slower than `'stochastic'`. fit_offset: `bool` (default: `False`) Whether to fit with offset for first order moment dynamics. fit_offset2: `bool`, (default: `False`) Whether to fit with offset for second order moment dynamics. filter_genes: `bool` (default: `True`) Whether to remove genes that are not used for further velocity analysis. copy: `bool` (default: `False`) Return a copy instead of writing to `adata`. Returns ------- Returns or updates `adata` with the attributes velocity: `.layers` velocity vectors for each individual cell variance_velocity: `.layers` velocity vectors for the cell variances velocity_offset, velocity_beta, velocity_gamma, velocity_r2: `.var` parameters """ if 'Ms' not in adata.layers.keys(): moments(adata) Ms, Mu = adata.layers['Ms'], adata.layers['Mu'] logg.info('computing velocities', r=True) beta = np.ones(adata.n_vars, dtype="float32") # estimate all rates in units of the splicing rate offset, gamma = solve_cov(Ms, Mu, fit_offset) stochastic = any([mode in item for item in ['stochastic', 'bayes', 'alpha']]) if stochastic: Mss, Mus = second_order_moments(adata) offset2, gamma2 = solve_cov(2 * Mss - Ms, 2 * Mus + Mu, fit_offset2) # initialize covariance matrix res_std = (Mu - gamma[None, :] * Ms - offset[None, :]).std(0) res2_std = (2 * Mus + Mu - gamma2[None, :] * (2 * Mss - Ms) - offset2[None, :]).std(0) # solve multiple regression offset, offset2, gamma = solve2_mle(Ms, Mu, Mus, Mss, fit_offset, fit_offset2) if mode == 'bayes' \ else solve2_inv(Ms, Mu, 2 * Mss - Ms, 2 * Mus + Mu, res_std, res2_std, fit_offset, fit_offset2) adata.layers['variance_velocity'] = beta[None, :] * (2 * Mus - 2 * Ms * Mu + Mu) - \ gamma[None, :] * (2 * Mss - 2 * Ms ** 2 - Ms) - \ offset2[None, :] + 2 * offset[None, :] * Ms # if mode == 'alpha': # Muu = second_order_moments_u(adata) # offset2u, alpha = solve_cov(np.ones(Mu.shape) + 2 * Mu, 2 * Muu - Mu) # pars.extend([offset2u, alpha]) # pars_str.extend(['_offset2u', '_alpha']) res, tot = Mu - gamma[None, :] * Ms - offset[None, :], Mu - Mu.mean(0) with warnings.catch_warnings(): warnings.simplefilter("ignore") r2 = np.ones(adata.n_vars) - prod_sum_obs(res, res) / prod_sum_obs(tot, tot) gamma[np.isnan(gamma)], r2[np.isnan(r2)], res[np.isnan(res)] = 0, 0, 0 pars = [offset, offset2, beta, gamma, r2] if stochastic else [offset, beta, gamma, r2] pars_str = ['_offset', '_offset2', '_beta', '_gamma', '_r2'] if stochastic else ['_offset', '_beta', '_gamma', '_r2'] for i, par in enumerate(pars_str): adata.var[vkey + par] = pars[i] adata.layers[vkey] = Mu - gamma[None, :] * Ms - offset[None, :] adata.var['velocity_genes'] = (r2 > .01) & (gamma > .01) if filter_genes: adata._inplace_subset_var(adata.var['velocity_genes']) logg.info(' finished', time=True, end=' ' if settings.verbosity > 2 else '\n') logg.hint( 'added to `.layers`\n' ' \'' + vkey + '\', velocity vectors for each individual cell') return adata if copy else None PK/~$MaH  "scvelo/tools/velocity_embedding.pyfrom ..logging import logg, settings from .utils import norm from .transition_matrix import transition_matrix import numpy as np import warnings def velocity_embedding(adata, basis='tsne', vkey='velocity', scale=10, retain_scale=False, copy=False): """Computes the single cell velocities in the embedding Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. basis: `str` (default: `'tsne'`) Which embedding to use. vkey: `str` (default: `'velocity'`) Name of velocity estimates to be used. scale: `int` (default: 10) Scale parameter of gaussian kernel for transition matrix. retain_scale: `bool` (default: `False`) Whether to retain scale from high dimensional space in embedding. Returns ------- Returns or updates `adata` with the attributes velocity_umap: `.obsm` coordinates of velocity projection on embedding """ T = transition_matrix(adata, vkey=vkey, basis=basis, scale=scale) logg.info('computing velocity embedding', r=True) if 'X_' + basis not in adata.obsm_keys(): raise ValueError( 'You need to run `tl.{}` first to compute embedded velocity vectors.').format(basis) X_emb = adata.obsm['X_' + basis][:, :2] V_emb = np.zeros((adata.n_obs, 2)) with warnings.catch_warnings(): warnings.simplefilter("ignore") if adata.n_obs < 8192: TA = T.A for i in range(adata.n_obs): indices = T[i].indices diff = X_emb[indices] - X_emb[i, None] diff /= norm(diff)[:, None] diff[np.isnan(diff)] = 0 # zero diff in a steady-state V_emb[i] = TA[i, indices].dot(diff) - diff.sum(0) / len(indices) else: for i in range(adata.n_obs): indices = T[i].indices diff = X_emb[indices] - X_emb[i, None] diff /= norm(diff)[:, None] diff[np.isnan(diff)] = 0 # zero diff in a steady-state V_emb[i] = T[i].data.dot(diff) - diff.sum(0) / len(indices) if retain_scale: delta = T.dot(adata.X) - adata.X cos_proj = (adata.obsm[vkey] * delta).sum(1) / norm(delta) V_emb *= np.clip(cos_proj, 0, 1) vkey += '_' + basis adata.obsm[vkey] = V_emb logg.info(' finished', time=True, end=' ' if settings.verbosity > 2 else '\n') logg.hint( 'added to `.obsm`\n' ' \'' + vkey + '\', embedded velocity vectors') return adata if copy else None PKtdMFscvelo/tools/velocity_graph.pyfrom ..logging import logg, settings from .utils import norm from .velocity import velocity from scanpy.api import Neighbors from scipy.sparse import coo_matrix import numpy as np import warnings def get_indices(dist): n_neighbors = (dist > 0).sum(1).min() rows_idx = np.where((dist > 0).sum(1) > n_neighbors)[0] for row_idx in rows_idx: col_idx = dist[row_idx].indices[n_neighbors:] dist[row_idx, col_idx] = 0 dist.eliminate_zeros() indices = dist.indices.reshape((-1, n_neighbors)) return indices, dist def get_iterative_indices(indices, index, n_recurse_neighbors): return indices[get_iterative_indices(indices, index, n_recurse_neighbors-1)] \ if n_recurse_neighbors > 1 else indices[index] class Cosines: def __init__(self, X, V, indices, n_recurse_neighbors, sqrt_transform=False): self.X = X.copy() self.indices = indices self.n_recurse_neighbors = n_recurse_neighbors self.sqrt_transform = sqrt_transform self.V = V.copy() if sqrt_transform: self.V = np.sqrt(np.abs(self.V)) * np.sign(self.V) self.V = self.V - self.V.mean(1)[:, None] self.V_norm = norm(self.V) def compute(self, ixs): vals, rows, cols, async_id = [], [], [], [] if self.sqrt_transform: for i in ixs: knn_ixs = np.unique(get_iterative_indices(self.indices, i, self.n_recurse_neighbors)) dX = self.X[knn_ixs] - self.X[i, None] dX = np.sqrt(np.abs(dX)) * np.sign(dX) dX -= dX.mean(1)[:, None] vals.extend(np.einsum('ij, j', dX, self.V[i]) / (norm(dX) * self.V_norm[i])[None, :]) rows.extend(np.ones(len(knn_ixs)) * i) cols.extend(knn_ixs) else: for i in ixs: knn_ixs = np.unique(get_iterative_indices(self.indices, i, self.n_recurse_neighbors)) dX = self.X[knn_ixs] - self.X[i, None] dX -= dX.mean(1)[:, None] vals.extend(np.einsum('ij, j', dX, self.V[i]) / (norm(dX) * self.V_norm[i])[None, :]) rows.extend(np.ones(len(knn_ixs)) * i) cols.extend(knn_ixs) async_id = int(ixs[0] / len(ixs)) # keep track of order vals = np.hstack(vals) vals[np.isnan(vals)] = 0 vals = np.clip(vals, 1e-10, 1) return [async_id, vals, rows, cols] def velocity_graph(adata, vkey='velocity', n_recurse_neighbors=2, n_neighbors=None, sqrt_transform=False, copy=False): """Computes a velocity graph based on cosine similarities. The cosine similarities are computed between velocities and potential cell state transitions. Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. vkey: `str` (default: `'velocity'`) Name of velocity estimates to be used. n_neighbors: `int` or `None` (default: None) Use fixed number of neighbors or do recursive neighbor search (if `None`). n_recurse_neighbors: `int` (default: 2) Number of recursions to be done for neighbors search. sqrt_transform: `bool` (default: `False`) Whether to variance-transform the cell states and velocities before computing cosine similarities. copy: `bool` (default: `False`) Return a copy instead of writing to adata. Returns ------- Returns or updates `adata` with the attributes velocity_graph: `.uns` sparse matrix with transition probabilities """ if vkey not in adata.layers.keys(): velocity(adata) logg.info('computing velocity graph', r=True) if n_neighbors is not None: keys = [key for key in ['X_pca', 'X_tsne', 'X_umap'] if key in adata.obsm.keys()] neighs = Neighbors(adata) neighs.compute_neighbors(n_neighbors=n_neighbors, use_rep=keys[-1], n_pcs=10) indices = get_indices(dist=neighs.distances)[0] n_recurse_neighbors = 1 else: indices = get_indices(dist=adata.uns['neighbors']['distances'])[0] cos = Cosines(adata.layers['Ms'][:, adata.var['velocity_genes']], adata.layers[vkey][:, adata.var['velocity_genes']], indices, n_recurse_neighbors, sqrt_transform) with warnings.catch_warnings(): warnings.simplefilter("ignore") if True: _, vals, rows, cols = cos.compute(range(adata.n_obs)) # list(map(cos.compute, range(adata.n_obs))) else: from multiprocessing import Pool pool = Pool(n_jobs) ranges = np.array_split(range(adata.n_obs), n_jobs) result = pool.map_async(cos.compute, ranges).get() # async will probably destroy order result.sort(key=lambda tup: tup[0]) # restore order vals, rows, cols = [], [], [] for r in result: vals.extend(r[1]) rows.extend(r[2]) cols.extend(r[3]) graph = coo_matrix((vals, (rows, cols)), shape=(adata.n_obs, adata.n_obs)) graph.eliminate_zeros() adata.uns[vkey+'_graph'] = graph.tocsr() logg.info(' finished', time=True, end=' ' if settings.verbosity > 2 else '\n') logg.hint( 'added to `.uns`\n' ' \'' + vkey + '_graph\', sparse matrix with cosine correlations') return adata if copy else None PKMI #scvelo/tools/velocity_pseudotime.pyimport numpy as np def principal_curve(adata, basis='pca', n_comps=4, clusters_list=None, copy=False): """Computes the principal curve Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. basis: `str` (default: `'pca'`) Basis to use for computing the principal curve. n_comps: `int` (default: 4) Number of pricipal components to be used. copy: `bool`, (default: `False`) Return a copy instead of writing to adata. Returns ------- Returns or updates `adata` with the attributes principal_curve: `.uns` dictionary containing `projections`, `ixsort` and `arclength` """ import rpy2.robjects as robjects from rpy2.robjects.packages import importr if clusters_list is not None: cell_subset = np.array([label in clusters_list for label in adata.obs['clusters']]) X_emb = adata[cell_subset].obsm['X_' + basis][:, :n_comps] else: cell_subset = None X_emb = adata.obsm['X_' + basis][:, :n_comps] n_obs, n_dim = X_emb.shape # convert array to R matrix xvec = robjects.FloatVector(X_emb.T.reshape((X_emb.size))) X_R = robjects.r.matrix(xvec, nrow=n_obs, ncol=n_dim) fit = importr("princurve").principal_curve(X_R) adata.uns['principal_curve'] = dict() adata.uns['principal_curve']['ixsort'] = ixsort = np.array(fit[1])-1 adata.uns['principal_curve']['projections'] = np.array(fit[0])[ixsort] adata.uns['principal_curve']['arclength'] = np.array(fit[2]) adata.uns['principal_curve']['cell_subset'] = cell_subset return adata if copy else None # def pseudotime(adata, iroot=229, clusters_list=None, copy=False): # # iroot = adata.obsm['X_umap'][adata.obs['clusters']=='neuroblast 1'][:,1].argmax() # root_cell = adata.obs_names[iroot] # if clusters_list is not None: # ['granule mature', 'neuroblast 1', 'neuroblast 2', 'granule immature'] # cell_subset = np.array([label in clusters_list for label in adata.obs['clusters']]) # adata_subset = adata.copy() # adata_subset._inplace_subset_obs(cell_subset) # adata_subset.uns['iroot'] = np.where(adata_subset.obs_names == root_cell)[0][0] # dpt(adata_subset, n_branchings=0) # # adata.obs['dpt_pseudotime'] = np.zeros(adata.n_obs) # adata.obs['dpt_pseudotime'][cell_subset] = adata_subset.obs['dpt_pseudotime'] # adata.obs['dpt_pseudotime'][~cell_subset] = -.5 # # return adata if copy else None PKB~$Mp4 4 scvelo/tools/velocity_score.pyfrom ..logging import logg from .utils import prod_sum_var, norm from ..preprocessing.moments import moments from .transition_matrix import transition_matrix from scanpy.api.pp import neighbors import numpy as np def score_transition(adata, vkey='velocity', scale=10, copy=False): # compute velocity score as in correlation of mean transition with velocity if vkey not in adata.layers.keys(): raise ValueError( 'You need to run `tl.velocity` first.') dX = transition_matrix(adata, vkey=vkey, scale=scale).multiply(adata.layers['Ms']) - adata.layers['Ms'] dX -= dX.mean(1)[:, None] V = adata.layers[vkey].copy() V -= V.mean(1)[:, None] adata.obs[vkey+'_score_transition'] = prod_sum_var(dX, V) / (norm(dX) * norm(V)) logg.hint( 'added to `.obs`\n' ' ' + vkey + '_score_transition') return adata if copy else None def score_smoothness(adata, vkey='velocity', copy=False): if vkey not in adata.layers.keys(): raise ValueError( 'You need to run `tl.velocity` first.') X, V = adata.layers['Ms'].copy(), adata.layers[vkey].copy() n_neighbors = adata.uns['neighbors']['params']['n_neighbors'] - 1 indices = adata.uns['neighbors']['distances'].indices.reshape((-1, n_neighbors)) V -= V.mean(1)[:, None] V_norm = norm(V) R = np.zeros(adata.n_obs) for i in range(adata.n_obs): Vi_neighs = V[indices[i]] Vi_neighs -= Vi_neighs.mean(1)[:, None] R[i] = np.mean(np.einsum('ij, j', Vi_neighs, V[i]) / (norm(Vi_neighs) * V_norm[i])[None, :]) adata.obs[vkey + '_score_smoothness'] = R logg.hint( 'added to `.obs`\n' ' ' + vkey + '_score_smoothness') return adata if copy else None def random_subsample(adata, frac=.5): subset = np.random.choice([True, False], size=adata.n_obs, p=[frac, 1-frac]).sum() adata.obs['subset'] = subset adata_subset = adata[subset].copy() neighbors(adata_subset) moments(adata_subset) return adata_subset def score_robustness(adata, adata_subset, vkey='velocity', copy=False): V = adata[adata.obs['subset']].layers[vkey] V_subset = adata_subset.layers[vkey] adata_subset.obs[vkey + '_score_robustness'] = prod_sum_var(V, V_subset) / (norm(V) * norm(V_subset)) return adata_subset if copy else None PKwMm9}/scvelo-0.1.5.dist-info/LICENSEBSD 3-Clause License Copyright (c) 2018, Theis Lab All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PK!HSmPOscvelo-0.1.5.dist-info/WHEEL HM K-*ϳR03rOK-J,/RH,rzd&Y)r$[)T&UrPK!H]bscvelo-0.1.5.dist-info/METADATAWrF}ʋ)hf9Jڎrhxf .Wj!/!^*@L_Oi"rUӦTV%ͤZ*?T\7el7H:or^gjŲ(%2;)) fn$~lI~M\x3Ğ__QؘVU.ԙ_;LqKV>'j6Rb&p5cT6TekzhK. (~O΀NzӝO"P&ıINfKy^ִw;\gW-cS~eMdO +z0n.?%xaWxա\yr~W?+)nuY_!6~[;OZǽcTs٭YsMQ'USEĹ&'u }aH%-$R,Ҧ7IIrv2аـ켩\];s9-euɥ5QCG8ȶOqPu< O#YNc[6*n`i*o!U2]DRC[l&ħ5BܕP-Q_?hjxK,|pG}bwp?z$Ol"V8_cɛ 3$ɥF<~r}+XfGY|$[Mة~3bEQ; UE#٧*c+Ș8a)` ,==*kU܃+nn^\pb@pb೓̫r롡 Ɔ}Z?ڶ'Bܺw" f?O0a0y# }Kz}™p\`Ţ^?L5& s)PhD@l~E ] ā7L)`yu~uP,U`n)UǠ\?G4,XMXc aݔeqc],'V\x)R`ī[(x(߳ ?)ő[` 8^]uqmRF]vX] jX$H`G*qhÎۨ&W|Y5,Ds"f}ye} s)H%g~1\ؽtA%kkuiD52A| VVJ[M׆wVUO¤IGr"mT?PK!HShscvelo-0.1.5.dist-info/RECORDɒJ}? V3#AA 2$2 OQVwql>">@AC| !N6n% ,&b t3f4@pP?$ દ5i] ^C\eNcI; vDX*eE@UcPɽfpec0G3Dϫ8# Swi0,,+:aʇ^?gQ0mE ^иX,*F`RԹ~arMcmɿq(#I:Q-|˥bEtIP(P bZ*8k:ˮ$Opcm]P|eMgJ[3uU9bGvZA;ԣ}c`Ѓ+PX;++Ks4pqbOQ7ѽS1.($BR4h; <|,nM0l/2"n׍(ssb`[4_>ti1ˇCk+u&sF+RK4K~v .ZʓiswFesk:l[Aآ swچqQ9:;ag4SD\RS԰-CpVwzz/EZLl/MWώNa5E+&PHGZz:!scvelo/tools/transition_matrix.pyPKӸM\ۉscvelo/tools/utils.pyPK\M)scvelo/tools/velocity.pyPK/~$MaH  "Mscvelo/tools/velocity_embedding.pyPKtdMFscvelo/tools/velocity_graph.pyPKMI #żscvelo/tools/velocity_pseudotime.pyPKB~$Mp4 4 scvelo/tools/velocity_score.pyPKwMm9}/Lscvelo-0.1.5.dist-info/LICENSEPK!HSmPOmscvelo-0.1.5.dist-info/WHEELPK!H]bscvelo-0.1.5.dist-info/METADATAPK!HShscvelo-0.1.5.dist-info/RECORDPK