# ---- scvelo/__init__.py ----
"""scvelo - stochastic single cell RNA velocity"""
from .get_version import get_version
__version__ = get_version(__file__)
del get_version

from .read_load import AnnData, read, read_loom, load, read_csv
from .preprocessing.moments import Neighbors
from .tools.run import run_all
from .tools.velocity import Velocity
from .tools.velocity_graph import VelocityGraph

from . import pp
from . import tl
from . import pl
from . import datasets
from . import utils
from . import logging
from . import settings


# ---- scvelo/datasets.py ----
"""Builtin Datasets.
"""
from .read_load import read, load
from .preprocessing.utils import cleanup
import numpy as np


def toy_data(n_obs):
    """Randomly samples a new dataset of size n_obs from the Dentate Gyrus dataset.

    Arguments
    ---------
    n_obs: `int`
        Size of the sampled dataset

    Returns
    -------
    Returns `adata` object
    """
    adata = dentategyrus()
    indices = np.random.choice(adata.n_obs, n_obs)
    adata = adata[indices]
    adata.obs_names = np.array(range(adata.n_obs), dtype='str')
    adata.var_names_make_unique()
    return adata


def dentategyrus():
    """Dentate Gyrus dataset from Hochgerner et al. (2018).

    Dentate gyrus is part of the hippocampus involved in learning, episodic memory
    formation and spatial coding. It is measured using 10X Genomics Chromium and
    described in Hochgerner et al. (2018). The data consists of 25,919 genes across
    3,396 cells and provides several interesting characteristics.

    Returns
    -------
    Returns `adata` object
    """
    filename = 'data/DentateGyrus/10X43_1.loom'
    url = 'http://pklab.med.harvard.edu/velocyto/DG1/10X43_1.loom'
    adata = read(filename, backup_url=url, cleanup=True, sparse=True, cache=True)
    cleanup(adata, clean='all', keep={'spliced', 'unspliced', 'ambiguous'})

    url_louvain = 'https://github.com/theislab/scvelo_notebooks/raw/master/data/DentateGyrus/DG_clusters.npy'
    url_umap = 'https://github.com/theislab/scvelo_notebooks/raw/master/data/DentateGyrus/DG_umap.npy'
    adata.obs['clusters'] = load('./data/DentateGyrus/DG_clusters.npy', url_louvain)
    adata.obsm['X_umap'] = load('./data/DentateGyrus/DG_umap.npy', url_umap)
    adata._sanitize()
    return adata


def forebrain():
    """Developing human forebrain.

    Forebrain tissue of a week 10 embryo, focusing on the glutamatergic neuronal lineage.

    Returns
    -------
    Returns `adata` object
    """
    filename = 'data/ForebrainGlut/hgForebrainGlut.loom'
    url = 'http://pklab.med.harvard.edu/velocyto/hgForebrainGlut/hgForebrainGlut.loom'
    adata = read(filename, backup_url=url, cleanup=True, sparse=True, cache=True)
    adata.var_names_make_unique()
    return adata


# ---- scvelo/get_version.py ----
"""
A minimalistic version helper in the spirit of versioneer, that is able to run
without a build step using pkg_resources.
Developed by P Angerer, see https://github.com/flying-sheep/get_version.
"""
import re
import os
from pathlib import Path
from subprocess import run, PIPE, CalledProcessError
from typing import NamedTuple, List, Union, Optional

RE_VERSION = r"([\d.]+?)(?:\.dev(\d+))?(?:[_+-]([0-9a-zA-Z.]+))?"
RE_GIT_DESCRIBE = r"v?(?:([\d.]+)-(\d+)-g)?([0-9a-f]{7})(-dirty)?"
ON_RTD = os.environ.get("READTHEDOCS") == "True"


def match_groups(regex, target):
    match = re.match(regex, target)
    if match is None:
        raise re.error(f"Regex does not match “{target}”.
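
# Illustrative usage sketch for the datasets module above (not part of the package
# source): loads the Dentate Gyrus loom file, caching it under ./data/, and draws a
# random subsample via toy_data. Requires an internet connection on the first call.
import scvelo as scv

adata = scv.datasets.dentategyrus()       # 25,919 genes x 3,396 cells
adata_small = scv.datasets.toy_data(500)  # random subsample of 500 cells
print(adata_small)
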
RE Pattern: {regex}", regex) return match.groups() class Version(NamedTuple): release: str dev: Optional[str] labels: List[str] @staticmethod def parse(ver): release, dev, labels = match_groups(f"{RE_VERSION}$", ver) return Version(release, dev, labels.split(".") if labels else []) def __str__(self): release = self.release if self.release else "0.0" dev = f".dev{self.dev}" if self.dev else "" labels = f'+{".".join(self.labels)}' if self.labels else "" return f"{release}{dev}{labels}" def get_version_from_dirname(name, parent): """Extracted sdist""" parent = parent.resolve() re_dirname = re.compile(f"{name}-{RE_VERSION}$") if not re_dirname.match(parent.name): return None return Version.parse(parent.name[len(name) + 1 :]) def get_version_from_git(parent): parent = parent.resolve() try: p = run(["git", "rev-parse", "--show-toplevel"], cwd=parent, stdout=PIPE, stderr=PIPE, encoding="utf-8", check=True) except (OSError, CalledProcessError): return None if Path(p.stdout.rstrip("\r\n")).resolve() != parent.resolve(): return None p = run(["git", "describe", "--tags", "--dirty", "--always", "--long", "--match", "v[0-9]*"], cwd=parent, stdout=PIPE, stderr=PIPE, encoding="utf-8", check=True) release, dev, hex_, dirty = match_groups( f"{RE_GIT_DESCRIBE}$", p.stdout.rstrip("\r\n") ) labels = [] if dev == "0": dev = None else: labels.append(hex_) if dirty and not ON_RTD: labels.append("dirty") return Version(release, dev, labels) def get_version_from_metadata(name: str, parent: Optional[Path] = None): try: from pkg_resources import get_distribution, DistributionNotFound except ImportError: return None try: pkg = get_distribution(name) except DistributionNotFound: return None # For an installed package, the parent is the install location path_pkg = Path(pkg.location).resolve() if parent is not None and path_pkg != parent.resolve(): msg = f"""\ metadata: Failed; distribution and package paths do not match: {path_pkg} != {parent.resolve()}\ """ return None return Version.parse(pkg.version) def get_version(package: Union[Path, str]) -> str: """Get the version of a package or module Pass a module path or package name. The former is recommended, since it also works for not yet installed packages. Supports getting the version from #. The directory name (as created by ``setup.py sdist``) #. The output of ``git describe`` #. The package metadata of an installed package (This is the only possibility when passing a name) Args: package: package name or module path (``…/module.py`` or ``…/module/__init__.py``) """ path = Path(package) if not path.suffix and len(path.parts) == 1: # Is probably not a path v = get_version_from_metadata(package) if v: return str(v) if path.suffix != ".py": msg = f"“package” is neither the name of an installed module nor the path to a .py file." if path.suffix: msg += f" Unknown file suffix {path.suffix}" raise ValueError(msg) if path.name == "__init__.py": name = path.parent.name parent = path.parent.parent else: name = path.with_suffix("").name parent = path.parent return str( get_version_from_dirname(name, parent) or get_version_from_git(parent) or get_version_from_metadata(name, parent) or "0.0.0" ) __version__ = get_version(__file__) if __name__ == "__main__": print(__version__)PK~YMSDscvelo/logging.py"""Logging and Profiling """ from . 
import settings from datetime import datetime from time import time as get_time _VERBOSITY_LEVELS_FROM_STRINGS = {'error': 0, 'warn': 1, 'info': 2, 'hint': 3} def info(*args, **kwargs): return msg(*args, v='info', **kwargs) def error(*args, **kwargs): args = ('Error:',) + args return msg(*args, v='error', **kwargs) def warn(*args, **kwargs): args = ('WARNING:',) + args return msg(*args, v='warn', **kwargs) def hint(*args, **kwargs): return msg(*args, v='hint', **kwargs) def _settings_verbosity_greater_or_equal_than(v): if isinstance(settings.verbosity, str): settings_v = _VERBOSITY_LEVELS_FROM_STRINGS[settings.verbosity] else: settings_v = settings.verbosity return settings_v >= v def msg(*msg, v=4, time=False, memory=False, reset=False, end='\n', no_indent=False, t=None, m=None, r=None): """Write message to logging output. Log output defaults to standard output but can be set to a file by setting `sc.settings.log_file = 'mylogfile.txt'`. v : {'error', 'warn', 'info', 'hint'} or int, (default: 4) 0/'error', 1/'warn', 2/'info', 3/'hint', 4, 5, 6... time, t : bool, optional (default: False) Print timing information; restart the clock. memory, m : bool, optional (default: Faulse) Print memory information. reset, r : bool, optional (default: False) Reset timing and memory measurement. Is automatically reset when passing one of ``time`` or ``memory``. end : str (default: '\n') Same meaning as in builtin ``print()`` function. no_indent : bool (default: False) Do not indent for ``v >= 4``. """ # variable shortcuts if t is not None: time = t if m is not None: memory = m if r is not None: reset = r if isinstance(v, str): v = _VERBOSITY_LEVELS_FROM_STRINGS[v] if v == 3: # insert "--> " before hints msg = ('-->',) + msg if v >= 4 and not no_indent: msg = (' ',) + msg if _settings_verbosity_greater_or_equal_than(v): if not time and not memory and len(msg) > 0: _write_log(*msg, end=end) if reset: try: settings._previous_memory_usage, _ = get_memory_usage() except: pass settings._previous_time = get_time() if time: elapsed = get_passed_time() msg = msg + ('({})'.format(_sec_to_str(elapsed)),) _write_log(*msg, end=end) if memory: _write_log(get_memory_usage(), end=end) m = msg def _write_log(*msg, end='\n'): """Write message to log output, ignoring the verbosity level. This is the most basic function. Parameters ---------- *msg : One or more arguments to be formatted as string. Same behavior as print function. """ from .settings import logfile if logfile == '': print(*msg, end=end) else: out = '' for s in msg: out += str(s) + ' ' with open(logfile, 'a') as f: f.write(out + end) def _sec_to_str(t): """Format time in seconds. Parameters ---------- t : int Time in seconds. """ from functools import reduce return "%d:%02d:%02d.%02d" % \ reduce(lambda ll, b: divmod(ll[0], b) + ll[1:], [(t*100,), 100, 60, 60]) def get_passed_time(): now = get_time() elapsed = now - settings._previous_time settings._previous_time = now return elapsed def print_passed_time(): now = get_time() elapsed = now - settings._previous_time settings._previous_time = now from functools import reduce elapsed = "%d:%02d:%02d.%02d" % reduce(lambda ll, b: divmod(ll[0], b) + ll[1:], [(elapsed*100,), 100, 60, 60]) return elapsed def print_version(): from . 
import __version__ _write_log('Running scvelo', __version__, 'on {}.'.format(datetime.now().strftime("%Y-%m-%d %H:%M"))) def print_versions(): for mod in ['scvelo', 'scanpy', 'anndata', 'loompy', 'numpy', 'scipy', 'matplotlib', 'sklearn', 'pandas']: mod_name = mod[0] if isinstance(mod, tuple) else mod mod_install = mod[1] if isinstance(mod, tuple) else mod try: print('{}=={}'.format(mod_install, __import__(mod_name).__version__), end=' ') except (ImportError, AttributeError): pass print() def get_date_string(): return datetime.datetime.now().strftime("%Y-%m-%d %H:%M") from anndata.logging import print_memory_usage from anndata.logging import get_memory_usage from sys import stdout class ProgressReporter: def __init__(self, total, interval=3): self.count = 0 self.total = total self.timestamp = get_time() self.interval = interval def update(self): self.count += 1 if settings.verbosity > 1 and (get_time() - self.timestamp > self.interval or self.count == self.total): self.timestamp = get_time() percent = int(self.count * 100 / self.total) stdout.write('\r' + '... %d%%' % percent) stdout.flush() def finish(self): if settings.verbosity > 1: stdout.write('\r') stdout.flush() PK(MN/ scvelo/pl.pyfrom scvelo.plotting import * PK(Md## scvelo/pp.pyfrom scvelo.preprocessing import * PKQqM$;scvelo/read_load.pyimport os, re import numpy as np import pandas as pd from urllib.request import urlretrieve from pathlib import Path from scanpy.api import AnnData, read, read_loom def load(filename, backup_url=None, **kwargs): numpy_ext = {'npy', 'npz'} pandas_ext = {'csv', 'txt'} if not os.path.exists(filename) and backup_url is None: raise FileNotFoundError('Did not find file {}.'.format(filename)) elif not os.path.exists(filename): d = os.path.dirname(filename) if not os.path.exists(d): os.makedirs(d) urlretrieve(backup_url, filename) ext = Path(filename).suffixes[-1][1:] if ext in numpy_ext: return np.load(filename, **kwargs) elif ext in pandas_ext: return pd.read_csv(filename, **kwargs) else: raise ValueError('"{}" does not end on a valid extension.\n' 'Please, provide one of the available extensions.\n{}\n' .format(filename, numpy_ext|pandas_ext)) read_csv = load def clean_obs_names(data, base='[AGTCBDHKMNRSVWY]', ID_length=12, copy=False): """Cleans up the obs_names and identifies sample names. For example an obs_name 'samlple1_AGTCdate' is changed to 'AGTC' of the sample 'sample1_date'. The sample name is then saved in obs['sample_batch']. The genetic codes are identified according to according to https://www.neb.com/tools-and-resources/usage-guidelines/the-genetic-code. Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. base: `str` (default: `[AGTCBDHKMNRSVWY]`) Genetic code letters to be identified. ID_length: `int` (default: 12) Length of the Genetic Codes in the samples. copy: `bool` (default: `False`) Return a copy instead of writing to adata. 
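
# Illustrative sketch of the logging helpers defined above (assumes `import scvelo as
# scv`). Verbosity 0-3 maps to error/warn/info/hint; `r`/`reset` restarts the internal
# clock and `time` appends the elapsed time to the message.
import scvelo as scv

scv.settings.verbosity = 3
scv.logging.print_version()
scv.logging.msg('computing velocity graph', v='info', r=True)  # log and reset the timer
# ... long-running computation ...
scv.logging.msg('    finished', v='info', time=True)           # appends '(0:00:42.00)'-style timing
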
Returns ------- Returns or updates `adata` with the attributes obs_names: list updated names of the observations sample_batch: `.obs` names of the identified sample batches """ def get_base_list(name, base): base_list = base while re.search(base_list + base, name) is not None: base_list += base if len(base_list) == 0: raise ValueError('Encountered an invalid ID in obs_names: ', name) return base_list adata = data.copy() if copy else data names = adata.obs_names base_list = get_base_list(names[0], base) if len(np.unique([len(name) for name in adata.obs_names])) == 1: start, end = re.search(base_list, names[0]).span() newIDs = [name[start:end] for name in names] start, end = 0, len(newIDs[0]) for i in range(end - ID_length): if np.any([ID[i] not in base for ID in newIDs]): start += 1 if np.any([ID[::-1][i] not in base for ID in newIDs]): end -= 1 newIDs = [ID[start:end] for ID in newIDs] prefixes = [names[i].replace(newIDs[i], '') for i in range(len(names))] else: prefixes, newIDs = [], [] for name in names: match = re.search(base_list, name) newID = re.search(get_base_list(name, base), name).group() if match is None else match.group() newIDs.append(newID) prefixes.append(name.replace(newID, '')) adata.obs_names = newIDs if len(prefixes[0]) > 0 and len(np.unique(prefixes)) > 1: #idx_names = np.random.choice(len(names), size=20, replace=False) #for i in range(len(names[0])): # if np.all([re.search(names[0][:i], names[ix]) for ix in idx_names]) is not None: obs_key = names[0][:i] adata.obs['sample_batch'] = prefixes adata._sanitize() adata.obs_names_make_unique() return adata if copy else None def merge(adata, ldata, copy=True): """Merges two annotated data matrices. Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. ldata: :class:`~anndata.AnnData` Annotated data matrix. Returns ------- Returns a :class:`~anndata.AnnData` object """ common_obs = adata.obs_names.intersection(ldata.obs_names) if len(common_obs) == 0: clean_obs_names(adata) clean_obs_names(ldata) common_obs = adata.obs_names.intersection(ldata.obs_names) _adata = adata.copy() if adata.shape[1] >= ldata.shape[1] else ldata.copy() _ldata = ldata.copy() if adata.shape[1] >= ldata.shape[1] else adata.copy() _adata = _adata[common_obs] _ldata = _ldata[common_obs] for attr in _ldata.obs.keys(): _adata.obs[attr] = _ldata.obs[attr] for attr in _ldata.obsm.keys(): _adata.obsm[attr] = _ldata.obsm[attr] for attr in _ldata.uns.keys(): _adata.uns[attr] = _ldata.uns[attr] for attr in _ldata.layers.keys(): _adata.layers[attr] = _ldata.layers[attr] if _adata.shape[1] == _ldata.shape[1]: if np.all(adata.var_names == ldata.var_names): for attr in _ldata.var.keys(): _adata.var[attr] = _ldata.var[attr] for attr in _ldata.varm.keys(): _adata.varm[attr] = _ldata.varm[attr] else: raise ValueError('Variable names are not identical.') if not copy: adata = _adata return _adata if copy else None PKuM?QWAAscvelo/settings.py"""Settings """ # set global verbosity level to show errors(0), warnings(1), info(2) and hints(3) verbosity = 3 plot_suffix = '' """Global suffix that is appended to figure filenames. """ file_format_data = 'h5ad' """File format for saving AnnData objects. Allowed are 'txt', 'csv' (comma separated value file) for exporting and 'h5ad' (hdf5) for lossless saving. """ file_format_figs = 'pdf' """File format for saving figures. For example 'png', 'pdf' or 'svg'. Many other formats work as well (see `matplotlib.pyplot.savefig`). """ autosave = False """Save plots/figures as files in directory 'figs'. 
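
# Illustrative sketch for merge()/clean_obs_names() above: combine a preprocessed
# AnnData with the spliced/unspliced counts from a velocyto loom file. File names are
# placeholders; merge() intersects on obs_names and calls clean_obs_names() itself if
# the barcode formats of the two objects do not match directly.
import scvelo as scv

adata = scv.read('my_annotated_data.h5ad')  # hypothetical annotated dataset
ldata = scv.read_loom('my_sample.loom')     # hypothetical velocyto output with spliced/unspliced layers
adata = scv.utils.merge(adata, ldata)
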
Do not show plots/figures interactively. """ autoshow = True """Show all plots/figures automatically if autosave == False. There is no need to call the matplotlib pl.show() in this case. """ writedir = './write/' """Directory where the function scanpy.write writes to by default. """ cachedir = './cache/' """Default cache directory. """ figdir = './figures/' """Directory where plots are saved. """ max_memory = 15 """Maximal memory usage in Gigabyte. Is currently not well respected.... """ n_jobs = 1 """Default number of jobs/ CPUs to use for parallel computing. """ logfile = '' """Name of logfile. By default is set to '' and writes to standard output.""" categories_to_ignore = ['N/A', 'dontknow', 'no_gate', '?'] """Categories that are omitted in plotting etc. """ _frameon = False """See set_figure_params. """ _rcParams_style = None """See set_figure_params. """ # -------------------------------------------------------------------------------- # Functions # -------------------------------------------------------------------------------- from matplotlib import rcParams from scanpy.plotting.rcmod import set_rcParams_scanpy from scanpy.plotting.utils import default_palette def set_rcParams_scvelo(fontsize=8, color_map=None, frameon=None): """Set matplotlib.rcParams to scvelo defaults.""" # dpi options (mpl default: 100, 100) rcParams['figure.dpi'] = 100 rcParams['savefig.dpi'] = 150 # figure (mpl default: 0.125, 0.96, 0.15, 0.91) rcParams['figure.figsize'] = (7, 5) rcParams['figure.subplot.left'] = 0.18 rcParams['figure.subplot.right'] = 0.96 rcParams['figure.subplot.bottom'] = 0.15 rcParams['figure.subplot.top'] = 0.91 # lines (defaults: 1.5, 6, 1) rcParams['lines.linewidth'] = 1.5 # the line width of the frame rcParams['lines.markersize'] = 6 rcParams['lines.markeredgewidth'] = 1 # font rcParams['font.sans-serif'] = \ ['Arial', 'Helvetica', 'DejaVu Sans', 'Bitstream Vera Sans', 'sans-serif'] fontsize = fontsize labelsize = 0.9 * fontsize # fonsizes (mpl default: 10, medium, large, medium) rcParams['font.size'] = fontsize rcParams['legend.fontsize'] = labelsize rcParams['axes.titlesize'] = fontsize rcParams['axes.labelsize'] = labelsize # legend (mpl default: 1, 1, 2, 0.8) rcParams['legend.numpoints'] = 1 rcParams['legend.scatterpoints'] = 1 rcParams['legend.handlelength'] = 0.5 rcParams['legend.handletextpad'] = 0.4 # color cycle rcParams['axes.prop_cycle'] = default_palette() # axes rcParams['axes.linewidth'] = 0.8 rcParams['axes.edgecolor'] = 'black' rcParams['axes.facecolor'] = 'white' # ticks (mpl default: k, k, medium, medium) rcParams['xtick.color'] = 'k' rcParams['ytick.color'] = 'k' rcParams['xtick.labelsize'] = labelsize rcParams['ytick.labelsize'] = labelsize # axes grid (mpl default: False, #b0b0b0) rcParams['axes.grid'] = False rcParams['grid.color'] = '.8' # color map rcParams['image.cmap'] = 'RdBu_r' if color_map is None else color_map # frame (mpl default: True) frameon = False if frameon is None else frameon global _frameon _frameon = frameon def set_figure_params(style='scvelo', figsize=None, dpi=None, dpi_save=None, frameon=None, vector_friendly=True, color_map=None, format='pdf', transparent=False, ipython_format='png2x'): """Set resolution/size, styling and format of figures. Arguments --------- style : `str` (default: `None`) Init default values for ``matplotlib.rcParams`` suited for `scvelo` or `scanpy`. Use `None` for the default matplotlib values. figsize: `[float, float]` (default: `None`) Width and height for default figure size. 
dpi : `int` (default: `None`) Resolution of rendered figures - this influences the size of figures in notebooks. dpi_save : `int` (default: `None`) Resolution of saved figures. This should typically be higher to achieve publication quality. frameon : `bool` (default: `None`) Add frames and axes labels to scatter plots. vector_friendly : `bool` (default: `True`) Plot scatter plots using `png` backend even when exporting as `pdf` or `svg`. color_map : `str` (default: `None`) Convenience method for setting the default color map. format : {'png', 'pdf', 'svg', etc.} (default: 'pdf') This sets the default format for saving figures: `file_format_figs`. transparent : `bool` (default: `True`) Save figures with transparent back ground. Sets `rcParams['savefig.transparent']`. ipython_format : list of `str` (default: 'png2x') Only concerns the notebook/IPython environment; see `IPython.core.display.set_matplotlib_formats` for more details. """ try: import IPython IPython.core.display.set_matplotlib_formats(ipython_format) except: pass from matplotlib import rcParams global _rcParams_style _rcParams_style = style global _vector_friendly _vector_friendly = vector_friendly global file_format_figs file_format_figs = format if transparent is not None: rcParams['savefig.transparent'] = transparent if style is 'scvelo': set_rcParams_scvelo(color_map=color_map, frameon=frameon) elif style is 'scanpy': # dpi is not specified by scanpy directly in the defaults if dpi is None: rcParams['figure.dpi'] = 80 if dpi_save is None: rcParams['savefig.dpi'] = 150 frameon = True if frameon is None else frameon global _frameon _frameon = frameon set_rcParams_scanpy(color_map=color_map) # Overwrite style options if given if figsize is not None: rcParams['figure.figsize'] = figsize if dpi is not None: rcParams['figure.dpi'] = dpi if dpi_save is not None: rcParams['savefig.dpi'] = dpi_save def set_rcParams_defaults(): """Reset `matplotlib.rcParams` to defaults.""" from matplotlib import rcParamsDefault rcParams.update(rcParamsDefault) # ------------------------------------------------------------------------------ # Private global variables & functions # ------------------------------------------------------------------------------ _vector_friendly = False """Set to true if you want to include pngs in svgs and pdfs. 
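
# Illustrative sketch for set_figure_params() above: switch to the scvelo matplotlib
# defaults and adjust output settings (values shown are arbitrary examples).
import scvelo as scv

scv.settings.set_figure_params('scvelo', dpi_save=200, transparent=True)
scv.settings.figdir = './figures/'   # directory used when saving plots
scv.settings.autosave = False        # show figures interactively instead of saving
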
""" _low_resolution_warning = True """Print warning when saving a figure with low resolution.""" def _set_start_time(): from time import time return time() _start = _set_start_time() """Time when the settings module is first imported.""" _previous_time = _start """Variable for timing program parts.""" _previous_memory_usage = -1 """Stores the previous memory usage.""" PK(ME scvelo/tl.pyfrom scvelo.tools import * PKZftMuH scvelo/utils.pyfrom .preprocessing.utils import show_proportions, cleanup, set_initial_size, get_initial_size from .preprocessing.moments import get_connectivities, second_order_moments, second_order_moments_u from .tools.utils import prod_sum_obs, prod_sum_var, norm, vector_norm, R_squared, cosine_correlation, normalize, \ scale, get_indices, get_iterative_indices, groups_to_bool, randomized_velocity, extract_int_from_str from .tools.rank_velocity_genes import get_mean_var from .tools.run import convert_to_adata, convert_to_loom from .tools.optimization import leastsq_NxN, leastsq_generalized, maximum_likelihood from .tools.velocity_confidence import random_subsample from .tools.velocity_graph import vals_to_csr from .plotting.utils import is_categorical, clip, interpret_colorkey from .plotting.velocity_embedding_grid import compute_velocity_on_grid from .read_load import clean_obs_names, merge def merge_groups(adata, key, map_groups, key_added=None, map_colors=None): adata._sanitize() if len(map_groups) != len(adata.obs[key].cat.categories): map_coarse = {} for c in adata.obs[key].cat.categories: for group in map_groups: if any(cluster == c for cluster in map_groups[group]): map_coarse[c] = group if c not in map_coarse: map_coarse[c] = c map_groups = map_coarse if key_added is None: key_added = key + '_coarse' from pandas.api.types import CategoricalDtype adata.obs[key_added] = adata.obs[key].map(map_groups).astype(CategoricalDtype()) old_categories = adata.obs[key].cat.categories new_categories = adata.obs[key_added].cat.categories # map_colors is passed if map_colors is not None: old_colors = None if key + '_colors' in adata.uns: old_colors = adata.uns[key + '_colors'] new_colors = [] for group in adata.obs[key_added].cat.categories: if group in map_colors: new_colors.append(map_colors[group]) elif group in old_categories and old_colors is not None: new_colors.append(old_colors[old_categories.get_loc(group)]) else: raise ValueError('You didn\'t specify a color for {}.'.format(group)) adata.uns[key_added + '_colors'] = new_colors # map_colors is not passed elif key + '_colors' in adata.uns: old_colors = adata.uns[key + '_colors'] inverse_map_groups = {g: [] for g in new_categories} for old_group in old_categories: inverse_map_groups[map_groups[old_group]].append(old_group) new_colors = [] for group in new_categories: # take the largest of the old groups old_group = adata.obs[key][adata.obs[key].isin( inverse_map_groups[group])].value_counts().index[0] new_colors.append(old_colors[old_categories.get_loc(old_group)]) adata.uns[key_added + '_colors'] = new_colors PKdnM՝  scvelo/plotting/__init__.pyfrom .scatter import scatter from .velocity import velocity from .velocity_embedding import velocity_embedding from .velocity_embedding_grid import velocity_embedding_grid, velocity_embedding_stream from .utils import hist from scanpy.api.pl import paga, paga_compare PK,MS* * scvelo/plotting/docs.py"""Shared docstrings for plotting function parameters. 
""" from textwrap import dedent def doc_params(**kwds): """\ Docstrings should start with "\" in the first line for proper formatting. """ def dec(obj): obj.__doc__ = dedent(obj.__doc__).format(**kwds) return obj return dec doc_scatter = """\ basis: `str` (default='umap') Key for embedding. color: `str`, list of `str` or `None` (default: `None`) Key for annotations of observations/cells or variables/genes use_raw : `bool` (default: `None`) Use `raw` attribute of `adata` if present. layer: `str`, list of `str` or `None` (default: `None`) Specify the layer for `color`. color_map: `str` (default: `matplotlib.rcParams['image.cmap']`) String denoting matplotlib color map. colorbar: `bool` (default: `False`) Whether to show colorbar. palette: list of `str` (default: `None`) Colors to use for plotting groups (categorical annotation). size: `float` (default: 5) Point size. alpha: `float` (default: 1) Set blending - 0 transparent to 1 opaque. perc: tuple, e.g. [2,98] (default: `None`) Specify percentile for continuous coloring. sort_order: `bool` (default: `True`) For continuous annotations used as color parameter, plot data points with higher values on top of others. groups: `str` (default: `all groups`) Restrict to a few categories in categorical observation annotation. components: `str` or list of `str` (default: '1,2') For instance, ['1,2', '2,3']. projection: {'2d', '3d'} (default: '2d') Projection of plot. legend_loc: str (default: 'none') Location of legend, either 'on data', 'right margin' or valid keywords for matplotlib.legend. legend_fontsize: `int` (default: `None`) Legend font size. legend_fontweight: {'normal', 'bold', ...} (default: `None`) Legend font weight. Defaults to 'bold' if `legend_loc = 'on data'`, otherwise to 'normal'. Available are `['light', 'normal', 'medium', 'semibold', 'bold', 'heavy', 'black']`. right_margin: `float` or list of `float` (default: `None`) Adjust the width of the space right of each plotting panel. left_margin: `float` or list of `float` (default: `None`) Adjust the width of the space left of each plotting panel. xlabel: `str` (default: `None`) Label of x-axis. ylabel: `str` (default: `None`) Label of y-axis. title: `str` (default: `None`) Provide title for panels either as, e.g. `["title1", "title2", ...]`. fontsize: `float` (default: `None`) Label font size. figsize: tuple (default: `(7,5)`) Figure size. dpi: `int` (default: 80) Figure dpi. frameon: `bool` (default: `True`) Draw a frame around the scatter plot. show: `bool`, optional (default: `None`) Show the plot, do not return axis. save: `bool` or `str`, optional (default: `None`) If `True` or a `str`, save the figure. A string is appended to the default filename. Infer the filetype if ending on {'.pdf', '.png', '.svg'}. ax: `matplotlib.Axes`, optional (default: `None`) A matplotlib axes object. 
Only works if plotting a single component.\ """PK Mv77scvelo/plotting/pseudotime.pyfrom .utils import * def principal_curve(adata): X_curve = adata.uns['principal_curve']['projections'] ixsort = adata.uns['principal_curve']['ixsort'] pl.plot(X_curve[ixsort, 0], X_curve[ixsort, 1], c="k", lw=3, zorder=2000000) def pseudotime(adata, gene_list, ckey='velocity', reverse=False): ixsort = adata.uns['principal_curve']['ixsort'] arclength = adata.uns['principal_curve']['arclength'] if reverse: arclength /= np.max(arclength) else: arclength = (np.max(arclength) - arclength) / np.max(arclength) cell_subset = adata.uns['principal_curve']['cell_subset'] adata_subset = adata[cell_subset].copy() gs = pl.GridSpec(1, len(gene_list)) for n, gene in enumerate(gene_list): i = np.where(adata_subset.var_names == gene)[0][0] ax = pl.subplot(gs[n]) lb, ub = np.percentile(adata_subset.obsm[ckey][:, i], [.5, 99.5]) c = np.clip(adata_subset.obsm[ckey][ixsort, i], lb, ub) # pl.scatter(arclength[ixsort], adata2.obsm['Mu'][ixsort, i], alpha=0.7, c='b', s=5, label="unspliced") pl.scatter(arclength[ixsort], adata_subset.obsm['Ms'][ixsort, i] * adata_subset.uns['velocity_pars']['gamma'][i], c=c, cmap='coolwarm', alpha=1, s=1, label="spliced") c = c / abs(c).max() * (adata_subset.obsm['Ms'][ixsort, i] * adata_subset.uns['velocity_pars']['gamma'][i]).max() z = np.ma.polyfit(arclength[ixsort], c, 8) fun = np.poly1d(z) pl.plot(arclength[ixsort], fun(arclength[ixsort]), label=ckey) # Hide the right and top spines ax.spines['right'].set_visible(False) ax.spines['top'].set_visible(False) ax.xaxis.set_major_locator(MaxNLocator(nbins=3)) ax.yaxis.set_major_locator(MaxNLocator(nbins=3)) pl.ylabel(gene) pl.title('Colored by ' + ckey) PK\vMAKscvelo/plotting/scatter.pyfrom .. import settings from .utils import is_categorical, update_axes, set_label, set_title, default_basis, default_color, default_color_map, \ default_size, interpret_colorkey, get_components, set_colorbar, savefig from .docs import doc_scatter, doc_params import scanpy.api.pl as scpl from matplotlib import rcParams import matplotlib.pyplot as pl import numpy as np import pandas as pd from scipy.sparse import issparse @doc_params(scatter=doc_scatter) def scatter(adata, x=None, y=None, basis=None, vkey=None, color=None, use_raw=None, layer=None, color_map=None, colorbar=False, palette=None, size=None, alpha=None, perc=None, sort_order=True, groups=None, components=None, projection='2d', legend_loc='none', legend_fontsize=None, legend_fontweight=None, right_margin=None, left_margin=None, xlabel=None, ylabel=None, title=None, fontsize=None, figsize=None, dpi=None, frameon=None, show=True, save=None, ax=None, **kwargs): """\ Scatter plot along observations or variables axes. Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. 
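
# Illustrative sketch of the doc_params/doc_scatter mechanism above: the decorator
# dedents a function's docstring and substitutes the shared parameter block for the
# `{scatter}` placeholder (the function below is a toy example, not part of scvelo).
from scvelo.plotting.docs import doc_params, doc_scatter

@doc_params(scatter=doc_scatter)
def my_plot(adata):
    """\
    Toy plotting function.

    Arguments
    ---------
    adata: :class:`~anndata.AnnData`
        Annotated data matrix.
    {scatter}
    """

print(my_plot.__doc__)   # the shared scatter parameters are spliced into the docstring
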
x: `str`, `np.ndarray` or `None` (default: `None`) x coordinate y: `str`, `np.ndarray` or `None` (default: `None`) y coordinate {scatter} Returns ------- If `show==False` a `matplotlib.Axis` """ colors = pd.unique(color) if isinstance(color, (list, tuple, np.record)) else [color] layers = pd.unique(layer) if isinstance(layer, (list, tuple, np.ndarray, np.record)) else [layer] bases = pd.unique(basis) if isinstance(basis, (list, tuple, np.ndarray, np.record)) else [basis] scatter_kwargs = {"use_raw": use_raw, "sort_order": sort_order, "alpha": alpha, "components": components, "projection": projection, "legend_loc": legend_loc, "groups": groups, "palette": palette, "legend_fontsize": legend_fontsize, "legend_fontweight": legend_fontweight, "title": title, "right_margin": right_margin, "left_margin": left_margin, "show": False, "save": None} multikey = colors if len(colors) > 1 else layers if len(layers) > 1 else bases if len(bases) > 1 else None if multikey is not None: figsize = rcParams['figure.figsize'] if figsize is None else figsize for i, gs in enumerate( pl.GridSpec(1, len(multikey), pl.figure(None, (figsize[0] * len(multikey), figsize[1]), dpi=dpi))): scatter(adata, x=x, y=y, size=size, xlabel=xlabel, ylabel=ylabel, color_map=color_map, colorbar=colorbar, perc=perc, frameon=frameon, ax=pl.subplot(gs), color=colors[i] if len(colors) > 1 else color, layer=layers[i] if len(layers) > 1 else layer, basis=bases[i] if len(bases) > 1 else basis, **scatter_kwargs, **kwargs) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax else: if projection == '3d': from mpl_toolkits.mplot3d import Axes3D ax = pl.figure(None, figsize, dpi=dpi).gca(projection=projection) if ax is None else ax else: ax = pl.figure(None, figsize, dpi=dpi).gca() if ax is None else ax color, layer = colors[0], layers[0] color = default_color(adata) if color is None else color color_map = default_color_map(adata, color) if color_map is None else color_map frameon = frameon if frameon is not None else settings._frameon is_embedding = ((x is None) | (y is None)) and basis not in adata.var_names basis = default_basis(adata) if basis is None and is_embedding else basis size = default_size(adata) if size is None else size if is_categorical(adata, color) and is_embedding: ax = scpl.scatter(adata, basis=basis, color=color, color_map=color_map, size=size, frameon=frameon, ax=ax, **scatter_kwargs, **kwargs) else: if basis in adata.var_names: x = adata[:, basis].layers['spliced'] if use_raw else adata[:, basis].layers['Ms'] y = adata[:, basis].layers['unspliced'] if use_raw else adata[:, basis].layers['Mu'] x, y = x.A if issparse(x) else x, y.A if issparse(y) else y xlabel, ylabel, title = 'spliced', 'unspliced', basis elif is_embedding: X_emb = adata.obsm['X_' + basis][:, get_components(components, basis)] x, y = X_emb[:, 0], X_emb[:, 1] elif isinstance(x, str) and isinstance(y, str) and x in adata.var_names and y in adata.var_names: x = adata[:, x].layers[layer] if layer in adata.layers.keys() else adata[:, x].X y = adata[:, y].layers[layer] if layer in adata.layers.keys() else adata[:, y].X c = interpret_colorkey(adata, color, layer, perc) if layer is not None and 'velocity' in layer and isinstance(color, str) and color in adata.var_names: ub = np.percentile(np.abs(c), 98) kwargs.update({"vmin": -ub, "vmax": ub}) if layer is not None and ('spliced' in layer or 'Ms' in layer or 'Mu' in layer) \ and isinstance(color, str) and color in adata.var_names: ub = 
np.percentile(c, 98) kwargs.update({"vmax": ub}) pl.scatter(x, y, c=c, cmap=color_map, s=size, alpha=alpha, edgecolors='none', marker='.', zorder=0, **kwargs) set_label(xlabel, ylabel, fontsize, basis) set_title(title, layer, color, fontsize) ax = update_axes(ax, fontsize, is_embedding, frameon) if basis in adata.var_names: xnew = np.linspace(0, x.max() * 1.02) fits = [fit for fit in adata.layers.keys() if all(['velocity' in fit, fit + '_gamma' in adata.var.keys()])] for fit in fits: linestyle = '--' if 'variance_' + fit in adata.layers.keys() else '-' gamma = adata[:, basis].var[fit + '_gamma'].values if fit + '_gamma' in adata.var.keys() else 1 beta = adata[:, basis].var[fit + '_beta'].values if fit + '_beta' in adata.var.keys() else 1 offset = adata[:, basis].var[fit + '_offset'].values if fit + '_offset' in adata.var.keys() else 0 pl.plot(xnew, gamma / beta * xnew + offset / beta, c='k', linestyle=linestyle) if colorbar and not is_categorical(adata, color): set_colorbar(ax) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return axPKSvMW ++scvelo/plotting/utils.pyfrom .. import settings import numpy as np import matplotlib.pyplot as pl from matplotlib.ticker import MaxNLocator from mpl_toolkits.axes_grid1.inset_locator import inset_axes from scanpy.plotting.utils import savefig_or_show, default_palette, adjust_palette from matplotlib.colors import is_color_like from pandas.api.types import is_categorical as cat from scipy.sparse import issparse def is_categorical(adata, c): adata._sanitize() # Indentify array of categorical type and transform where applicable return isinstance(c, str) and (c in adata.obs.keys() and cat(adata.obs[c]) or is_color_like(c)) def default_basis(adata): keys = [key for key in ['pca', 'tsne', 'umap'] if 'X_' + key in adata.obsm.keys()] return keys[-1] if len(keys) > 0 else None def update_axes(ax, fontsize, is_embedding, frameon): frameon = settings._frameon if frameon is None else frameon if frameon: if is_embedding: ax.tick_params(which='both', bottom=False, left=False, labelbottom=False, labelleft=False) else: ax.xaxis.set_major_locator(MaxNLocator(nbins=3, integer=True)) ax.yaxis.set_major_locator(MaxNLocator(nbins=3, integer=True)) labelsize = int(fontsize * .75) if fontsize is not None else None ax.tick_params(axis='both', which='major', labelsize=labelsize) else: ax.set_xlabel('') ax.set_ylabel('') ax.tick_params(which='both', bottom=False, left=False, labelbottom=False, labelleft=False) ax.set_frame_on(False) return ax def set_label(xlabel, ylabel, fontsize=None, basis=None): if isinstance(xlabel, str) and isinstance(ylabel, str): pl.xlabel(xlabel, fontsize=fontsize) pl.ylabel(ylabel, fontsize=fontsize) elif basis is not None: component_name = ('DC' if basis == 'diffmap' else 'tSNE' if basis == 'tsne' else 'UMAP' if basis == 'umap' else 'PC' if basis == 'pca' else basis.replace('draw_graph_', '').upper() if 'draw_graph' in basis else basis) pl.xlabel(component_name + '1') pl.ylabel(component_name + '2') def set_title(title, layer=None, color=None, fontsize=None): if isinstance(title, str): pl.title(title, fontsize=fontsize) elif isinstance(layer, str) and isinstance(color, str): pl.title(color + ' ' + layer, fontsize=fontsize) elif isinstance(color, str): pl.title(color, fontsize=fontsize) def set_frame(ax, frameon): frameon = settings._frameon if frameon is None else frameon if not frameon: ax.set_xlabel('') ax.set_ylabel('') ax.set_frame_on(False) return ax def 
default_size(adata): adjusted, classic = 1.2e5 / adata.n_obs, 20 return np.mean([adjusted, classic]) if settings._rcParams_style == 'scvelo' else adjusted def default_color(adata): return 'clusters' if 'clusters' in adata.obs.keys() else 'louvain' if 'louvain' in adata.obs.keys() else 'grey' def default_color_map(adata, c): return 'viridis_r' if isinstance(c, str) and c in adata.obs.keys() and not is_categorical(adata, c)\ and adata.obs[c].min() == 0 and adata.obs[c].max() == 1 else None def clip(c, perc=None): if isinstance(perc, int): perc = [perc, 100] if perc < 50 else [0, perc] lb, ub = np.percentile(c, perc) return np.clip(c, lb, ub) def get_colors(adata, c): if is_color_like(c): return c else: if c+'_colors' not in adata.uns.keys(): palette = default_palette(None) palette = adjust_palette(palette, length=len(adata.obs[c].cat.categories)) adata.uns[c + '_colors'] = palette[:len(adata.obs[c].cat.categories)].by_key()['color'] cluster_ix = adata.obs[c].cat.codes return np.array([adata.uns[c + '_colors'][cluster_ix[i]] for i in range(adata.n_obs)]) def interpret_colorkey(adata, c=None, layer=None, perc=None): if c is None: c = default_color(adata) if is_categorical(adata, c): c = get_colors(adata, c) elif isinstance(c, str): if c in adata.obs.keys(): # color by observation key c = adata.obs[c] elif c in adata.var_names: # color by var in specific layer c = adata[:, c].layers[layer] if layer in adata.layers.keys() else adata[:, c].X c = c.A.flatten() if issparse(c) else c else: raise ValueError('color key is invalid! pass valid observation annotation or a gene name') if perc is not None: c = clip(c, perc=perc) elif len(np.array(c).flatten()) == adata.n_obs: # continuous coloring c = np.array(c).flatten() if perc is not None: c = clip(c, perc=perc) else: raise ValueError('color key is invalid! pass valid observation annotation or a gene name') return c def get_components(components=None, basis=None): if components is None: components = '1,2' if isinstance(components, str): components = components.split(',') components = np.array(components).astype(int) - 1 if basis == 'diffmap': components += 1 return components def set_colorbar(ax, orientation='vertical'): cb = pl.colorbar(orientation=orientation, cax=inset_axes(ax, width="2%", height="30%", loc=4, borderpad=0)) cb.set_alpha(1) cb.draw_all() cb.locator = MaxNLocator(nbins=3, integer=True) cb.update_ticks() def default_arrow(size): if isinstance(size, (list, tuple)) and len(size) == 3: head_l, head_w, ax_l = size elif type(size) == int or type(size) == float: head_l, head_w, ax_l = 12 * size, 10 * size, 8 * size else: head_l, head_w, ax_l = 12, 10, 8 return head_l, head_w, ax_l def savefig(writekey, show=False, dpi=None, save=None): """Save current figure to file. 
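
# Illustrative sketch for the plotting helpers above (clip and get_components), using
# random toy data:
import numpy as np
from scvelo.plotting.utils import clip, get_components

c = np.random.randn(1000)
c_clipped = clip(c, perc=[2, 98])             # restrict color values to the 2nd-98th percentile
comps = get_components('2,3', basis='umap')   # -> array([1, 2]), zero-based embedding columns
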
""" savefig_or_show('velocity_' + writekey + '_' if writekey != '' else 'velocity_', dpi=dpi, save=save, show=show) def hist(arrays, alpha=.5, bins=None, colors=None, labels=None, xlabel=None, ylabel=None, ax=None, figsize=None, dpi=None): ax = pl.figure(None, figsize, dpi=dpi) if ax is None else ax arrays = arrays if isinstance(arrays, (list, tuple)) else [arrays] palette = default_palette(None)[::3][:len(arrays)].by_key()['color'] colors = palette if colors is None or len(colors) < len(arrays) else colors for i, array in enumerate(arrays): pl.hist(array[np.isfinite(array)], bins=bins, alpha=alpha, color=colors[i], label=labels[i] if labels is not None else None) pl.legend() pl.xlabel(xlabel if xlabel is not None else '') pl.ylabel(ylabel if xlabel is not None else '') pl.show() # def phase(adata, var=None, x=None, y=None, color='louvain', fits='all', xlabel='spliced', ylabel='unspliced', # fontsize=None, show=True, ax=None, **kwargs): # if isinstance(var, str) and (var in adata.var_names): # if (x is None) or (y is None): # ix = np.where(adata.var_names == var)[0][0] # x, y = adata.layers['Ms'][:, ix], adata.layers['Mu'][:, ix] # else: # ValueError('var not found in adata.var_names.') # # ax = scatter(adata, x=x, y=y, color=color, frameon=True, title=var, xlabel=xlabel, ylabel=ylabel, ax=ax, **kwargs) # # xnew = np.linspace(0, x.max() * 1.02) # fits = adata.layers.keys() if fits == 'all' else fits # fits = [fit for fit in fits if 'velocity' in fit] # for fit in fits: # linestyle = '--' if 'stochastic' in fit else '-' # pl.plot(xnew, adata.var[fit+'_gamma'][ix] / adata.var[fit+'_beta'][ix] * xnew # + adata.var[fit+'_offset'][ix] / adata.var[fit+'_beta'][ix], c='k', linestyle=linestyle) # # if show: pl.show() # else: return axPKuM0iscvelo/plotting/velocity.pyfrom ..preprocessing.moments import second_order_moments from ..tools.rank_velocity_genes import rank_velocity_genes from .scatter import scatter from .utils import savefig, default_basis, default_size import numpy as np import pandas as pd from matplotlib import rcParams from matplotlib.ticker import MaxNLocator import matplotlib.pyplot as pl from scipy.sparse import issparse def velocity(adata, var_names=None, basis=None, mode=None, fits='all', layers='all', use_raw=False, color=None, color_map='RdBu_r', perc=[2,98], size=None, alpha=.5, fontsize=None, figsize=None, dpi=None, show=True, save=None, ax=None, **kwargs): """Phase and velocity plot for set of genes. The phase plot shows spliced against unspliced expressions with steady-state fit. Further the embedding is shown colored by velocity and expression. Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. var_names: `str` or list of `str` (default: `None`) Which variables to show. basis: `str` (default: `'umap'`) Key for embedding coordinates. mode: `'stochastic'` or `None` (default: `None`) Whether to show show covariability phase portrait. fits: `str` or list of `str` (default: `'all'`) Which steady-state estimates to show. layers: `str` or list of `str` (default: `'all'`) Which layers to show. color: `str`, list of `str` or `None` (default: `None`) Key for annotations of observations/cells or variables/genes color_map: `str` (default: `matplotlib.rcParams['image.cmap']`) String denoting matplotlib color map. perc: tuple, e.g. [2,98] (default: `None`) Specify percentile for continuous coloring. size: `float` (default: 5) Point size. alpha: `float` (default: 1) Set blending - 0 transparent to 1 opaque. fontsize: `float` (default: `None`) Label font size. 
figsize: tuple (default: `(7,5)`) Figure size. dpi: `int` (default: 80) Figure dpi. show: `bool`, optional (default: `None`) Show the plot, do not return axis. save: `bool` or `str`, optional (default: `None`) If `True` or a `str`, save the figure. A string is appended to the default filename. Infer the filetype if ending on {'.pdf', '.png', '.svg'}. ax: `matplotlib.Axes`, optional (default: `None`) A matplotlib axes object. Only works if plotting a single component. """ basis = default_basis(adata) if basis is None else basis var_names = [var_names] if isinstance(var_names, str) else var_names \ if isinstance(var_names, (list, tuple, np.ndarray, np.record)) else rank_velocity_genes(adata, basis=basis, n_genes=4) valid_var_names = adata.var_names[adata.var['velocity_genes']] if 'velocity_genes' in adata.var.keys() else adata.var_names var_names = pd.unique([var for var in var_names if var in valid_var_names]) (skey, ukey) = ('spliced', 'unspliced') if use_raw else ('Ms', 'Mu') layers = ['velocity', skey, 'variance_velocity'] if layers == 'all' else layers layers = [layer for layer in layers if layer in adata.layers.keys()] fits = adata.layers.keys() if fits == 'all' else fits fits = [fit for fit in fits if all(['velocity' in fit, fit + '_gamma' in adata.var.keys()])] stochastic_fits = [fit for fit in fits if 'variance_' + fit in adata.layers.keys()] figsize = rcParams['figure.figsize'] if figsize is None else figsize n_row, n_col = len(var_names), (1 + len(layers) + (mode == 'stochastic')*2) ax = pl.figure(figsize=(figsize[0]*n_col/2, figsize[1]*n_row/2), dpi=dpi) if ax is None else ax gs = pl.GridSpec(n_row, n_col, wspace=0.3, hspace=0.5) size = default_size(adata) / 2 if size is None else size # since fontsize is halved in width and height fontsize = rcParams['font.size'] if fontsize is None else fontsize for v, var in enumerate(var_names): _adata = adata[:, var] s, u = _adata.layers[skey], _adata.layers[ukey] if issparse(s): s, u = s.A, u.A # spliced/unspliced phase portrait with steady-state estimate ax = pl.subplot(gs[v * n_col]) scatter(adata, basis=var, color=color, frameon=True, title=var, size=size, alpha=alpha, fontsize=fontsize, xlabel='spliced', ylabel='unspliced', show=False, ax=ax, save=False, **kwargs) xnew = np.linspace(0, s.max() * 1.02) for fit in fits: linestyle = '--' if fit in stochastic_fits else '-' gamma = _adata.var[fit + '_gamma'].values if fit + '_gamma' in adata.var.keys() else 1 beta = _adata.var[fit + '_beta'].values if fit + '_beta' in adata.var.keys() else 1 offset = _adata.var[fit + '_offset'].values if fit + '_offset' in adata.var.keys() else 0 pl.plot(xnew, gamma / beta * xnew + offset / beta, c='k', linestyle=linestyle) if v == len(var_names)-1: pl.legend(fits, loc='lower right', prop={'size': .5*fontsize}) ax.xaxis.set_major_locator(MaxNLocator(nbins=3)) ax.yaxis.set_major_locator(MaxNLocator(nbins=3)) ax.tick_params(axis='both', which='major', labelsize=.7*fontsize) # velocity and expression plots for l, layer in enumerate(layers): ax = pl.subplot(gs[v*n_col + l + 1]) title = 'expression' if layer == skey else layer scatter(adata, basis=basis, color=var, layer=layer, color_map=color_map, title=title, perc=perc, fontsize=fontsize, size=size, alpha=alpha, frameon=False, show=False, ax=ax, save=False, **kwargs) if mode == 'stochastic': ss, us = second_order_moments(_adata) ss, us = ss.flatten(), us.flatten() fit = stochastic_fits[0] ax = pl.subplot(gs[v*n_col + len(layers) + 1]) offset = _adata.var[fit + '_offset'] if fit + '_offset' in adata.var.keys() 
else 0 beta = _adata.var[fit + '_beta'] if fit + '_beta' in adata.var.keys() else 1 x = 2 * (ss - s**2) - s y = 2 * (us - u * s) + u + 2 * s * offset / beta scatter(adata, x=x, y=y, color=color, title=var, fontsize=40/n_col, size=size, perc=perc, show=False, xlabel=r'2 $\Sigma_s - \langle s \rangle$', ylabel=r'2 $\Sigma_{us} + \langle u \rangle$', frameon=True, ax=ax, save=False, **kwargs) xnew = np.linspace(x.min(), x.max() * 1.02) for fit in stochastic_fits: gamma = _adata.var[fit + '_gamma'].values if fit + '_gamma' in adata.var.keys() else 1 beta = _adata.var[fit + '_beta'].values if fit + '_beta' in adata.var.keys() else 1 offset2 = _adata.var[fit + '_offset2'].values if fit + '_offset2' in adata.var.keys() else 0 pl.plot(xnew, gamma / beta * xnew + offset2 / beta, c='k', linestyle='--') if v == len(var_names) - 1: pl.legend(fits, loc='lower right', prop={'size': 34/n_col}) if isinstance(save, str): savefig('', dpi=dpi, save=save, show=show) if show: pl.show() else: return ax PK]vM1m%scvelo/plotting/velocity_embedding.pyfrom ..tools.velocity_embedding import velocity_embedding as compute_velocity_embedding from ..tools.utils import groups_to_bool from .utils import interpret_colorkey, default_basis, default_size, get_components, savefig, default_color, default_arrow from .scatter import scatter from .docs import doc_scatter, doc_params from matplotlib import rcParams from matplotlib.colors import is_color_like import matplotlib.pyplot as pl import numpy as np import pandas as pd @doc_params(scatter=doc_scatter) def velocity_embedding(adata, basis=None, vkey='velocity', density=None, arrow_size=None, arrow_length=None, scale=None, X=None, V=None, color=None, use_raw=None, layer=None, color_map=None, colorbar=False, palette=None, size=None, alpha=.2, perc=None, sort_order=True, groups=None, components=None, projection='2d', legend_loc='none', legend_fontsize=None, legend_fontweight=None, right_margin=None, left_margin=None, xlabel=None, ylabel=None, title=None, fontsize=None, figsize=None, dpi=None, frameon=None, show=True, save=None, ax=None, **kwargs): """\ Scatter plot of velocities on the embedding Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. x: `str`, `np.ndarray` or `None` (default: `None`) x coordinate y: `str`, `np.ndarray` or `None` (default: `None`) y coordinate vkey: `str` or `None` (default: `None`) Key for annotations of observations/cells or variables/genes. density: `float` (default: 1) Amount of velocities to show - 0 none to 1 all arrow_size: `float` or 3-tuple for headlength, headwidth and headaxislength (default: 1) Size of arrows. arrow_length: `float` (default: 1) Length of arrows. scale: `float` (default: 1) Length of velocities in the embedding. 
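
# Illustrative sketch for pl.velocity() above, assuming the usual scvelo pipeline
# (pp.moments, tl.velocity, tl.velocity_graph; not shown in this dump) has been run
# beforehand; the gene name is only an example from the Dentate Gyrus data.
import scvelo as scv

adata = scv.datasets.dentategyrus()
scv.pp.moments(adata)                       # assumed preprocessing step
scv.tl.velocity(adata, mode='stochastic')   # assumed velocity estimation
scv.tl.velocity_graph(adata)
scv.pl.velocity(adata, var_names=['Tmsb10'], mode='stochastic', color='clusters')
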
{scatter} Returns ------- `matplotlib.Axis` if `show==False` """ basis = default_basis(adata) if basis is None else basis colors = pd.unique(color) if isinstance(color, (list, tuple, np.record)) else [color] layers = pd.unique(layer) if isinstance(layer, (list, tuple, np.ndarray, np.record)) else [layer] vkeys = pd.unique(vkey) if isinstance(vkey, (list, tuple, np.ndarray, np.record)) else [vkey] for key in vkeys: if key + '_' + basis not in adata.obsm_keys() and V is None: compute_velocity_embedding(adata, basis=basis, vkey=key) scatter_kwargs = {"basis": basis, "perc": perc, "use_raw": use_raw, "sort_order": sort_order, "alpha": alpha, "components": components, "projection": projection, "legend_loc": legend_loc, "groups": groups, "legend_fontsize": legend_fontsize, "legend_fontweight": legend_fontweight, "palette": palette, "color_map": color_map, "frameon": frameon, "title": title, "xlabel": xlabel, "ylabel": ylabel, "right_margin": right_margin, "left_margin": left_margin, "colorbar": colorbar, "dpi": dpi, "fontsize": fontsize, "show": False, "save": None} multikey = colors if len(colors) > 1 else layers if len(layers) > 1 else vkeys if len(vkeys) > 1 else None if multikey is not None: figsize = rcParams['figure.figsize'] if figsize is None else figsize for i, gs in enumerate(pl.GridSpec(1, len(multikey), pl.figure(None, (figsize[0]*len(multikey), figsize[1]), dpi=dpi))): velocity_embedding(adata, density=density, scale=scale, size=size, ax=pl.subplot(gs), arrow_size=arrow_size, arrow_length=arrow_length, color=colors[i] if len(colors) > 1 else color, layer=layers[i] if len(layers) > 1 else layer, vkey=vkeys[i] if len(vkeys) > 1 else vkey, **scatter_kwargs, **kwargs) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax else: if projection == '3d': from mpl_toolkits.mplot3d import Axes3D ax = pl.figure(None, figsize, dpi=dpi).gca(projection=projection) if ax is None else ax else: ax = pl.figure(None, figsize, dpi=dpi).gca() if ax is None else ax color, layer, vkey = colors[0], layers[0], vkeys[0] color = default_color(adata) if color is None else color _adata = adata[groups_to_bool(adata, groups, groupby=color)] if groups is not None and color in adata.obs.keys() else adata density = 1 if density is None or density > 1 else density ix_choice = np.random.choice(_adata.n_obs, size=int(density * _adata.n_obs), replace=False) x, y = None if X is None else X[:, 0], None if X is None else X[:, 1] X = _adata.obsm['X_' + basis][:, get_components(components, basis)][ix_choice] if X is None else X[:, :2][ix_choice] V = _adata.obsm[vkey + '_' + basis][:, get_components(components, basis)][ix_choice] if V is None else V[:, :2][ix_choice] hl, hw, hal = default_arrow(arrow_size) scale = 1 / arrow_length if arrow_length is not None else scale if scale is not None else 1 quiver_kwargs = {"scale": scale, "cmap": color_map, "angles": 'xy', "scale_units": 'xy', "width": .0005, "edgecolors": 'k', "headlength": hl, "headwidth": hw, "headaxislength": hal, "linewidth": .1} quiver_kwargs.update(kwargs) c = interpret_colorkey(_adata, color, layer, perc) c = c[ix_choice] if len(c) == _adata.n_obs else c if is_color_like(c[0]): pl.quiver(X[:, 0], X[:, 1], V[:, 0], V[:, 1], color=c, zorder=3, **quiver_kwargs) else: pl.quiver(X[:, 0], X[:, 1], V[:, 0], V[:, 1], c, zorder=3, **quiver_kwargs) size = default_size(adata) / 2 if size is None else size, ax = scatter(adata, x=x, y=y, layer=layer, color=color, size=size, ax=ax, **scatter_kwargs) 
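
# Illustrative sketch for velocity_embedding() above: one arrow per cell on the UMAP,
# assuming `adata` already carries computed velocities (see the pipeline sketch earlier).
import scvelo as scv

scv.pl.velocity_embedding(adata, basis='umap', color='clusters',
                          density=0.8, arrow_length=2, arrow_size=2)
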
if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax PK^vM>G77*scvelo/plotting/velocity_embedding_grid.pyfrom ..tools.velocity_embedding import quiver_autoscale, velocity_embedding from .utils import default_basis, default_size, get_components, savefig, default_arrow from .scatter import scatter from .docs import doc_scatter, doc_params from sklearn.neighbors import NearestNeighbors from scipy.stats import norm as normal from matplotlib import rcParams import matplotlib.pyplot as pl import numpy as np import pandas as pd def compute_velocity_on_grid(X_emb, V_emb, density=None, smooth=None, n_neighbors=None, min_mass=None, autoscale=True, adjust_for_stream=False): # prepare grid n_obs, n_dim = X_emb.shape density = 1 if density is None else density smooth = .5 if smooth is None else smooth grs = [] for dim_i in range(n_dim): m, M = np.min(X_emb[:, dim_i]), np.max(X_emb[:, dim_i]) m = m - .01 * np.abs(M - m) M = M + .01 * np.abs(M - m) gr = np.linspace(m, M, 50 * density) grs.append(gr) meshes_tuple = np.meshgrid(*grs) X_grid = np.vstack([i.flat for i in meshes_tuple]).T # estimate grid velocities if n_neighbors is None: n_neighbors = int(n_obs/50) nn = NearestNeighbors(n_neighbors=n_neighbors, n_jobs=-1) nn.fit(X_emb) dists, neighs = nn.kneighbors(X_grid) scale = np.mean([(g[1] - g[0]) for g in grs]) * smooth weight = normal.pdf(x=dists, scale=scale) p_mass = weight.sum(1) V_grid = (V_emb[neighs] * weight[:, :, None]).sum(1) / np.maximum(1, p_mass)[:, None] if adjust_for_stream: X_grid = np.stack([np.unique(X_grid[:, 0]), np.unique(X_grid[:, 1])]) ns = int(np.sqrt(len(V_grid[:, 0]))) V_grid = V_grid.T.reshape(2, ns, ns) mass = np.sqrt((V_grid ** 2).sum(0)) V_grid[0][mass.reshape(V_grid[0].shape) < 1e-5] = np.nan else: if min_mass is None: min_mass = np.clip(np.percentile(p_mass, 99) / 100, 1e-2, 1) X_grid, V_grid = X_grid[p_mass > min_mass], V_grid[p_mass > min_mass] if autoscale: V_grid /= 3 * quiver_autoscale(X_grid, V_grid) return X_grid, V_grid @doc_params(scatter=doc_scatter) def velocity_embedding_grid(adata, basis=None, vkey='velocity', density=None, smooth=None, min_mass=None, arrow_size=None, arrow_length=None, scale=None, autoscale=True, n_neighbors=None, X=None, V=None, X_grid=None, V_grid=None, principal_curve=False, color=None, use_raw=None, layer=None, color_map=None, colorbar=False, palette=None, size=None, alpha=.2, perc=None, sort_order=True, groups=None, components=None, projection='2d', legend_loc='none', legend_fontsize=None, legend_fontweight=None, right_margin=None, left_margin=None, xlabel=None, ylabel=None, title=None, fontsize=None, figsize=None, dpi=None, frameon=None, show=True, save=None, ax=None, **kwargs): """\ Scatter plot of velocities for the grid points on the embedding Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. x: `str`, `np.ndarray` or `None` (default: `None`) x coordinate y: `str`, `np.ndarray` or `None` (default: `None`) y coordinate vkey: `str` or `None` (default: `None`) Key for annotations of observations/cells or variables/genes. density: `float` (default: 1) Amount of velocities to show - 0 none to 1 all arrow_size: `float` or 3-tuple for headlength, headwidth and headaxislength (default: 1) Size of arrows. arrow_length: `float` (default: 1) Length of arrows. scale: `float` (default: 1) Length of velocities in the embedding. min_mass: `float` (default: 0.5) Minimum threshold for mass to be shown. 
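
# Illustrative sketch for compute_velocity_on_grid() above: grid velocities are a
# Gaussian-kernel average of the velocities of nearby cells, and grid points with too
# little neighboring mass are dropped. Synthetic coordinates are used here.
import numpy as np
from scvelo.plotting.velocity_embedding_grid import compute_velocity_on_grid

X_emb = np.random.rand(2000, 2)           # toy embedding coordinates
V_emb = np.random.randn(2000, 2) * 0.01   # toy embedded velocity vectors
X_grid, V_grid = compute_velocity_on_grid(X_emb, V_emb, density=1, smooth=.5, autoscale=False)
print(X_grid.shape, V_grid.shape)         # ~50x50 grid points, filtered by min_mass
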
smooth: `float` (default: 0.5) Multiplication factor for scale in Gaussian kernel around grid point. n_neighbors: `int` (default: None) Number of neighbors to consider around grid point. X: `np.ndarray` (default: None) embedding grid point coordinates V: `np.ndarray` (default: None) embedding grid velocity coordinates {scatter} Returns ------- `matplotlib.Axis` if `show==False` """ basis = default_basis(adata) if basis is None else basis colors = pd.unique(color) if isinstance(color, (list, tuple, np.record)) else [color] layers = pd.unique(layer) if isinstance(layer, (list, tuple, np.ndarray, np.record)) else [layer] vkeys = pd.unique(vkey) if isinstance(vkey, (list, tuple, np.ndarray, np.record)) else [vkey] for key in vkeys: if key + '_' + basis not in adata.obsm_keys() and V is None: velocity_embedding(adata, basis=basis, vkey=key) color, layer, vkey = colors[0], layers[0], vkeys[0] if X_grid is None or V_grid is None: X_emb = adata.obsm['X_' + basis][:, get_components(components, basis)] if X is None else X[:, :2] V_emb = adata.obsm[vkey + '_' + basis][:, get_components(components, basis)] if V is None else V[:, :2] X_grid, V_grid = compute_velocity_on_grid(X_emb=X_emb, V_emb=V_emb, density=density, autoscale=autoscale, smooth=smooth, n_neighbors=n_neighbors, min_mass=min_mass) scatter_kwargs = {"basis": basis, "perc": perc, "use_raw": use_raw, "sort_order": sort_order, "alpha": alpha, "components": components, "projection": projection, "legend_loc": legend_loc, "groups": groups, "legend_fontsize": legend_fontsize, "legend_fontweight": legend_fontweight, "palette": palette, "color_map": color_map, "frameon": frameon, "title": title, "xlabel": xlabel, "ylabel": ylabel, "right_margin": right_margin, "left_margin": left_margin, "colorbar": colorbar, "dpi": dpi, "fontsize": fontsize, "show": False, "save": None} multikey = colors if len(colors) > 1 else layers if len(layers) > 1 else vkeys if len(vkeys) > 1 else None if multikey is not None: figsize = rcParams['figure.figsize'] if figsize is None else figsize for i, gs in enumerate(pl.GridSpec(1, len(multikey), pl.figure(None, (figsize[0] * len(multikey), figsize[1]), dpi=dpi))): velocity_embedding_grid(adata, density=density, scale=scale, size=size, min_mass=min_mass, smooth=smooth, n_neighbors=n_neighbors, principal_curve=principal_curve, ax=pl.subplot(gs), arrow_size=arrow_size, arrow_length=arrow_length, color=colors[i] if len(colors) > 1 else color, layer=layers[i] if len(layers) > 1 else layer, vkey=vkeys[i] if len(vkeys) > 1 else vkey, X_grid=None if len(vkeys) > 1 else X_grid, V_grid=None if len(vkeys) > 1 else V_grid, autoscale=False if len(vkeys) > 1 else autoscale, **scatter_kwargs, **kwargs) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax else: ax = pl.figure(None, figsize, dpi=dpi).gca() if ax is None else ax hl, hw, hal = default_arrow(arrow_size) scale = 1 / arrow_length if arrow_length is not None else scale if scale is not None else 1 quiver_kwargs = {"scale": scale, "angles": 'xy', "scale_units": 'xy', "width": .001, "color": 'black', "edgecolors": 'k', "headlength": hl/2, "headwidth": hw/2, "headaxislength": hal/2, "linewidth": .2} quiver_kwargs.update(kwargs) pl.quiver(X_grid[:, 0], X_grid[:, 1], V_grid[:, 0], V_grid[:, 1], **quiver_kwargs, zorder=3) if principal_curve: curve = adata.uns['principal_curve']['projections'] pl.plot(curve[:, 0], curve[:, 1], c="w", lw=6, zorder=4) pl.plot(curve[:, 0], curve[:, 1], c="k", lw=3, zorder=5) 
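# (editor's note, not part of the original source) The grid arrows drawn above come from
# compute_velocity_on_grid, which lays a regular grid over the embedding, averages the
# per-cell velocities around each grid point with a Gaussian kernel over its nearest
# neighbors, and drops grid points with too little cell mass. A small, self-contained
# sketch on synthetic data (illustrative only):
#
#     import numpy as np
#     X_emb = np.random.randn(500, 2)         # cell coordinates in the embedding
#     V_emb = 0.1 * np.random.randn(500, 2)   # per-cell velocity vectors
#     X_grid, V_grid = compute_velocity_on_grid(X_emb, V_emb, density=1, smooth=.5)
#     # X_grid: retained grid points; V_grid: mass-weighted mean velocity per grid point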
size = 4 * default_size(adata) if size is None else size ax = scatter(adata, layer=layer, color=color, size=size, ax=ax, **scatter_kwargs) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax @doc_params(scatter=doc_scatter) def velocity_embedding_stream(adata, basis=None, vkey='velocity', density=None, smooth=None, linewidth=None, n_neighbors=None, X=None, V=None, X_grid=None, V_grid=None, color=None, use_raw=None, layer=None, color_map=None, colorbar=False, palette=None, size=None, alpha=.2, perc=None, sort_order=True, groups=None, components=None, projection='2d', legend_loc='none', legend_fontsize=None, legend_fontweight=None, right_margin=None, left_margin=None, xlabel=None, ylabel=None, title=None, fontsize=None, figsize=None, dpi=None, frameon=None, show=True, save=None, ax=None, **kwargs): """\ Stream plot of velocities computed from the grid points on the embedding Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. x: `str`, `np.ndarray` or `None` (default: `None`) x coordinate y: `str`, `np.ndarray` or `None` (default: `None`) y coordinate vkey: `str` or `None` (default: `None`) Key for annotations of observations/cells or variables/genes. density: `float` (default: 1) Amount of velocities to show - 0 none to 1 all smooth: `float` (default: 0.5) Multiplication factor for scale in Gaussian kernel around grid point. linewidth: `float` (default: 1) Line width for streamplot. n_neighbors: `int` (default: None) Number of neighbors to consider around grid point. X: `np.ndarray` (default: None) embedding grid point coordinates V: `np.ndarray` (default: None) embedding grid velocity coordinates {scatter} Returns ------- `matplotlib.Axis` if `show==False` """ basis = default_basis(adata) if basis is None else basis colors = pd.unique(color) if isinstance(color, (list, tuple, np.record)) else [color] layers = pd.unique(layer) if isinstance(layer, (list, tuple, np.ndarray, np.record)) else [layer] vkeys = pd.unique(vkey) if isinstance(vkey, (list, tuple, np.ndarray, np.record)) else [vkey] for key in vkeys: if key + '_' + basis not in adata.obsm_keys() and V is None: velocity_embedding(adata, basis=basis, vkey=key) color, layer, vkey = colors[0], layers[0], vkeys[0] if X_grid is None or V_grid is None: X_emb = adata.obsm['X_' + basis][:, get_components(components, basis)] if X is None else X[:, :2] V_emb = adata.obsm[vkey + '_' + basis][:, get_components(components, basis)] if V is None else V[:, :2] X_grid, V_grid = compute_velocity_on_grid(X_emb=X_emb, V_emb=V_emb, density=1, smooth=smooth, n_neighbors=n_neighbors, autoscale=False, adjust_for_stream=True) lengths = np.sqrt((V_grid ** 2).sum(0)) linewidth = 1 if linewidth is None else linewidth linewidth *= 2 * lengths / lengths[~np.isnan(lengths)].max() scatter_kwargs = {"basis": basis, "perc": perc, "use_raw": use_raw, "sort_order": sort_order, "alpha": alpha, "components": components, "projection": projection, "legend_loc": legend_loc, "groups": groups, "legend_fontsize": legend_fontsize, "legend_fontweight": legend_fontweight, "palette": palette, "color_map": color_map, "frameon": frameon, "title": title, "xlabel": xlabel, "ylabel": ylabel, "right_margin": right_margin, "left_margin": left_margin, "colorbar": colorbar, "dpi": dpi, "fontsize": fontsize, "show": False, "save": None} multikey = colors if len(colors) > 1 else layers if len(layers) > 1 else vkeys if len(vkeys) > 1 else None if multikey is not None: figsize = 
rcParams['figure.figsize'] if figsize is None else figsize for i, gs in enumerate(pl.GridSpec(1, len(multikey), pl.figure(None, (figsize[0] * len(multikey), figsize[1]), dpi=dpi))): velocity_embedding_stream(adata, density=density, size=size, smooth=smooth, n_neighbors=n_neighbors, linewidth=linewidth, ax=pl.subplot(gs), color=colors[i] if len(colors) > 1 else color, layer=layers[i] if len(layers) > 1 else layer, vkey=vkeys[i] if len(vkeys) > 1 else vkey, X_grid=None if len(vkeys) > 1 else X_grid, V_grid=None if len(vkeys) > 1 else V_grid, **scatter_kwargs, **kwargs) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax else: if projection == '3d': from mpl_toolkits.mplot3d import Axes3D ax = pl.figure(None, figsize, dpi=dpi).gca(projection=projection) if ax is None else ax else: ax = pl.figure(None, figsize, dpi=dpi).gca() if ax is None else ax density = 1 if density is None else density stream_kwargs = {"linewidth": linewidth, "density": 2 * density} stream_kwargs.update(kwargs) pl.streamplot(X_grid[0], X_grid[1], V_grid[0], V_grid[1], color='grey', zorder=3, **stream_kwargs) size = 4 * default_size(adata) if size is None else size ax = scatter(adata, layer=layer, color=color, size=size, ax=ax, **scatter_kwargs) if isinstance(save, str): savefig('' if basis is None else basis, dpi=dpi, save=save, show=show) if show: pl.show() else: return ax PK.smM{*2 scvelo/preprocessing/__init__.pyfrom .utils import show_proportions, cleanup, filter_genes, filter_genes_dispersion, \ normalize_per_cell, normalize_layers, log1p, filter_and_normalize, recipe_velocity from .moments import pca, neighbors, moments PKrM0}!!scvelo/preprocessing/moments.pyfrom .. import settings from .. import logging as logg from .utils import normalize_layers, X_is_logarithmized from scanpy.api import Neighbors from scanpy.api.pp import pca from scipy.sparse import csr_matrix import numpy as np def neighbors(adata, n_neighbors=30, n_pcs=30, use_rep=None, knn=True, random_state=0, method='umap', metric='euclidean', metric_kwds={}, copy=False): """ Compute a neighborhood graph of observations [McInnes18]_. The neighbor search efficiency of this heavily relies on UMAP [McInnes18]_, which also provides a method for estimating connectivities of data points - the connectivity of the manifold (`method=='umap'`). If `method=='diffmap'`, connectivities are computed according to [Coifman05]_, in the adaption of [Haghverdi16]_. Parameters ---------- adata Annotated data matrix. n_neighbors The size of local neighborhood (in terms of number of neighboring data points) used for manifold approximation. Larger values result in more global views of the manifold, while smaller values result in more local data being preserved. In general values should be in the range 2 to 100. If `knn` is `True`, number of nearest neighbors to be searched. If `knn` is `False`, a Gaussian kernel width is set to the distance of the `n_neighbors` neighbor. n_pcs : `int` or `None` (default: None) Use this many PCs. If n_pcs==0 use .X if use_rep is None. use_rep : `None`, `'X'` or any key for `.obsm` (default: None) Use the indicated representation. If `None`, the representation is chosen automatically: for .n_vars < 50, .X is used, otherwise ‘X_pca’ is used. knn If `True`, use a hard threshold to restrict the number of neighbors to `n_neighbors`, that is, consider a knn graph. 
Otherwise, use a Gaussian Kernel to assign low weights to neighbors more distant than the `n_neighbors` nearest neighbor. random_state A numpy random seed. method : {{'umap', 'gauss', `None`}} (default: `'umap'`) Use 'umap' [McInnes18]_ or 'gauss' (Gauss kernel following [Coifman05]_ with adaptive width [Haghverdi16]_) for computing connectivities. metric A known metric’s name or a callable that returns a distance. metric_kwds Options for the metric. copy Return a copy instead of writing to adata. Returns ------- Depending on `copy`, updates or returns `adata` with the following: connectivities : sparse matrix (`.uns['neighbors']`, dtype `float32`) Weighted adjacency matrix of the neighborhood graph of data points. Weights should be interpreted as connectivities. distances : sparse matrix (`.uns['neighbors']`, dtype `float32`) Instead of decaying weights, this stores distances for each pair of neighbors. """ logg.info('computing neighbors', r=True) adata = adata.copy() if copy else adata if adata.isview: adata._init_as_actual(adata.copy()) neighbors = Neighbors(adata) neighbors.compute_neighbors(n_neighbors=n_neighbors, knn=knn, n_pcs=n_pcs, use_rep=use_rep, method=method, metric=metric, metric_kwds=metric_kwds, random_state=random_state) adata.uns['neighbors'] = {} adata.uns['neighbors']['params'] = {'n_neighbors': n_neighbors, 'method': method} adata.uns['neighbors']['distances'] = neighbors.distances adata.uns['neighbors']['connectivities'] = neighbors.connectivities logg.info(' finished', time=True, end=' ' if settings.verbosity > 2 else '\n') logg.hint( 'added to `.uns[\'neighbors\']`\n' ' \'distances\', weighted adjacency matrix\n' ' \'connectivities\', weighted adjacency matrix') return adata if copy else None def get_connectivities(adata, mode='connectivities', recurse_neighbors=False): connectivities = adata.uns['neighbors'][mode] > 0 connectivities.setdiag(1) if recurse_neighbors: connectivities += connectivities.dot(connectivities * .5) connectivities.data = np.clip(connectivities.data, 0, 1) connectivities = connectivities.multiply(1. / connectivities.sum(1)) return connectivities.tocsr().astype(np.float32) def moments(data, n_neighbors=30, n_pcs=30, mode='connectivities', use_rep=None, recurse_neighbors=False, renormalize=False, plot=False, copy=False): """Computes first order moments for velocity estimation. Arguments --------- data: :class:`~anndata.AnnData` Annotated data matrix. n_neighbors: `int` (default: 30) Number of neighbors to use. n_pcs: `int` (default: 30) Number of principal components to use. mode: `'connectivities'` or `'distances'` (default: `'connectivities'`) Distance metric to use for moment computation. renormalize: `bool` (default: `False`) Renormalize the moments by total counts per cell to its median. copy: `bool` (default: `False`) Return a copy instead of writing to adata. Returns ------- Returns or updates `adata` with the attributes Ms: `.layers` dense matrix with first order moments of spliced counts. Mu: `.layers` dense matrix with first order moments of unspliced counts. 
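    Example
    -------
    (editor's sketch, assuming an AnnData object with 'spliced'/'unspliced' layers)

    >>> import scvelo as scv
    >>> adata = scv.datasets.toy_data(n_obs=1000)
    >>> scv.pp.filter_and_normalize(adata, min_counts=3)
    >>> scv.pp.moments(adata, n_pcs=30, n_neighbors=30)  # adds 'Ms' and 'Mu' to adata.layers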
""" adata = data.copy() if copy else data if 'spliced' not in adata.layers.keys() or 'unspliced' not in adata.layers.keys(): raise ValueError('Could not find spliced / unspliced counts.') if not X_is_logarithmized(adata): logg.info('Consider to logarithmize adata.X with `scv.pp.log1p` for better results.') if 'neighbors' not in adata.uns.keys() or n_neighbors > adata.uns['neighbors']['params']['n_neighbors']: if 'X_pca' not in adata.obsm.keys() or n_pcs > adata.obsm['X_pca'].shape[1]: pca(adata, n_comps=n_pcs, svd_solver='arpack') if plot: from scanpy.plotting.tools import pca_variance_ratio pca_variance_ratio(adata) neighbors(adata, n_neighbors=n_neighbors, use_rep=('X_pca' if use_rep is None else use_rep), n_pcs=n_pcs) if mode not in adata.uns['neighbors']: raise ValueError('mode can only be \'connectivities\' or \'distances\'') logg.info('computing moments based on ' + str(mode), r=True) normalize_layers(adata) connectivities = get_connectivities(adata, mode, recurse_neighbors=recurse_neighbors) adata.layers['Ms'] = csr_matrix.dot(connectivities, csr_matrix(adata.layers['spliced'])).astype(np.float32).A adata.layers['Mu'] = csr_matrix.dot(connectivities, csr_matrix(adata.layers['unspliced'])).astype(np.float32).A if renormalize: normalize_layers(adata, layers={'Ms', 'Mu'}) logg.info(' finished', time=True, end=' ' if settings.verbosity > 2 else '\n') logg.hint( 'added \n' ' \'Ms\' and \'Mu\', moments of spliced/unspliced abundances (adata.layers)') return adata if copy else None def second_order_moments(adata, adjusted=False): """Computes second order moments for stochastic velocity estimation. Arguments --------- adata: `AnnData` Annotated data matrix. Returns ------- Mss: Second order moments for spliced abundances Mus: Second order moments for spliced with unspliced abundances """ if 'neighbors' not in adata.uns: raise ValueError('You need to run `pp.neighbors` first to compute a neighborhood graph.') connectivities = get_connectivities(adata) s, u = csr_matrix(adata.layers['spliced']), csr_matrix(adata.layers['unspliced']) Mss = csr_matrix.dot(connectivities, s.multiply(s)).astype(np.float32).A Mus = csr_matrix.dot(connectivities, s.multiply(u)).astype(np.float32).A if adjusted: Mss = 2 * Mss - adata.layers['Ms'].reshape(Mss.shape) Mus = 2 * Mus - adata.layers['Mu'].reshape(Mus.shape) return Mss, Mus def second_order_moments_u(adata): """Computes second order moments for stochastic velocity estimation. Arguments --------- adata: `AnnData` Annotated data matrix. Returns ------- Muu: Second order moments for unspliced abundances """ if 'neighbors' not in adata.uns: raise ValueError('You need to run `pp.neighbors` first to compute a neighborhood graph.') connectivities = get_connectivities(adata) u = csr_matrix(adata.layers['unspliced']) Muu = csr_matrix.dot(connectivities, u.multiply(u)).astype(np.float32).A return Muu PKgrMIIscvelo/preprocessing/utils.pyfrom .. import logging as logg import numpy as np from scipy.sparse import issparse, csr_matrix from scanpy.api.pp import log1p def show_proportions(adata): """Fraction of spliced/unspliced/ambiguous abundances Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. Returns ------- Prints the fractions of abundances. 
""" layers_keys = [key for key in ['spliced', 'unspliced', 'ambiguous'] if key in adata.layers.keys()] tot_mol_cell_layers = [adata.layers[key].sum(1) for key in layers_keys] mean_abundances = np.round( [np.mean(tot_mol_cell / np.sum(tot_mol_cell_layers, 0)) for tot_mol_cell in tot_mol_cell_layers], 2) print('Abundance of ' + str(layers_keys) + ': ' + str(mean_abundances)) def cleanup(data, clean='layers', keep=None, copy=False): """Deletes attributes not needed. Arguments --------- data: :class:`~anndata.AnnData` Annotated data matrix. clean: `str` or list of `str` (default: `layers`) Which attributes to consider for freeing memory. keep: `str` or list of `str` (default: `['spliced', unspliced']`) Which attributes to keep. copy: `bool` (default: `False`) Return a copy instead of writing to adata. Returns ------- Returns or updates `adata` with selection of attributes kept. """ adata = data.copy() if copy else data keep = list() if keep is None else list(keep) keep.extend(['spliced', 'unspliced', 'Ms', 'Mu', 'clusters', 'neighbors']) if any(['obs' in clean, 'all' in clean]): for key in list(adata.obs.keys()): if key not in keep: del adata.obs[key] if any(['var' in clean, 'all' in clean]): for key in list(adata.var.keys()): if key not in keep: del adata.var[key] if any(['uns' in clean, 'all' in clean]): for key in list(adata.uns.keys()): if key not in keep: del adata.uns[key] if any(['layers' in clean, 'all' in clean]): for key in list(adata.layers.keys()): # remove layers that are not needed if key not in keep: del adata.layers[key] return adata if copy else None def X_is_logarithmized(adata): idx = np.random.choice(adata.n_obs, 300) X, S = adata.X[idx], adata.layers['spliced'][idx].copy() if issparse(S) or issparse(X): S, S_log = csr_matrix(S), csr_matrix(X) S.data = 1 / np.log1p(S.data) W = S_log.multiply(S) W.data /= np.mean(W.data) val = max(np.max(W.data), np.max(1 / W.data)) else: S = 1 / np.log1p(S) S[np.isinf(S)] = 0 W = X * S W /= np.mean(W[W > 0]) W_inv = 1 / W W_inv[np.isinf(W_inv)] = 0 val = max(np.max(W), np.max(W_inv)) return val < 3 # approx. 
3 * standard deviation def get_size(adata, layer): X = adata.layers[layer] return X.sum(1).A1.copy() if issparse(X) else X.sum(1).copy() def set_initial_size(adata, layers={'spliced', 'unspliced'}): if all([layer in adata.layers.keys() for layer in layers]): layers = [layer for layer in layers if 'initial_size_' + layer not in adata.obs.keys()] total_size = 0 for layer in layers: size = get_size(adata, layer) adata.obs['initial_size_' + layer] = size total_size += size if 'initial_size' not in adata.obs.keys(): adata.obs['initial_size'] = total_size def get_initial_size(adata, layer, by_total_size=None): if layer not in {'spliced', 'unspliced'}: return None else: return adata.obs['initial_size'].copy() if by_total_size and 'initial_size' in adata.obs.keys() else \ adata.obs['initial_size_' + layer].copy() if 'initial_size_' + layer in adata.obs.keys() else get_size(adata, layer) def filter(X, min_counts=None, min_cells=None, max_counts=None, max_cells=None): counts = X if (min_counts is not None or max_counts is not None) else X > 0 counts = counts.sum(0).A1 if issparse(counts) else counts.sum(0) lb = min_counts if min_counts is not None else min_cells if min_cells is not None else -np.inf ub = max_counts if max_counts is not None else max_cells if max_cells is not None else np.inf return (lb <= counts) & (counts <= ub), counts def filter_genes(data, min_counts=None, min_cells=None, max_counts=None, max_cells=None, min_counts_u=None, min_cells_u=None, max_counts_u=None, max_cells_u=None, copy=False): """Filter genes based on number of cells or counts. Keep genes that have at least `min_counts` counts or are expressed in at least `min_cells` cells or have at most `max_counts` counts or are expressed in at most `max_cells` cells. Only provide one of the optional parameters `min_counts`, `min_cells`, `max_counts`, `max_cells` per call. Parameters ---------- data : :class:`~anndata.AnnData`, `np.ndarray`, `sp.spmatrix` The (annotated) data matrix of shape `n_obs` × `n_vars`. Rows correspond to cells and columns to genes. min_counts : `int`, optional (default: `None`) Minimum number of counts required for a gene to pass filtering. min_cells : `int`, optional (default: `None`) Minimum number of cells expressed required for a gene to pass filtering. max_counts : `int`, optional (default: `None`) Maximum number of counts required for a gene to pass filtering. max_cells : `int`, optional (default: `None`) Maximum number of cells expressed required for a gene to pass filtering. min_counts_u : `int`, optional (default: `None`) Minimum number of unspliced counts required for a gene to pass filtering. min_cells_u : `int`, optional (default: `None`) Minimum number of unspliced cells expressed required for a gene to pass filtering. max_counts_u : `int`, optional (default: `None`) Maximum number of unspliced counts required for a gene to pass filtering. max_cells_u : `int`, optional (default: `None`) Maximum number of unspliced cells expressed required for a gene to pass filtering. copy : `bool`, optional (default: `False`) Determines whether a copy is returned. Returns ------- Filters the object and adds `n_counts` to `adata.var`. 
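    Example
    -------
    (editor's sketch)

    >>> scv.pp.filter_genes(adata, min_counts=3, min_counts_u=3)  # keep genes with at least 3 spliced and 3 unspliced counts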
""" adata = data.copy() if copy else data # set initial cell sizes before filtering set_initial_size(adata) def filter(X, min_counts=None, min_cells=None, max_counts=None, max_cells=None): counts = X if (min_counts is not None or max_counts is not None) else X > 0 counts = counts.sum(0).A1 if issparse(counts) else counts.sum(0) lb = min_counts if min_counts is not None else min_cells if min_cells is not None else -np.inf ub = max_counts if max_counts is not None else max_cells if max_cells is not None else np.inf return (lb <= counts) & (counts <= ub), counts gene_subset = np.ones(adata.n_vars, dtype=bool) if min_counts is not None or max_counts is not None: gene_subset, counts = filter(adata.X, min_counts=min_counts, max_counts=max_counts) adata.var['n_counts'] = counts adata._inplace_subset_var(gene_subset) if min_cells is not None or max_cells is not None: gene_subset, cells = filter(adata.X, min_cells=min_cells, max_cells=max_cells) adata.var['n_cells'] = cells adata._inplace_subset_var(gene_subset) s = np.sum(~gene_subset) if s > 0: logg.info('filtered out {} genes that are detected'.format(s), end=' ') if min_cells is not None or min_counts is not None: logg.info('in less than', str(min_cells) + ' cells' if min_counts is None else str(min_counts) + ' counts', no_indent=True) if max_cells_u is not None or max_counts_u is not None: logg.info('in more than ', str(max_cells) + ' cells' if max_counts is None else str(max_counts) + ' counts', no_indent=True) layer = 'unspliced' if layer in adata.layers.keys(): gene_subset = np.ones(adata.n_vars, dtype=bool) if min_counts_u is not None or max_counts is not None: gene_subset, _ = filter(adata.layers[layer], min_counts=min_counts_u, max_counts=max_counts_u) adata._inplace_subset_var(gene_subset) if min_cells_u is not None or max_cells_u is not None: gene_subset, _ = filter(adata.layers[layer], min_cells=min_cells_u, max_cells=max_cells_u) adata._inplace_subset_var(gene_subset) s = np.sum(~gene_subset) if s > 0: logg.info('filtered out {} genes that are detected'.format(s), end=' ') if min_cells_u is not None or min_counts_u is not None: logg.info('in less than', str(min_cells_u) + ' cells (' + str(layer) + ')' if min_counts_u is None else str(min_counts_u) + ' counts(' + str(layer) + ')', no_indent=True) if max_cells_u is not None or max_counts_u is not None: logg.info('in more than ', str(max_cells_u) + ' cells(' + str(layer) + ')' if max_counts_u is None else str(max_counts_u) + ' counts(' + str(layer) + ')', no_indent=True) return adata if copy else None def filter_genes_dispersion(data, flavor='seurat', min_disp=None, max_disp=None, min_mean=None, max_mean=None, n_bins=20, n_top_genes=None, log=True, copy=False): """Extract highly variable genes. The normalized dispersion is obtained by scaling with the mean and standard deviation of the dispersions for genes falling into a given bin for mean expression of genes. This means that for each bin of mean expression, highly variable genes are selected. Parameters ---------- data : :class:`~anndata.AnnData`, `np.ndarray`, `sp.sparse` The (annotated) data matrix of shape `n_obs` × `n_vars`. Rows correspond to cells and columns to genes. flavor : {'seurat', 'cell_ranger', 'svr'}, optional (default: 'seurat') Choose the flavor for computing normalized dispersion. If choosing 'seurat', this expects non-logarithmized data - the logarithm of mean and dispersion is taken internally when `log` is at its default value `True`. 
For 'cell_ranger', this is usually called for logarithmized data - in this case you should set `log` to `False`. In their default workflows, Seurat passes the cutoffs whereas Cell Ranger passes `n_top_genes`. min_mean=0.0125, max_mean=3, min_disp=0.5, max_disp=`None` : `float`, optional If `n_top_genes` unequals `None`, these cutoffs for the means and the normalized dispersions are ignored. n_bins : `int` (default: 20) Number of bins for binning the mean gene expression. Normalization is done with respect to each bin. If just a single gene falls into a bin, the normalized dispersion is artificially set to 1. You'll be informed about this if you set `settings.verbosity = 4`. n_top_genes : `int` or `None` (default: `None`) Number of highly-variable genes to keep. log : `bool`, optional (default: `True`) Use the logarithm of the mean to variance ratio. copy : `bool`, optional (default: `False`) If an :class:`~anndata.AnnData` is passed, determines whether a copy is returned. Returns ------- If an AnnData `adata` is passed, returns or updates `adata` depending on \ `copy`. It filters the `adata` and adds the annotations """ adata = data.copy() if copy else data set_initial_size(adata) if n_top_genes is not None and adata.n_vars < n_top_genes: logg.info('Skip filtering by dispersion since number of variables are less than `n_top_genes`') else: if flavor == 'svr': mu = adata.X.mean(0).A1 if issparse(adata.X) else adata.X.mean(0) sigma = np.sqrt(adata.X.multiply(adata.X).mean(0).A1 - mu ** 2) if issparse(adata.X) else adata.X.std(0) log_mu = np.log2(mu) log_cv = np.log2(sigma / mu) from sklearn.svm import SVR clf = SVR(gamma=150. / len(mu)) clf.fit(log_mu[:, None], log_cv) score = log_cv - clf.predict(log_mu[:, None]) nth_score = np.sort(score)[::-1][n_top_genes] adata._inplace_subset_var(score >= nth_score) else: from scanpy.api.pp import filter_genes_dispersion filter_genes_dispersion(adata, flavor=flavor, min_disp=min_disp, max_disp=max_disp, min_mean=min_mean, max_mean=max_mean, n_bins=n_bins, n_top_genes=n_top_genes, log=log) return adata if copy else None def counts_per_cell_quantile(X, max_proportion_per_cell=.05, counts_per_cell=None): if counts_per_cell is None: counts_per_cell = X.sum(1).A1 if issparse(X) else X.sum(1) gene_subset = np.all(X <= counts_per_cell[:, None] * max_proportion_per_cell, axis=0) if issparse(X): gene_subset = gene_subset.A1 return X[:, gene_subset].sum(1).A1 if issparse(X) else X[:, gene_subset].sum(1) def normalize_layers(data, layers={'spliced', 'unspliced'}, counts_per_cell_after=None, max_proportion_per_cell=None, by_total_size=None, copy=False): """Normalize by total counts to median. 
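    (editor's note) Each listed layer is divided by a per-cell size factor, namely the
    initial spliced/unspliced counts recorded by `set_initial_size`, and rescaled towards
    the median of those initial counts; layers whose entries are already non-integer are
    assumed to be normalized and are skipped.

    Example (editor's sketch): normalize_layers(adata, layers={'spliced', 'unspliced'})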
""" adata = data.copy() if copy else data from scanpy.api.pp import normalize_per_cell def not_normalized_yet(adata, layer): X = adata.layers[layer] return np.allclose((X.data[:10] if issparse(X) else X[0]) % 1, 0, atol=1e-3) for layer in layers: if not_normalized_yet(adata, layer): counts_per_cell = get_initial_size(adata, layer, by_total_size) if max_proportion_per_cell is not None and (0 < max_proportion_per_cell < 1): counts_per_cell = counts_per_cell_quantile(adata.X, max_proportion_per_cell, counts_per_cell) counts_per_cell += counts_per_cell == 0 adata.layers[layer] = normalize_per_cell(adata.layers[layer], counts_per_cell_after, counts_per_cell, copy=True) return adata if copy else None def normalize_per_cell(data, counts_per_cell_after=None, counts_per_cell=None, key_n_counts=None, max_proportion_per_cell=None, layers={'spliced', 'unspliced'}, copy=False): """Normalize each cell by total counts over all genes. Parameters ---------- data : :class:`~anndata.AnnData`, `np.ndarray`, `sp.sparse` The (annotated) data matrix of shape `n_obs` × `n_vars`. Rows correspond to cells and columns to genes. counts_per_cell_after : `float` or `None`, optional (default: `None`) If `None`, after normalization, each cell has a total count equal to the median of the *counts_per_cell* before normalization. counts_per_cell : `np.array`, optional (default: `None`) Precomputed counts per cell. key_n_counts : `str`, optional (default: `'n_counts'`) Name of the field in `adata.obs` where the total counts per cell are stored. max_proportion_per_cell : `int` (default: `None`) Exclude genes counts that account for more than a specific proportion of cell size, e.g. 0.05. layers : `str` or `list` (default: `{'spliced', 'unspliced'}`) Keys for layers to be also considered for normalization. copy : `bool`, optional (default: `False`) If an :class:`~anndata.AnnData` is passed, determines whether a copy is returned. Returns ------- Returns or updates `adata` with normalized version of the original `adata.X`, depending on `copy`. """ adata = data.copy() if copy else data from scanpy.api.pp import normalize_per_cell if max_proportion_per_cell is not None and (0 < max_proportion_per_cell < 1): counts_per_cell = counts_per_cell_quantile(adata.X, max_proportion_per_cell) normalize_per_cell(adata, counts_per_cell_after, counts_per_cell, key_n_counts) normalize_layers(adata, layers, counts_per_cell_after, max_proportion_per_cell) return adata if copy else None def filter_and_normalize(data, min_counts=None, min_counts_u=None, min_cells=None, min_cells_u=None, n_top_genes=None, flavor='seurat', log=True, copy=False): """Filtering, normalization and log transform Expects non-logarithmized data. If using logarithmized data, pass `log=False`. Runs the following steps .. code:: python scv.pp.filter_genes(adata) scv.pp.normalize_per_cell(adata) if n_top_genes is not None: scv.pp.filter_genes_dispersion(adata) if log: scv.pp.log1p(adata) Arguments --------- data: :class:`~anndata.AnnData` Annotated data matrix. min_counts: `int` (default: `None`) Minimum number of counts required for a gene to pass filtering (spliced). min_counts_u: `int` (default: `None`) Minimum number of counts required for a gene to pass filtering (unspliced). min_cells: `int` (default: `None`) Minimum number of cells expressed required for a gene to pass filtering (spliced). min_cells_u: `int` (default: `None`) Minimum number of cells expressed required for a gene to pass filtering (unspliced). n_top_genes: `int` (default: `None`) Number of genes to keep. 
flavor: {'seurat', 'cell_ranger', 'svr'}, optional (default: 'seurat') Choose the flavor for computing normalized dispersion. If choosing 'seurat', this expects non-logarithmized data. log: `bool` (default: `True`) Take logarithm. copy: `bool` (default: `False`) Return a copy of `adata` instead of updating it. Returns ------- Returns or updates `adata` depending on `copy`. """ adata = data.copy() if copy else data filter_genes(adata, min_counts=min_counts, min_counts_u=min_counts_u, min_cells=min_cells, min_cells_u=min_cells_u) normalize_per_cell(adata) if n_top_genes is not None: filter_genes_dispersion(adata, n_top_genes=n_top_genes, flavor=flavor) if log: log1p(adata) return adata if copy else None def recipe_velocity(adata, min_counts=3, min_counts_u=3, n_top_genes=None, n_pcs=30, n_neighbors=30, log=True, copy=False): """Runs pp.filter_and_normalize() and pp.moments() """ from .moments import moments filter_and_normalize(adata, min_counts=min_counts, min_counts_u=min_counts_u, n_top_genes=n_top_genes, log=log) moments(adata, n_neighbors=n_neighbors, n_pcs=n_pcs) return adata if copy else None PK)VMAOscvelo/tools/__init__.pyfrom .velocity import velocity from .velocity_graph import velocity_graph from .transition_matrix import transition_matrix from .velocity_embedding import velocity_embedding from .velocity_confidence import velocity_confidence, velocity_confidence_transition from .terminal_states import eigs, terminal_states from .rank_velocity_genes import rank_velocity_genes from scanpy.api.tl import tsne, umap, diffmap, louvain, paga PKdpMazVJJscvelo/tools/optimization.pyfrom .utils import prod_sum_obs, mean, make_dense from scipy.optimize import minimize from scipy.sparse import csr_matrix import numpy as np import warnings def weight_input(x, y, perc=None): if perc is not None: xy_norm = x / np.clip(x.max(0), 1e-3, None) + y / np.clip(y.max(0), 1e-3, None) W = csr_matrix(xy_norm >= np.percentile(xy_norm, perc, axis=0)) x, y = W.multiply(x).tocsr(), W.multiply(y).tocsr() return x, y def leastsq_NxN(x, y, fit_offset=False, perc=None): """Solution to least squares: gamma = cov(X,Y) / var(X) """ x, y = weight_input(x, y, perc) n_obs, n_var = x.shape with warnings.catch_warnings(): warnings.simplefilter("ignore") if fit_offset: cov_xy, cov_xx = prod_sum_obs(x, y) / n_obs, prod_sum_obs(x, x) / n_obs mean_x, mean_y = mean(x), mean(y) numerator_offset = cov_xx * mean_y - cov_xy * mean_x numerator_gamma = cov_xy - mean_x * mean_y offset, gamma = (numerator_offset, numerator_gamma) / (cov_xx - mean_x * mean_x) else: offset, gamma = np.zeros(n_var), prod_sum_obs(x, y) / prod_sum_obs(x, x) offset[np.isnan(offset)] = 0 gamma[np.isnan(gamma)] = 0 return offset, gamma def leastsq_generalized(x, y, x2, y2, res_std=None, res2_std=None, fit_offset=False, fit_offset2=False, perc=None): """Solution to the 2-dim generalized least squares: gamma = inv(X'QX)X'QY """ x, y = weight_input(x, y, perc) n_obs, n_var = x.shape offset, offset_ss = np.zeros(n_var, dtype="float32"), np.zeros(n_var, dtype="float32") gamma = np.ones(n_var, dtype="float32") if (res_std is None) or (res2_std is None): res_std, res2_std = np.ones(n_var), np.ones(n_var) ones, zeros = np.ones(n_obs), np.zeros(n_obs) with warnings.catch_warnings(): warnings.simplefilter("ignore") x, y = np.vstack((make_dense(x)/res_std, x2/res2_std)), np.vstack((make_dense(y)/res_std, y2/res2_std)) if fit_offset and fit_offset2: for i in range(n_var): A = np.c_[np.vstack((np.c_[ones/res_std[i], zeros], np.c_[zeros, ones/res2_std[i]])), x[:, i]] 
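# (editor's comment) The line below solves each gene's generalized least-squares problem
# in closed form via the normal equations, params = pinv(A'A) A'y, where A stacks the
# residual-std-scaled offset columns together with the regressor x[:, i].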
offset[i], offset_ss[i], gamma[i] = np.linalg.pinv(A.T.dot(A)).dot(A.T.dot(y[:, i])) elif fit_offset: for i in range(n_var): A = np.c_[np.hstack((ones/res_std[i], zeros)), x[:, i]] offset[i], gamma[i] = np.linalg.pinv(A.T.dot(A)).dot(A.T.dot(y[:, i])) elif fit_offset2: for i in range(n_var): A = np.c_[np.hstack((zeros, ones/res2_std[i])), x[:, i]] offset_ss[i], gamma[i] = np.linalg.pinv(A.T.dot(A)).dot(A.T.dot(y[:, i])) else: for i in range(n_var): A = np.c_[x[:, i]] gamma[i] = np.linalg.pinv(A.T.dot(A)).dot(A.T.dot(y[:, i])) offset[np.isnan(offset)] = 0 offset_ss[np.isnan(offset_ss)] = 0 gamma[np.isnan(gamma)] = 0 return offset, offset_ss, gamma def maximum_likelihood(Ms, Mu, Mus, Mss, fit_offset=False, fit_offset2=False): """Maximizing the log likelihood using weights according to empirical bayes """ n_obs, n_var = Ms.shape offset, offset_ss = np.zeros(n_var, dtype="float32"), np.zeros(n_var, dtype="float32") gamma = np.ones(n_var, dtype="float32") def sse(A, data, b): sigma = (A.dot(data) - b).std(1) return np.log(sigma).sum() # np.log(np.sqrt(2*np.pi) * sigma).sum() + (.5 * (res/sigma[:, None])**2).sum() if fit_offset and fit_offset2: for i in range(n_var): data = np.vstack((Mu[:, i], Ms[:, i], Mus[:, i], Mss[:, i])) offset[i], offset_ss[i], gamma[i] = \ minimize(lambda m: sse(np.array([[1, -m[2], 0, 0], [1, m[2], 2, -2 * m[2]]]), data, b=np.array(m[0], m[1])), x0=(1e-4, 1e-4, 1), method="L-BFGS-B").x elif fit_offset: for i in range(n_var): data = np.vstack((Mu[:, i], Ms[:, i], Mus[:, i], Mss[:, i])) offset[i], gamma[i] = \ minimize(lambda m: sse(np.array([[1, -m[1], 0, 0], [1, m[1], 2, -2 * m[1]]]), data, b=np.array(m[0], 0)), x0=(1e-4, 1), method="L-BFGS-B").x elif fit_offset2: for i in range(n_var): data = np.vstack((Mu[:, i], Ms[:, i], Mus[:, i], Mss[:, i])) offset_ss[i], gamma[i] = \ minimize(lambda m: sse(np.array([[1, -m[1], 0, 0], [1, m[1], 2, -2 * m[1]]]), data, b=np.array(0, m[0])), x0=(1e-4, 1), method="L-BFGS-B").x else: for i in range(n_var): data = np.vstack((Mu[:, i], Ms[:, i], Mus[:, i], Mss[:, i])) gamma[i] = \ minimize(lambda m: sse(np.array([[1, -m, 0, 0], [1, m, 2, -2 * m]]), data, b=0), x0=gamma[i], method="L-BFGS-B").x return offset, offset_ss, gamma PKIuMp>TF#scvelo/tools/rank_velocity_genes.pyfrom .. import settings from .. import logging as logg from scipy.sparse import issparse import numpy as np def get_mean_var(X, ignore_zeros=False, perc=None): data = X.data if issparse(X) else X mask_nans = np.isnan(data) | np.isinf(data) | np.isneginf(data) n_nonzeros = (X != 0).sum(0) n_counts = n_nonzeros if ignore_zeros else X.shape[0] if mask_nans.sum() > 0: if issparse(X): data[np.isnan(data) | np.isinf(data) | np.isneginf(data)] = 0 n_nans = n_nonzeros - (X != 0).sum(0) else: X[mask_nans] = 0 n_nans = mask_nans.sum(0) n_counts -= n_nans if perc is not None: if isinstance(perc, int): perc = [perc, 100] if perc < 50 else [0, perc] lb, ub = np.percentile(data, perc) data = np.clip(data, lb, ub) mean = (X.sum(0) / n_counts).A1 if issparse(X) else X.sum(0) / n_counts mean_sq = (X.multiply(X).sum(0) / n_counts).A1 if issparse(X) else np.multiply(X, X).sum(0) / n_counts var = (mean_sq - mean ** 2) * (X.shape[0] / (X.shape[0] - 1)) mean = np.nan_to_num(mean) var = np.nan_to_num(var) return mean, var def select_groups(adata, groups='all', key='louvain'): """Get subset of groups in adata.obs[key]. 
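    (editor's sketch) e.g. `groups, masks = select_groups(adata, groups=['0', '1'], key='louvain')`
    returns the selected category names together with one boolean mask over observations per group.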
""" if isinstance(groups, list) and isinstance(groups[0], int): groups = [str(n) for n in groups] categories = adata.obs[key].cat.categories groups_masks = np.array([categories[i] == adata.obs[key].values for i, name in enumerate(categories)]) if groups == 'all': groups = categories.values else: groups_ids = [np.where(categories.values == name)[0][0] for name in groups] groups_masks = groups_masks[groups_ids] groups = categories[groups_ids].values return groups, groups_masks def velocity_clusters(data, vkey='velocity', copy=False): adata = data.copy() if copy else data logg.info('computing velocity clusters', r=True) from .. import AnnData vdata = AnnData(adata.layers[vkey]) vdata.obs = adata.obs.copy() vdata.var = adata.var.copy() tmp_filter = np.ones(vdata.n_vars, dtype=bool) if 'velocity_genes' in vdata.var.keys(): tmp_filter &= vdata.var['velocity_genes'] if 'unspliced' in vdata.layers.keys(): n_counts = (vdata.layers['unspliced'] > 0).sum(0) n_counts = n_counts.A1 if issparse(vdata.layers['unspliced']) else n_counts min_counts = min(50, np.percentile(n_counts, 50)) tmp_filter &= (n_counts > min_counts) if 'r2' in vdata.var.keys(): r2 = vdata.var.velocity_r2 min_r2 = np.percentile(r2[r2 > 0], 50) tmp_filter &= (r2 > min_r2) if 'dispersions_norm' in vdata.var.keys(): dispersions = vdata.var.dispersions_norm min_dispersion = np.percentile(dispersions, 20) tmp_filter &= (dispersions > min_dispersion) vdata._inplace_subset_var(tmp_filter) import scanpy.api as sc verbosity_tmp = sc.settings.verbosity sc.settings.verbosity = 0 sc.pp.pca(vdata, n_comps=20, svd_solver='arpack') sc.pp.neighbors(vdata, n_pcs=20) sc.tl.louvain(vdata) sc.settings.verbosity = verbosity_tmp adata.obs[vkey + '_clusters'] = vdata.obs['louvain'] logg.info(' finished', time=True, end=' ' if settings.verbosity > 2 else '\n') logg.hint( 'added \n' ' \'' + vkey + '_clusters\', clusters based on modularity on velocity field (adata.obs)') return vdata.obs['louvain'] def rank_velocity_genes(data, vkey='velocity', n_genes=10, groupby=None, min_counts=None, min_r2=None, min_dispersion=None, copy=False): """Rank genes for characterizing groups according to unspliced/spliced correlation and differential expression. Arguments ---------- data : :class:`~anndata.AnnData` Annotated data matrix. vkey: `str` (default: `'velocity'`) Key of velocities computed in `tl.velocity` n_genes : `int`, optional (default: 100) The number of genes that appear in the returned tables. groupby: `str`, `list` or `np.ndarray` (default: `None`) Key of observations grouping to consider. min_counts: `float` (default: None) Minimum count of genes for consideration. min_r2: `float` (default: None) Minimum r2 value of genes for consideration. min_dispersion: `float` (default: None) Minimum dispersion norm value of genes for consideration. copy: `bool` (default: `False`) Return a copy instead of writing to data. Returns ------- Returns or updates `data` with the attributes rank_velocity_genes : `.uns` Structured array to be indexed by group id storing the gene names. Ordered according to scores. velocity_score : `.var` Storing the score for each gene for each group. Ordered according to scores. 
""" adata = data.copy() if copy else data if groupby is None: velocity_clusters(adata, vkey=vkey) groupby = 'velocity_clusters' logg.info('ranking velocity genes', r=True) tmp_filter = np.ones(adata.n_vars, dtype=bool) if 'velocity_genes' in adata.var.keys(): tmp_filter &= adata.var['velocity_genes'] if 'unspliced' in adata.layers.keys(): n_counts = (adata.layers['unspliced'] > 0).sum(0) n_counts = n_counts.A1 if issparse(adata.layers['unspliced']) else n_counts min_counts = min(50, np.percentile(n_counts, 50)) if min_counts is None else min_counts tmp_filter &= (n_counts > min_counts) if 'r2' in adata.var.keys(): r2 = adata.var.velocity_r2 min_r2 = .1 if min_r2 is None else min_r2 # np.percentile(r2[r2 > 0], 50) tmp_filter &= (r2 > min_r2) if 'dispersions_norm' in adata.var.keys(): dispersions = adata.var.dispersions_norm min_dispersion = 0 if min_dispersion is None else min_dispersion # np.percentile(dispersions, 20) tmp_filter &= (dispersions > min_dispersion) X = adata[:, tmp_filter].layers[vkey] groups, groups_masks = select_groups(adata[:, tmp_filter], key=groupby) n_groups = groups_masks.shape[0] sizes = groups_masks.sum(1) mean, var = np.zeros((n_groups, X.shape[1])), np.zeros((n_groups, X.shape[1])) for i, mask in enumerate(groups_masks): mean[i], var[i] = get_mean_var(X[mask]) # test each against the union of all other groups rankings_gene_names, rankings_gene_scores, indices = [], [], [] for i in range(n_groups): mask_rest = ~groups_masks[i] mean_rest, var_rest = get_mean_var(X[mask_rest]) size_rest = sizes[i] # else mask_rest.sum() if method == 't-test' scores = (mean[i] - mean_rest) / np.sqrt(var[i] / sizes[i] + var_rest / size_rest) scores = np.nan_to_num(scores) # equivalent to but much faster than np.argsort(scores)[-10:] if n_genes > X.shape[1]: n_genes = X.shape[1] idx = np.argpartition(scores, -n_genes)[-n_genes:] idx = idx[np.argsort(scores[idx])[::-1]] rankings_gene_names.append(adata[:, tmp_filter].var_names[idx].values) rankings_gene_scores.append(scores[idx]) rankings_gene_names = np.array([list(n) for n in rankings_gene_names]) rankings_gene_scores = np.array([list(n) for n in rankings_gene_scores]) all_names = rankings_gene_names.T.flatten() all_scores = rankings_gene_scores.T.flatten() vscore = np.zeros(adata.n_vars, dtype=np.int) for i, name in enumerate(adata.var_names): if name in all_names: vscore[i] = all_scores[np.where(name == all_names)[0][0]] adata.var['velocity_score'] = vscore key = 'rank_velocity_genes' if key not in adata.uns.keys(): adata.uns[key] = {} adata.uns[key] = {'groups': groups, 'names': rankings_gene_names, 'scores': rankings_gene_scores.round(0)} logg.info(' finished', time=True, end=' ' if settings.verbosity > 2 else '\n') logg.hint( 'added \n' ' \'' + key + '\', sorted scores by group ids (adata.uns)') return adata if copy else None PKpMqvuscvelo/tools/run.pyfrom ..preprocessing import filter_and_normalize, moments from . 
import velocity, velocity_graph, velocity_embedding def run_all(data, basis=None, mode='deterministic', min_counts=3, n_pcs=30, n_neighbors=30, copy=False): from time import time start = time() adata = data.copy() if copy else data filter_and_normalize(adata, min_counts=min_counts) print("Number of genes to be used:", adata.X.shape[1]) moments(adata, n_neighbors=n_neighbors, n_pcs=n_pcs) velocity(adata, mode=mode) print("Number of genes to be used:", adata.var.velocity_genes.sum()) velocity_graph(adata) if basis is not None: velocity_embedding(adata, basis=basis) print('Total time (seconds): ' + str(round(time() - start, 2))) return adata if copy else None def convert_to_adata(vlm, basis=None): from collections import OrderedDict from .. import AnnData X = vlm.S_norm.T if hasattr(vlm, 'S_norm') else vlm.S_sz.T if hasattr(vlm, 'S_sz') else vlm.S.T # .sparse().T.tocsr() layers = OrderedDict() layers['spliced'] = vlm.S_sz.T if hasattr(vlm, 'S_sz') else vlm.S.T layers['unspliced'] = vlm.U_sz.T if hasattr(vlm, 'U_sz') else vlm.U.T if hasattr(vlm, 'A') and (vlm.A.T.shape == layers['spliced'].shape): layers['ambiguous'] = vlm.A.T if hasattr(vlm, 'velocity'): layers['velocity'] = vlm.velocity.T if hasattr(vlm, 'Sx_sz'): layers['Ms'] = vlm.Sx_sz.T if hasattr(vlm, 'Ux_sz'): layers['Mu'] = vlm.Ux_sz.T obs = dict(vlm.ca) if 'CellID' in obs.keys(): obs['obs_names'] = obs.pop('CellID') var = dict(vlm.ra) if 'Gene' in var.keys(): var['var_names'] = var.pop('Gene') if hasattr(vlm, 'q'): var['velocity_offset'] = vlm.q if hasattr(vlm, 'gammas'): var['velocity_gamma'] = vlm.gammas if hasattr(vlm, 'R2'): var['velocity_r2'] = vlm.R2 adata = AnnData(X, obs=obs, var=var, layers=layers) if hasattr(vlm, 'corrcoef'): adata.uns['velocity_graph'] = vlm.corrcoef if basis is not None: if hasattr(vlm, 'ts'): adata.obsm['X_' + basis] = vlm.ts if hasattr(vlm, 'delta_embedding'): adata.obsm['velocity_' + basis] = vlm.delta_embedding return adata def convert_to_loom(adata, basis=None): from scipy.sparse import issparse import numpy as np import velocyto class VelocytoLoom(velocyto.VelocytoLoom): def __init__(self, adata, basis=None): self.S = adata.layers['spliced'].T self.U = adata.layers['unspliced'].T self.S = self.S.A if issparse(self.S) else self.S self.U = self.U.A if issparse(self.U) else self.U if 'ambiguous' in adata.layers.keys(): self.A = adata.layers['ambiguous'].T if issparse(self.A): self.A = self.A.A self.initial_cell_size = self.S.sum(0) self.initial_Ucell_size = self.U.sum(0) self.ca = dict() self.ra = dict() self.ca['CellID'] = np.array(adata.obs_names) self.ra['Gene'] = np.array(adata.var_names) for key in adata.obs.keys(): self.ca[key] = np.array(adata.obs[key].values) for key in adata.var.keys(): self.ra[key] = np.array(adata.var[key].values) if isinstance(basis, str) and 'X_' + basis in adata.obsm.keys(): if 'X_' + basis in adata.obsm.keys(): self.ts = adata.obsm['X_' + basis] if 'velocity_' + basis in adata.obsm.keys(): self.delta_embedding = adata.obsm['velocity_' + basis] if 'clusters' in self.ca: self.set_clusters(self.ca['clusters']) elif 'louvain' in self.ca: self.set_clusters(self.ca['louvain']) def filter_and_normalize(self, min_counts=3, min_counts_u=3, min_cells=0, min_cells_u=0, n_top_genes=None, max_expr_avg=20): # counterpart to scv.pp.filter_and_normalize() self.score_detection_levels(min_expr_counts=min_counts, min_expr_counts_U=0, min_cells_express=min_cells, min_cells_express_U=0) self.filter_genes(by_detection_levels=True) if n_top_genes is not None and n_top_genes < self.S.shape[0]: 
self.score_cv_vs_mean(n_top_genes, max_expr_avg=max_expr_avg) self.filter_genes(by_cv_vs_mean=True) self.score_detection_levels(min_expr_counts=0, min_cells_express=0, min_expr_counts_U=min_counts_u, min_cells_express_U=min_cells_u) self.filter_genes(by_detection_levels=True) self._normalize_S(relative_size=self.initial_cell_size, target_size=np.median(self.initial_cell_size)) self._normalize_U(relative_size=self.initial_Ucell_size, target_size=np.median(self.initial_Ucell_size)) #self.normalize_by_total() print("Number of genes to be used:", self.S.shape[0]) def impute(self, n_pcs=30, n_neighbors=30, balanced=False, renormalize=False): # counterpart to scv.pp.moments() self.perform_PCA(n_components=n_pcs) k = n_neighbors self.knn_imputation(n_pca_dims=n_pcs, k=k, balanced=balanced, b_sight=k * 8, b_maxl=k * 4) if renormalize: self.normalize_median() def velocity_estimation(self, limit_gamma=False, fit_offset=False): self.fit_gammas(limit_gamma=limit_gamma, fit_offset=fit_offset) self.filter_genes_good_fit() self.predict_U() self.calculate_velocity() self.calculate_shift() self.extrapolate_cell_at_t() print("Number of genes to be used:", self.S.shape[0]) def velocity_graph(self, n_neighbors=100, transform='linear', sampled_fraction=.5, expression_scaling=False, sigma_corr=.05, calculate_randomized=False): if not hasattr(self, 'ts'): raise ValueError('Compute embedding first.') else: # counterpart to scv.tl.velocity_graph() self.estimate_transition_prob(hidim="Sx_sz", embed="ts", transform=transform, n_neighbors=n_neighbors, knn_random=True, sampled_fraction=sampled_fraction, calculate_randomized=calculate_randomized) # counterpart to scv.tl.velocity_embedding() self.calculate_embedding_shift(sigma_corr=sigma_corr, expression_scaling=expression_scaling) def velocity_embedding(self, smooth=0.5, steps=(50, 50), n_neighbors=100): self.calculate_grid_arrows(smooth=smooth, steps=steps, n_neighbors=n_neighbors) def run_all(self, min_counts=3, min_counts_u=3, n_pcs=30, n_neighbors=30, n_neighbors_graph=100, n_top_genes=None, fit_offset=False, limit_gamma=False, transform='linear', expression_scaling=False,): from time import time start = time() self.filter_and_normalize(min_counts=min_counts, min_counts_u=min_counts_u, n_top_genes=n_top_genes) print('Preprocessing: ' + str(round(time() - start, 2))) timestamp = time() self.impute(n_pcs=n_pcs, n_neighbors=n_neighbors) print('Imputation: ' + str(round(time() - timestamp, 2))) timestamp = time() self.velocity_estimation(limit_gamma=limit_gamma, fit_offset=fit_offset) print('Velocity Estimation: ' + str(round(time() - timestamp, 2))) timestamp = time() self.velocity_graph(n_neighbors=n_neighbors_graph, transform=transform, expression_scaling=expression_scaling) print('Velocity Graph: ' + str(round(time() - timestamp, 2))) print('Total: ' + str(round(time() - start, 2))) return VelocytoLoom(adata, basis) PKPqM湚scvelo/tools/terminal_states.pyfrom .. import settings from .. 
import logging as logg from ..preprocessing.moments import get_connectivities from .transition_matrix import transition_matrix from .utils import scale from scipy.sparse import linalg, csr_matrix import numpy as np def eigs(T, k=10, eps=1e-3, perc=None): eigvals, eigvecs = linalg.eigs(T.T, k=k, which='LR') # find k eigs with largest real part p = np.argsort(eigvals)[::-1] # sort in descending order of eigenvalues eigvals = eigvals.real[p] eigvecs = eigvecs.real[:, p] idx = (eigvals >= 1 - eps) # select eigenvectors with eigenvalue of 1 eigvals = eigvals[idx] eigvecs = np.absolute(eigvecs[:, idx]) if perc is not None: lbs, ubs = np.percentile(eigvecs, perc, axis=0) eigvecs[eigvecs < lbs] = 0 eigvecs = np.clip(eigvecs, 0, ubs) eigvecs /= eigvecs.max(0) return eigvals, eigvecs def terminal_states(data, vkey='velocity', self_transitions=False, basis=None, weight_diffusion=0, scale_diffusion=1, eps=1e-3, copy=False): """Computes terminal states (root and end points) via eigenvalue decomposition. Arguments --------- data: :class:`~anndata.AnnData` Annotated data matrix. vkey: `str` (default: `'velocity'`) Name of velocity estimates to be used. self_transitions: `bool` (default: `False`) Allow transitions from one node to itself. basis: `str` (default: `None`) Basis to use. weight_diffusion: `float` (default: 0) Relative weight to be given to diffusion kernel (Brownian motion) scale_diffusion: `float` (default: 1) Scale of diffusion kernel. eps: `float` (default: 1e-3) Tolerance for eigenvalue selection. copy: `bool` (default: `False`) Return a copy instead of writing to data. Returns ------- Returns or updates `data` with the attributes root: `.obs` sparse matrix with transition probabilities. end: `.obs` sparse matrix with transition probabilities. """ adata = data.copy() if copy else data connectivities = get_connectivities(adata, 'distances') logg.info('computing root cells', r=True, end=' ') T = transition_matrix(adata, vkey=vkey, basis=basis, weight_diffusion=weight_diffusion, scale_diffusion=scale_diffusion, self_transitions=self_transitions, backward=True) eigvecs = eigs(T, eps=eps, perc=[2, 98])[1] eigvec = csr_matrix.dot(connectivities, eigvecs).sum(1) eigvec = np.clip(eigvec, 0, np.percentile(eigvec, 98)) adata.obs['root'] = scale(eigvec) logg.info('using ' + str(eigvecs.shape[1]) + ' eigenvectors with eigenvalue 1.') logg.info('computing end points', end=' ') T = transition_matrix(adata, vkey=vkey, basis=basis, weight_diffusion=weight_diffusion, scale_diffusion=scale_diffusion, self_transitions=self_transitions, backward=False) eigvecs = eigs(T, eps=eps, perc=[2, 98])[1] eigvec = csr_matrix.dot(connectivities, eigvecs).sum(1) eigvec = np.clip(eigvec, 0, np.percentile(eigvec, 98)) adata.obs['end'] = scale(eigvec) logg.info('using ' + str(eigvecs.shape[1]) + ' eigenvectors with eigenvalue 1.') logg.info(' finished', time=True, end=' ' if settings.verbosity > 2 else '\n') logg.hint( 'added\n' ' \'root\', root cells of Markov diffusion process (adata.obs)\n' ' \'end\', end points of Markov diffusion process (adata.obs)') return adata if copy else None PKâYM/ !scvelo/tools/transition_matrix.pyfrom .utils import normalize from scipy.spatial.distance import pdist, squareform from scipy.sparse import csr_matrix import numpy as np def transition_matrix(adata, vkey='velocity', basis=None, backward=False, self_transitions=True, scale=10, use_negative_cosines=False, weight_diffusion=0, scale_diffusion=1): """Computes transition probabilities by applying Gaussian kernel to cosine similarities x 
scale Arguments --------- adata: :class:`~anndata.AnnData` Annotated data matrix. vkey: `str` (default: `'velocity'`) Name of velocity estimates to be used. basis: `str` or `None` (default: `None`) Restrict transition to embedding if specified backward: `bool` (default: `False`) Whether to use the transition matrix to push forward (`False`) or to pull backward (`True`) scale: `float` (default: 10) Scale parameter of gaussian kernel. weight_diffusion: `float` (default: 0) Relative weight to be given to diffusion kernel (Brownian motion) scale_diffusion: `float` (default: 1) Scale of diffusion kernel. Returns ------- Returns sparse matrix with transition probabilities. """ if vkey+'_graph' not in adata.uns: raise ValueError('You need to run `tl.velocity_graph` first to compute cosine correlations.') graph = csr_matrix(adata.uns[vkey + '_graph']).copy() if self_transitions: confidence = graph.max(1).A.flatten() ub = np.percentile(confidence, 98) self_prob = np.clip(ub - confidence, 0, 1) graph.setdiag(self_prob) T = np.expm1(graph * scale) # equivalent to np.exp(adata.uns[vkey + '_graph'].A * scale) - 1 if vkey + '_graph_neg' in adata.uns.keys(): T = T - np.expm1(-adata.uns[vkey + '_graph_neg'] * scale) if use_negative_cosines \ else T + (adata.uns[vkey + '_graph_neg'] < 0) * 1e-10 # weight direct neighbors with 1 and indirect (recurse) neighbors with 0.5 if 'neighbors' in adata.uns.keys(): direct_neighbors = adata.uns['neighbors']['distances'] > 0 direct_neighbors.setdiag(1) T = .5 * T + .5 * direct_neighbors.multiply(T) if backward: T = T.T T = normalize(T) if 'X_' + str(basis) in adata.obsm.keys(): dists_emb = (T > 0).multiply(squareform(pdist(adata.obsm['X_' + basis]))) scale_diffusion *= dists_emb.data.mean() diffusion_kernel = dists_emb.copy() diffusion_kernel.data = np.exp(-.5 * dists_emb.data ** 2 / scale_diffusion ** 2) T = T.multiply(diffusion_kernel) # combine velocity based kernel with diffusion based kernel if 0 < weight_diffusion < 1: # add another diffusion kernel (Brownian motion - like) diffusion_kernel.data = np.exp(-.5 * dists_emb.data ** 2 / (scale_diffusion/2) ** 2) T = (1-weight_diffusion) * T + weight_diffusion * diffusion_kernel T = normalize(T) return TPKxtMؘscvelo/tools/utils.pyfrom scipy.sparse import csr_matrix, issparse import pandas as pd import numpy as np import warnings def mean(x, axis=0): return x.mean(axis).A1 if issparse(x) else x.mean(axis) def make_dense(X): return X.A if issparse(X) and X.ndim==2 else X.A1 if issparse(X) else X def prod_sum_obs(A, B): """dot product and sum over axis 0 (obs) equivalent to np.sum(A * B, 0) """ return A.multiply(B).sum(0).A1 if issparse(A) else np.einsum('ij, ij -> j', A, B) def prod_sum_var(A, B): """dot product and sum over axis 1 (var) equivalent to np.sum(A * B, 1) """ return A.multiply(B).sum(1).A1 if issparse(A) else np.einsum('ij, ij -> i', A, B) def norm(A): """computes the L2-norm along axis 1 (e.g. genes or embedding dimensions) equivalent to np.linalg.norm(A, axis=1) """ return np.sqrt(A.multiply(A).sum(1).A1) if issparse(A) else np.sqrt(np.einsum('ij, ij -> i', A, A)) def vector_norm(x): """computes the L2-norm along axis 1 (e.g. 
genes or embedding dimensions) equivalent to np.linalg.norm(A, axis=1) """ return np.sqrt(np.einsum('i, i -> ', x, x)) def R_squared(residual, total): with warnings.catch_warnings(): warnings.simplefilter("ignore") r2 = np.ones(residual.shape[1]) - prod_sum_obs(residual, residual) / prod_sum_obs(total, total) r2[np.isnan(r2)] = 0 return r2 def cosine_correlation(dX, Vi): dX -= dX.mean(-1)[:, None] Vi_norm = vector_norm(Vi) with warnings.catch_warnings(): warnings.simplefilter("ignore") result = np.zeros(dX.shape[0]) if Vi_norm == 0 else np.einsum('ij, j', dX, Vi) / (norm(dX) * Vi_norm)[None, :] return result def normalize(X): with warnings.catch_warnings(): warnings.simplefilter("ignore") X = X.multiply(csr_matrix(1. / np.abs(X).sum(1))) if issparse(X) else X / X.sum(1) return X def scale(X, min=0, max=1): X = X - X.min() + min X = X / X.max() * max return X def get_indices(dist): n_neighbors = (dist > 0).sum(1).min() rows_idx = np.where((dist > 0).sum(1) > n_neighbors)[0] for row_idx in rows_idx: col_idx = dist[row_idx].indices[n_neighbors:] dist[row_idx, col_idx] = 0 dist.eliminate_zeros() indices = dist.indices.reshape((-1, n_neighbors)) return indices, dist def get_iterative_indices(indices, index, n_recurse_neighbors=2, max_neighs=None): def iterate_indices(indices, index, n_recurse_neighbors): return indices[iterate_indices(indices, index, n_recurse_neighbors - 1)] \ if n_recurse_neighbors > 1 else indices[index] indices = np.unique(iterate_indices(indices, index, n_recurse_neighbors)) if max_neighs is not None and len(indices) > max_neighs: indices = np.random.choice(indices, max_neighs, replace=False) return indices def groups_to_bool(adata, groups, groupby=None): groups = [groups] if isinstance(groups, str) else groups if isinstance(groups, (list, tuple, np.ndarray, np.record)): groupby = groupby if groupby in adata.obs.keys() else 'clusters' if 'clusters' in adata.obs.keys() \ else 'louvain' if 'louvain' in adata.obs.keys() else None if groupby is not None: groups = np.array([key in groups for key in adata.obs[groupby]]) else: raise ValueError('groupby attribute not valid.') return groups def most_common_in_list(lst): lst = list(lst) return max(set(lst), key=lst.count) def randomized_velocity(adata, vkey='velocity', add_key='velocity_random'): V_rnd = adata.layers[vkey].copy() for i in range(V_rnd.shape[1]): np.random.shuffle(V_rnd[:, i]) V_rnd[:, i] = V_rnd[:, i] * np.random.choice(np.array([+1, -1]), size=V_rnd.shape[0]) adata.layers[add_key] = V_rnd from .velocity_graph import velocity_graph from .velocity_embedding import velocity_embedding velocity_graph(adata, vkey=add_key) velocity_embedding(adata, vkey=add_key, autoscale=False) def extract_int_from_str(array): def str_to_int(item): num = "".join(filter(str.isdigit, item)) num = int(num) if len(num) > 0 else -1 return num if isinstance(array, str): nums = str_to_int(array) elif len(array) > 1 and isinstance(array[0], str): nums = [] for item in array: nums.append(str_to_int(item)) else: nums = array nums = pd.Categorical(nums) if array.dtype == 'category' else np.array(nums) return nums PK2bvM~D"D"scvelo/tools/velocity.pyfrom .. import settings from .. 
import logging as logg from ..preprocessing.moments import moments, second_order_moments from .optimization import leastsq_NxN, leastsq_generalized, maximum_likelihood from .utils import R_squared, groups_to_bool, make_dense import numpy as np import warnings warnings.simplefilter(action='ignore', category=FutureWarning) class Velocity: def __init__(self, adata=None, Ms=None, Mu=None, groups_for_fit=None, groupby=None, residual=None, use_raw=False,): self._adata = adata self._Ms = adata.layers['spliced'] if use_raw else adata.layers['Ms'] if Ms is None else Ms self._Mu = adata.layers['unspliced'] if use_raw else adata.layers['Mu'] if Mu is None else Mu self._Ms, self._Mu = make_dense(self._Ms), make_dense(self._Mu) n_obs, n_vars = self._Ms.shape self._residual, self._residual2 = residual, None self._offset, self._offset2 = np.zeros(n_vars, dtype=np.float32), np.zeros(n_vars, dtype=np.float32) self._gamma, self._r2 = np.zeros(n_vars, dtype=np.float32), np.zeros(n_vars, dtype=np.float32) self._beta, self._velocity_genes = np.ones(n_vars, dtype=np.float32), np.ones(n_vars, dtype=bool) self._groups_for_fit = groups_to_bool(adata, groups_for_fit, groupby) def compute_deterministic(self, fit_offset=False, perc=None): Ms = self._Ms if self._groups_for_fit is None else self._Ms[self._groups_for_fit] Mu = self._Mu if self._groups_for_fit is None else self._Mu[self._groups_for_fit] self._offset, self._gamma = leastsq_NxN(Ms, Mu, fit_offset, perc) self._residual = self._Mu - self._gamma * self._Ms if fit_offset: self._residual -= self._offset self._r2 = R_squared(self._residual, total=self._Mu - self._Mu.mean(0)) self._velocity_genes = (self._r2 > .01) & (self._gamma > .01) def compute_stochastic(self, fit_offset=False, fit_offset2=False, mode=None, perc=None): if self._residual is None: self.compute_deterministic(fit_offset=fit_offset, perc=perc) idx = self._velocity_genes is_subset = True if len(set(idx)) > 1 else False _adata = self._adata[:, idx] if is_subset else self._adata _Ms = self._Ms[:, idx] if is_subset else self._Ms _Mu = self._Mu[:, idx] if is_subset else self._Mu _residual = self._residual[:, idx] if is_subset else self._residual _Mss, _Mus = second_order_moments(_adata) var_ss = 2 * _Mss - _Ms cov_us = 2 * _Mus + _Mu _offset2, _gamma2 = leastsq_NxN(var_ss, cov_us, fit_offset2) # initialize covariance matrix res_std = _residual.std(0) res2_std = (cov_us - _gamma2 * var_ss - _offset2).std(0) # solve multiple regression self._offset[idx], self._offset2[idx], self._gamma[idx] = \ maximum_likelihood(_Ms, _Mu, _Mus, _Mss, fit_offset, fit_offset2) if mode == 'bayes' \ else leastsq_generalized(_Ms, _Mu, var_ss, cov_us, res_std, res2_std, fit_offset, fit_offset2, perc) self._residual = self._Mu - self._gamma * self._Ms if fit_offset: self._residual -= self._offset _residual2 = (cov_us - 2 * _Ms * _Mu) - self._gamma[idx] * (var_ss - 2 * _Ms ** 2) if fit_offset: _residual2 += 2 * self._offset[idx] * _Ms if fit_offset2: _residual2 -= self._offset2[idx] if is_subset: self._residual2 = np.zeros(self._Ms.shape, dtype=np.float32) self._residual2[:, idx] = _residual2 else: self._residual2 = _residual2 # if mode == 'alpha': # Muu = second_order_moments_u(adata) # offset2u, alpha = leastsq_NxN(np.ones(Mu.shape) + 2 * Mu, 2 * Muu - Mu) # pars.extend([offset2u, alpha]) # pars_str.extend(['_offset2u', '_alpha']) def get_pars(self): return self._offset, self._offset2, self._beta, self._gamma, self._r2, self._velocity_genes def get_pars_names(self): return ['_offset', '_offset2', '_beta', '_gamma', '_r2', 
'_genes']


def write_residuals(adata, vkey, residual=None, cell_subset=None):
    if residual is not None:
        if cell_subset is None:
            adata.layers[vkey] = residual
        else:
            if vkey not in adata.layers.keys():
                adata.layers[vkey] = np.zeros(adata.shape, dtype=np.float32)
            adata.layers[vkey][cell_subset] = residual


def write_pars(adata, vkey, pars, pars_names, add_key=None):
    for i, key in enumerate(pars_names):
        key = vkey + key + '_' + add_key if add_key is not None else vkey + key
        if len(set(pars[i])) > 1: adata.var[key] = pars[i]


def velocity(data, vkey='velocity', mode=None, fit_offset=False, fit_offset2=False, filter_genes=False,
             groups=None, groupby=None, groups_for_fit=None, use_raw=False, perc=None, copy=False):
    """Estimates velocities in a gene-specific manner.

    Arguments
    ---------
    data: :class:`~anndata.AnnData`
        Annotated data matrix.
    vkey: `str` (default: `'velocity'`)
        Name under which to refer to the computed velocities for `velocity_graph` and `velocity_embedding`.
    mode: `'deterministic'`, `'stochastic'` or `'bayes'` (default: `'stochastic'`)
        Whether to run the estimation using the deterministic or stochastic model of transcriptional dynamics.
        `'bayes'` solves the stochastic model and accounts for heteroscedasticity, but is slower than `'stochastic'`.
    fit_offset: `bool` (default: `False`)
        Whether to fit with offset for first order moment dynamics.
    fit_offset2: `bool` (default: `False`)
        Whether to fit with offset for second order moment dynamics.
    filter_genes: `bool` (default: `False`)
        Whether to remove genes that are not used for further velocity analysis.
    groups: `str`, `list` (default: `None`)
        Subset of groups, e.g. ['g1', 'g2', 'g3'], to which velocity analysis shall be restricted.
    groupby: `str`, `list` or `np.ndarray` (default: `None`)
        Key of observations grouping to consider.
    groups_for_fit: `str`, `list` or `np.ndarray` (default: `None`)
        Subset of groups, e.g. ['g1', 'g2', 'g3'], to which steady-state fitting shall be restricted.
    use_raw: `bool` (default: `False`)
        Whether to use raw data for estimation.
    perc: `float` (default: `None`)
        Percentile, e.g. 98, upon which velocity estimation is performed (for robustness).
    copy: `bool` (default: `False`)
        Return a copy instead of writing to `adata`.
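    Example
    -------
    A minimal usage sketch (illustrative only, not part of the original docstring); it assumes an
    `AnnData` object with 'spliced'/'unspliced' layers and that the package's `pp`/`tl` aliases
    re-export `moments` and `velocity`::

        import scvelo as scv

        adata = scv.datasets.toy_data(1000)         # small random sample of the Dentate Gyrus data
        scv.pp.moments(adata)                       # fills adata.layers['Ms'] and adata.layers['Mu']
        scv.tl.velocity(adata, mode='stochastic')   # writes adata.layers['velocity'] and gene-wise parameters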
Returns ------- Returns or updates `adata` with the attributes velocity: `.layers` velocity vectors for each individual cell variance_velocity: `.layers` velocity vectors for the cell variances velocity_offset, velocity_beta, velocity_gamma, velocity_r2: `.var` parameters """ adata = data.copy() if copy else data if not use_raw and 'Ms' not in adata.layers.keys(): moments(adata) logg.info('computing velocities', r=True) adata._sanitize() categories = adata.obs[groupby].cat.categories \ if groupby is not None and groups is None and groups_for_fit is None else [None] for cat in categories: groups = cat if cat is not None else groups cell_subset = groups_to_bool(adata, groups, groupby) _adata = adata if groups is None else adata[cell_subset] velo = Velocity(_adata, groups_for_fit=groups_for_fit, groupby=groupby, use_raw=use_raw) velo.compute_deterministic(fit_offset, perc=perc) if any([mode is not None and mode in item for item in ['stochastic', 'bayes', 'alpha']]): if filter_genes and len(set(velo._velocity_genes)) > 1: adata._inplace_subset_var(velo._velocity_genes) residual = velo._residual[:, velo._velocity_genes] _adata = adata if groups is None else adata[cell_subset] velo = Velocity(_adata, residual=residual, groups_for_fit=groups_for_fit, groupby=groupby) velo.compute_stochastic(fit_offset, fit_offset2, mode, perc=perc) write_residuals(adata, vkey, velo._residual, cell_subset) write_residuals(adata, 'variance_' + vkey, velo._residual2, cell_subset) write_pars(adata, vkey, velo.get_pars(), velo.get_pars_names(), add_key=cat) if filter_genes and len(set(velo._velocity_genes)) > 1: adata._inplace_subset_var(velo._velocity_genes) logg.info(' finished', time=True, end=' ' if settings.verbosity > 2 else '\n') logg.hint('added \n' ' \'' + vkey + '\', velocity vectors for each individual cell (adata.layers)') return adata if copy else None PKPqMVN#scvelo/tools/velocity_confidence.pyfrom .. import logging as logg from .utils import prod_sum_var, norm, get_indices from ..preprocessing.moments import moments from .transition_matrix import transition_matrix from scanpy.api.pp import neighbors import numpy as np def velocity_confidence(data, vkey='velocity', copy=False): """Computes the confidences according to the velocity for each cell. Arguments --------- data: :class:`~anndata.AnnData` Annotated data matrix. vkey: `str` (default: `'velocity'`) Name of velocity estimates to be used. copy: `bool` (default: `False`) Return a copy instead of writing to adata. 
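    Example
    -------
    An illustrative sketch (not part of the original docstring); it assumes velocities and a neighbor
    graph have been computed beforehand, and that `velocity_confidence` is exposed via `scv.tl` like
    the other tools::

        scv.pp.moments(adata)
        scv.tl.velocity(adata)
        scv.tl.velocity_confidence(adata)
        adata.obs[['velocity_length', 'velocity_confidence']].describe()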
Returns ------- Returns or updates `adata` with the attributes velocity_length: `.obs` Length of the velocity vectors for each individual cell velocity_confidence: `.obs` Confidence for each cell """ adata = data.copy() if copy else data if vkey not in adata.layers.keys(): raise ValueError( 'You need to run `tl.velocity` first.') idx = adata.var['velocity_genes'] X, V = adata.layers['Ms'][:, idx].copy(), adata.layers[vkey][:, idx].copy() indices = get_indices(dist=adata.uns['neighbors']['distances'])[0] V -= V.mean(1)[:, None] V_norm = norm(V) R = np.zeros(adata.n_obs) for i in range(adata.n_obs): Vi_neighs = V[indices[i]] Vi_neighs -= Vi_neighs.mean(1)[:, None] R[i] = np.mean(np.einsum('ij, j', Vi_neighs, V[i]) / (norm(Vi_neighs) * V_norm[i])[None, :]) adata.obs[vkey + '_length'] = V_norm.round(2) adata.obs[vkey + '_confidence'] = R logg.hint('added \'' + vkey + '_confidence\' (adata.obs)') return adata if copy else None def velocity_confidence_transition(data, vkey='velocity', scale=10, copy=False): """Computes the confidences of transition according to the velocity for each cell. Arguments --------- data: :class:`~anndata.AnnData` Annotated data matrix. vkey: `str` (default: `'velocity'`) Name of velocity estimates to be used. scale: `float` (default: 10) Scale parameter of gaussian kernel. copy: `bool` (default: `False`) Return a copy instead of writing to adata. Returns ------- Returns or updates `adata` with the attributes velocity_confidence_transition: `.obs` Confidence of transition for each cell """ adata = data.copy() if copy else data if vkey not in adata.layers.keys(): raise ValueError( 'You need to run `tl.velocity` first.') idx = adata.var['velocity_genes'] T = transition_matrix(adata, vkey=vkey, scale=scale) dX = T.dot(adata.layers['Ms'][:, idx]) - adata.layers['Ms'][:, idx] dX -= dX.mean(1)[:, None] V = adata.layers[vkey][:, idx].copy() V -= V.mean(1)[:, None] adata.obs[vkey + '_confidence_transition'] = prod_sum_var(dX, V) / (norm(dX) * norm(V)) logg.hint('added \'' + vkey + '_confidence_transition\' (adata.obs)') return adata if copy else None def random_subsample(adata, frac=.5): subset = np.random.choice([True, False], size=adata.n_obs, p=[frac, 1-frac]).sum() adata.obs['subset'] = subset adata_subset = adata[subset].copy() neighbors(adata_subset) moments(adata_subset) return adata_subset def score_robustness(data, adata_subset=None, vkey='velocity', copy=False): adata = data.copy() if copy else data if adata_subset is None: adata_subset = random_subsample(adata) V = adata[adata.obs['subset']].layers[vkey] V_subset = adata_subset.layers[vkey] adata_subset.obs[vkey + '_score_robustness'] = prod_sum_var(V, V_subset) / (norm(V) * norm(V_subset)) return adata_subset if copy else None PKqvMvq"scvelo/tools/velocity_embedding.pyfrom .. import settings from .. 
import logging as logg
from .utils import norm
from .transition_matrix import transition_matrix
from scipy.sparse import issparse
import numpy as np
import warnings


def quiver_autoscale(X_emb, V_emb):
    import matplotlib.pyplot as pl
    scale_factor = X_emb.max()  # just so that it handles very large values
    Q = pl.quiver(X_emb[:, 0] / scale_factor, X_emb[:, 1] / scale_factor, V_emb[:, 0], V_emb[:, 1],
                  angles='xy', scale_units='xy', scale=None)
    Q._init()
    pl.clf()
    return Q.scale / scale_factor


def velocity_embedding(data, basis=None, vkey='velocity', scale=10, self_transitions=True, use_negative_cosines=True,
                       autoscale=True, all_comps=True, pca_transform=False, retain_scale=False, copy=False):
    """Computes the single-cell velocities in the embedding.

    Arguments
    ---------
    data: :class:`~anndata.AnnData`
        Annotated data matrix.
    basis: `str` or `None` (default: `None`)
        Which embedding to use; if `None`, the last of 'pca', 'tsne', 'umap' found in `adata.obsm` is used.
    vkey: `str` (default: `'velocity'`)
        Name of velocity estimates to be used.
    scale: `int` (default: 10)
        Scale parameter of gaussian kernel for transition matrix.
    self_transitions: `bool` (default: `True`)
        Whether to allow self transitions, based on the confidence of transitioning to neighboring cells.
    use_negative_cosines: `bool` (default: `True`)
        Whether to use negative cosines in addition to positive ones, treating those transitions as pointing the opposite way.
    autoscale: `bool` (default: `True`)
        Whether to scale the embedded velocities by a scalar multiplier, which simply ensures that the arrows in the embedding are properly scaled.
    all_comps: `bool` (default: `True`)
        Whether to compute the velocities on all embedding components or just the first two.
    pca_transform: `bool` (default: `False`)
        Whether to directly project the velocities into PCA space, skipping the velocity graph.
    retain_scale: `bool` (default: `False`)
        Whether to retain scale from high dimensional space in embedding.
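    Example
    -------
    An illustrative sketch (not part of the original docstring); the embedding projection builds on the
    transition matrix derived from the velocity graph, so the graph has to be computed first (unless
    `pca_transform` is used)::

        scv.tl.velocity(adata)
        scv.tl.velocity_graph(adata)
        scv.tl.velocity_embedding(adata, basis='umap')   # writes adata.obsm['velocity_umap']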
    Returns
    -------
    Returns or updates `adata` with the attributes
    velocity_<basis>: `.obsm`
        coordinates of velocity projection on embedding (e.g. `velocity_umap`)
    """
    adata = data.copy() if copy else data

    if basis is None:
        keys = [key for key in ['pca', 'tsne', 'umap'] if 'X_' + key in adata.obsm.keys()]
        if len(keys) > 0: basis = keys[-1]
        else: raise ValueError('No basis specified')
    if 'X_' + basis not in adata.obsm_keys():
        raise ValueError('You need to compute the embedding first.')

    logg.info('computing velocity embedding', r=True)

    if 'pca' in basis and pca_transform:
        V = adata.layers['velocity']
        PCs = adata.varm['PCs'] if all_comps else adata.varm['PCs'][:, :2]
        X_emb = adata.obsm['X_' + basis]
        V_emb = (V - V.mean(0)).dot(PCs)
    else:
        X_emb = adata.obsm['X_' + basis] if all_comps else adata.obsm['X_' + basis][:, :2]
        V_emb = np.zeros(X_emb.shape)

        T = transition_matrix(adata, vkey=vkey, scale=scale, self_transitions=self_transitions,
                              use_negative_cosines=use_negative_cosines)
        T.setdiag(0)
        T.eliminate_zeros()

        TA = T.A if adata.n_obs < 8192 else None
        sparse = False if adata.n_obs < 8192 else True

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            for i in range(adata.n_obs):
                indices = T[i].indices
                dX = X_emb[indices] - X_emb[i, None]  # shape (n_neighbors, 2)
                if not retain_scale: dX /= norm(dX)[:, None]
                dX[np.isnan(dX)] = 0  # zero diff in a steady-state
                probs = T[i].data if sparse else TA[i, indices]
                V_emb[i] = probs.dot(dX) - probs.mean() * dX.sum(0)  # probs.sum() / len(indices)

        if retain_scale:
            delta = T.dot(adata.X) - adata.X
            if issparse(delta): delta = delta.A
            cos_proj = (adata.layers[vkey] * delta).sum(1) / norm(delta)
            V_emb *= np.clip(cos_proj[:, None] * 10, 0, 1)

    if autoscale: V_emb /= (3 * quiver_autoscale(X_emb, V_emb))

    vkey += '_' + basis
    adata.obsm[vkey] = V_emb

    logg.info('    finished', time=True, end=' ' if settings.verbosity > 2 else '\n')
    logg.hint('added\n'
              '    \'' + vkey + '\', embedded velocity vectors (adata.obsm)')

    return adata if copy else None

scvelo/tools/velocity_graph.py
from .. import settings
from ..
import logging as logg from .utils import cosine_correlation, get_indices, get_iterative_indices, extract_int_from_str from .velocity import velocity from .rank_velocity_genes import rank_velocity_genes from scipy.sparse import coo_matrix, issparse import numpy as np def vals_to_csr(vals, rows, cols, shape): graph = coo_matrix((vals, (rows, cols)), shape=shape) graph_neg = graph.copy() graph.data = np.clip(graph.data, 0, 1) graph_neg.data = np.clip(graph_neg.data, -1, 0) graph.eliminate_zeros() graph_neg.eliminate_zeros() return graph.tocsr(), graph_neg.tocsr() class VelocityGraph: def __init__(self, adata, vkey='velocity', xkey='Ms', tkey=None, basis=None, n_neighbors=None, n_recurse_neighbors=None, random_neighbors_at_max=None, n_top_genes=None, sqrt_transform=False, approx=False): subset = np.ones(adata.n_vars, dtype=bool) if n_top_genes is not None and 'velocity_score' in adata.var.keys(): subset = adata.var['velocity_score'][::-1][:n_top_genes] elif 'velocity_genes' in adata.var.keys(): subset = adata.var['velocity_genes'] X = adata[:, subset].layers[xkey].A if issparse(adata.layers[xkey]) else adata[:, subset].layers[xkey] V = adata[:, subset].layers[vkey].A if issparse(adata.layers[vkey]) else adata[:, subset].layers[vkey] if approx and X.shape[1] > 100: from ..preprocessing.moments import pca X_pca, PCs, _, _ = pca(X, n_comps=50, svd_solver='arpack', return_info=True) self.X = np.array(X_pca, dtype=np.float32) self.V = (V - V.mean(0)).dot(PCs.T) else: self.X = np.array(X.copy(), dtype=np.float32) self.V = np.array(V.copy(), dtype=np.float32) self.sqrt_transform = sqrt_transform if sqrt_transform: self.V = np.sqrt(np.abs(self.V)) * np.sign(self.V) self.V -= self.V.mean(1)[:, None] self.n_recurse_neighbors = 1 if n_neighbors is not None \ else 2 if n_recurse_neighbors is None else n_recurse_neighbors if n_neighbors is None: if 'neighbors' not in adata.uns.keys(): from ..preprocessing.moments import neighbors neighbors(adata) self.indices = get_indices(dist=adata.uns['neighbors']['distances'])[0] else: from .. 
import Neighbors neighs = Neighbors(adata) if basis is None: basis = [key for key in ['X_pca', 'X_tsne', 'X_umap'] if key in adata.obsm.keys()][-1] neighs.compute_neighbors(n_neighbors=n_neighbors, use_rep=basis, n_pcs=10) self.indices = get_indices(dist=neighs.distances)[0] self.max_neighs = random_neighbors_at_max self.graph = adata.uns[vkey + '_graph'] if vkey + '_graph' in adata.uns.keys() else [] self.graph_neg = adata.uns[vkey + '_graph_neg'] if vkey + '_graph_neg' in adata.uns.keys() else [] if tkey in adata.obs.keys(): self.t0 = adata.obs[tkey].copy() init = min(self.t0) if isinstance(min(self.t0), int) else 0 self.t0.cat.categories = np.arange(init, len(self.t0.cat.categories)) self.t1 = self.t0 + 1 else: self.t0 = None def compute_cosines(self): vals, rows, cols, n_obs = [], [], [], self.X.shape[0] progress = logg.ProgressReporter(n_obs) for i in range(n_obs): neighs_idx = get_iterative_indices(self.indices, i, self.n_recurse_neighbors, self.max_neighs) if self.t0 is not None: t0, t1 = self.t0[i], self.t1[i] if t0 >= 0 and t1 > 0: t1_idx = np.where(self.t0 == t1)[0] if len(t1_idx) > len(neighs_idx): t1_idx = np.random.choice(t1_idx, len(neighs_idx), replace=False) if len(t1_idx) > 0: neighs_idx = np.unique(np.concatenate([neighs_idx, t1_idx])) if self.V[i].max() != 0 or self.V[i].min() != 0: dX = self.X[neighs_idx] - self.X[i, None] # 60% of runtime if self.sqrt_transform: dX = np.sqrt(np.abs(dX)) * np.sign(dX) val = cosine_correlation(dX, self.V[i]) # 40% of runtime else: val = np.zeros(len(neighs_idx)) vals.extend(val) rows.extend(np.ones(len(neighs_idx)) * i) cols.extend(neighs_idx) progress.update() progress.finish() vals = np.hstack(vals) vals[np.isnan(vals)] = 1e-10 # actually zero; just to store these entries in sparse matrix. self.graph, self.graph_neg = vals_to_csr(vals, rows, cols, shape=(n_obs, n_obs)) def velocity_graph(data, vkey='velocity', xkey='Ms', tkey=None, basis=None, n_neighbors=None, n_recurse_neighbors=None, random_neighbors_at_max=None, n_top_genes=None, sqrt_transform=False, approx=False, copy=False): """Computes a velocity graph based on cosine similarities. The cosine similarities are computed between velocities and potential cell state transitions. Arguments --------- data: :class:`~anndata.AnnData` Annotated data matrix. vkey: `str` (default: `'velocity'`) Name of velocity estimates to be used. n_neighbors: `int` or `None` (default: None) Use fixed number of neighbors or do recursive neighbor search (if `None`). n_recurse_neighbors: `int` (default: 2) Number of recursions to be done for neighbors search. random_neighbors_at_max: `int` or `None` (default: `None`) If number of iterative neighbors for an individual cell is higher than this threshold, a random selection of such are chosen as reference neighbors. sqrt_transform: `bool` (default: `False`) Whether to variance-transform the cell states changes and velocities before computing cosine similarities. copy: `bool` (default: `False`) Return a copy instead of writing to adata. 
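    Example
    -------
    An illustrative sketch (not part of the original docstring); parameter values are arbitrary and
    only meant to show how the recursive neighbor search can be tuned::

        scv.tl.velocity(adata)
        scv.tl.velocity_graph(adata, n_recurse_neighbors=2, approx=True)
        adata.uns['velocity_graph']       # sparse matrix of positive cosine correlations
        adata.uns['velocity_graph_neg']   # negative cosine correlations, stored separately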
Returns ------- Returns or updates `adata` with the attributes velocity_graph: `.uns` sparse matrix with transition probabilities """ adata = data.copy() if copy else data if vkey not in adata.layers.keys(): velocity(adata, vkey=vkey) if n_top_genes is not None and 'velocity_score' not in adata.var.keys(): rank_velocity_genes(adata, n_genes=100) vgraph = VelocityGraph(adata, vkey=vkey, xkey=xkey, tkey=tkey, basis=basis, n_neighbors=n_neighbors, approx=approx, n_recurse_neighbors=n_recurse_neighbors, random_neighbors_at_max=random_neighbors_at_max, n_top_genes=n_top_genes, sqrt_transform=sqrt_transform) logg.info('computing velocity graph', r=True) vgraph.compute_cosines() adata.uns[vkey+'_graph'] = vgraph.graph adata.uns[vkey+'_graph_neg'] = vgraph.graph_neg logg.info(' finished', time=True, end=' ' if settings.verbosity > 2 else '\n') logg.hint( 'added \n' ' \'' + vkey + '_graph\', sparse matrix with cosine correlations (adata.uns)') return adata if copy else None PKkQMo #scvelo/tools/velocity_pseudotime.pyimport numpy as np def principal_curve(data, basis='pca', n_comps=4, clusters_list=None, copy=False): """Computes the principal curve Arguments --------- data: :class:`~anndata.AnnData` Annotated data matrix. basis: `str` (default: `'pca'`) Basis to use for computing the principal curve. n_comps: `int` (default: 4) Number of pricipal components to be used. copy: `bool`, (default: `False`) Return a copy instead of writing to adata. Returns ------- Returns or updates `adata` with the attributes principal_curve: `.uns` dictionary containing `projections`, `ixsort` and `arclength` """ adata = data.copy() if copy else data import rpy2.robjects as robjects from rpy2.robjects.packages import importr if clusters_list is not None: cell_subset = np.array([label in clusters_list for label in adata.obs['clusters']]) X_emb = adata[cell_subset].obsm['X_' + basis][:, :n_comps] else: cell_subset = None X_emb = adata.obsm['X_' + basis][:, :n_comps] n_obs, n_dim = X_emb.shape # convert array to R matrix xvec = robjects.FloatVector(X_emb.T.reshape((X_emb.size))) X_R = robjects.r.matrix(xvec, nrow=n_obs, ncol=n_dim) fit = importr("princurve").principal_curve(X_R) adata.uns['principal_curve'] = dict() adata.uns['principal_curve']['ixsort'] = ixsort = np.array(fit[1])-1 adata.uns['principal_curve']['projections'] = np.array(fit[0])[ixsort] adata.uns['principal_curve']['arclength'] = np.array(fit[2]) adata.uns['principal_curve']['cell_subset'] = cell_subset return adata if copy else None # def pseudotime(adata, iroot=229, clusters_list=None, copy=False): # # iroot = adata.obsm['X_umap'][adata.obs['clusters']=='neuroblast 1'][:,1].argmax() # root_cell = adata.obs_names[iroot] # if clusters_list is not None: # ['granule mature', 'neuroblast 1', 'neuroblast 2', 'granule immature'] # cell_subset = np.array([label in clusters_list for label in adata.obs['clusters']]) # adata_subset = adata.copy() # adata_subset._inplace_subset_obs(cell_subset) # adata_subset.uns['iroot'] = np.where(adata_subset.obs_names == root_cell)[0][0] # dpt(adata_subset, n_branchings=0) # # adata.obs['dpt_pseudotime'] = np.zeros(adata.n_obs) # adata.obs['dpt_pseudotime'][cell_subset] = adata_subset.obs['dpt_pseudotime'] # adata.obs['dpt_pseudotime'][~cell_subset] = -.5 # # return adata if copy else None PKwMm9}/scvelo-0.1.13.dist-info/LICENSEBSD 3-Clause License Copyright (c) 2018, Theis Lab All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted
provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this list of conditions
  and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice, this list of conditions
  and the following disclaimer in the documentation and/or other materials provided with the
  distribution.

* Neither the name of the copyright holder nor the names of its contributors may be used to endorse
  or promote products derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
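A condensed end-to-end usage sketch of the tools bundled in this wheel (illustrative only; it assumes
the package's `pp`/`tl` aliases and default parameters throughout):

    import scvelo as scv

    adata = scv.datasets.dentategyrus()             # or scv.read(...) on your own loom file
    scv.pp.moments(adata)                           # neighbors, first/second order moments
    scv.tl.velocity(adata, mode='stochastic')       # gene-wise velocity estimation
    scv.tl.velocity_graph(adata)                    # cosine correlations between velocities and cell transitions
    scv.tl.velocity_embedding(adata, basis='umap')  # project velocities onto the UMAP embedding
    scv.tl.terminal_states(adata)                   # root and end points of the Markov diffusion process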