scVelo-based analysis of hematopoiesis dataset#

Notebook runs the scVelo model on the hematopoiesis dataset.

Library imports#

import numpy as np

import anndata as ad
import cellrank as cr
import scanpy as sc
import scvelo as scv

from rgv_tools import DATA_DIR
/home/icb/weixu.wang/miniconda3/envs/dynamo/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_csv from `anndata` is deprecated. Import anndata.io.read_csv instead.
  warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/dynamo/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_excel from `anndata` is deprecated. Import anndata.io.read_excel instead.
  warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/dynamo/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_hdf from `anndata` is deprecated. Import anndata.io.read_hdf instead.
  warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/dynamo/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_loom from `anndata` is deprecated. Import anndata.io.read_loom instead.
  warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/dynamo/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_mtx from `anndata` is deprecated. Import anndata.io.read_mtx instead.
  warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/dynamo/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_text from `anndata` is deprecated. Import anndata.io.read_text instead.
  warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/dynamo/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_umi_tools from `anndata` is deprecated. Import anndata.io.read_umi_tools instead.
  warnings.warn(msg, FutureWarning)

General settings#

sc.settings.verbosity = 2
scv.settings.verbosity = 3

Constants#

DATASET = "hematopoiesis"
SAVE_DATA = True
if SAVE_DATA:
    (DATA_DIR / DATASET / "results").mkdir(parents=True, exist_ok=True)
TERMINAL_STATES = ["Mon", "Meg", "Bas", "Ery"]

Data loading#

adata = ad.io.read_h5ad(DATA_DIR / DATASET / "processed" / "adata_preprocessed.h5ad")
adata_full = ad.io.read_h5ad(DATA_DIR / DATASET / "processed" / "adata_preprocessed_full.h5ad")

Run scVelo#

velocity_genes = adata.var["velocity_genes"].copy()
scv.tl.recover_dynamics(adata, fit_scaling=False, var_names=adata.var_names)
adata.var["fit_scaling"] = 1.0
recovering dynamics (using 1/128 cores)
    finished (0:00:38) --> added 
    'fit_pars', fitted parameters for splicing dynamics (adata.var)
scv.tl.velocity(adata, mode="dynamical", min_likelihood=-np.inf, min_r2=None)
computing velocities
    finished (0:00:00) --> added 
    'velocity', velocity vectors for each individual cell (adata.layers)
adata.var["velocity_genes"] = velocity_genes

Calculate lineage fate probabilities and identify fate-associated genes#

vk = cr.kernels.VelocityKernel(adata)
vk.compute_transition_matrix()
estimator = cr.estimators.GPCCA(vk)  ## We used vk here due to we want to benchmark on velocity

estimator.compute_macrostates(n_states=5, cluster_key="cell_type")
estimator.set_terminal_states(TERMINAL_STATES)

estimator.compute_fate_probabilities()
estimator.adata = adata_full.copy()
scv_ranking = estimator.compute_lineage_drivers(return_drivers=True, cluster_key="cell_type")

scv_ranking = scv_ranking.loc[:, ["Ery_corr", "Mon_corr", "Ery_pval", "Mon_pval"]]
WARNING: Unable to import `petsc4py` or `slepc4py`. Using `method='brandts'`
WARNING: For `method='brandts'`, dense matrix is required. Densifying
WARNING: Unable to import petsc4py. For installation, please refer to: https://petsc4py.readthedocs.io/en/stable/install.html.
Defaulting to `'gmres'` solver.
ERROR: Unable to duplicate function call using shadow `anndata.AnnData` object. Reason: `value.index does not match parent’s var names:
Index are different

Index length are different
[left]:  2000, Index(['DPM1', 'GCLC', 'NFYA', 'ANKIB1', 'LASP1', 'ALS2', 'CFLAR', 'AK2',
       'RBM6', 'SLC25A13',
       ...
       'HERC2P2', 'STAG3L2', 'PMS2P14', 'PSMB3', 'DDX52', 'MYO19', 'PMS2P2',
       'ACACA', 'MRPL45', 'WASH9P'],
      dtype='object', length=2000)
[right]: 159, Index(['ADCY6', 'ALS2', 'ANKRD36C', 'ANXA1', 'ARHGAP11A', 'ARHGAP30', 'ARID5B',
       'ASPM', 'ATF6', 'BACE2',
       ...
       'UFL1', 'VASH1', 'VIM', 'VWF', 'ZEB1', 'ZFHX3', 'ZFPM1', 'ZNF263',
       'ZNF274', 'ZYX'],
      dtype='object', length=159)`

Save dataset#

Recalculate PCA for downstream CBC computation, as velocity is derived from the moment matrices.

sc.tl.pca(adata, layer="Ms")

Save adata with velocity layer

if SAVE_DATA:
    adata.write_h5ad(DATA_DIR / DATASET / "processed" / "adata_run_scvelo.h5ad")

Save uncertainty and gene ranking results

if SAVE_DATA:
    scv_ranking.to_csv(DATA_DIR / DATASET / "results" / "scv_ranking.csv")