scVelo-based analysis of hematopoiesis dataset#
Notebook runs the scVelo model on the hematopoiesis dataset.
Library imports#
import numpy as np
import anndata as ad
import cellrank as cr
import scanpy as sc
import scvelo as scv
from rgv_tools import DATA_DIR
/home/icb/weixu.wang/miniconda3/envs/dynamo/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_csv from `anndata` is deprecated. Import anndata.io.read_csv instead.
warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/dynamo/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_excel from `anndata` is deprecated. Import anndata.io.read_excel instead.
warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/dynamo/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_hdf from `anndata` is deprecated. Import anndata.io.read_hdf instead.
warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/dynamo/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_loom from `anndata` is deprecated. Import anndata.io.read_loom instead.
warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/dynamo/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_mtx from `anndata` is deprecated. Import anndata.io.read_mtx instead.
warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/dynamo/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_text from `anndata` is deprecated. Import anndata.io.read_text instead.
warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/dynamo/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_umi_tools from `anndata` is deprecated. Import anndata.io.read_umi_tools instead.
warnings.warn(msg, FutureWarning)
General settings#
sc.settings.verbosity = 2
scv.settings.verbosity = 3
Constants#
DATASET = "hematopoiesis"
SAVE_DATA = True
if SAVE_DATA:
(DATA_DIR / DATASET / "results").mkdir(parents=True, exist_ok=True)
TERMINAL_STATES = ["Mon", "Meg", "Bas", "Ery"]
Data loading#
adata = ad.io.read_h5ad(DATA_DIR / DATASET / "processed" / "adata_preprocessed.h5ad")
adata_full = ad.io.read_h5ad(DATA_DIR / DATASET / "processed" / "adata_preprocessed_full.h5ad")
Run scVelo#
velocity_genes = adata.var["velocity_genes"].copy()
scv.tl.recover_dynamics(adata, fit_scaling=False, var_names=adata.var_names)
adata.var["fit_scaling"] = 1.0
recovering dynamics (using 1/128 cores)
finished (0:00:38) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
scv.tl.velocity(adata, mode="dynamical", min_likelihood=-np.inf, min_r2=None)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
adata.var["velocity_genes"] = velocity_genes
Calculate lineage fate probabilities and identify fate-associated genes#
vk = cr.kernels.VelocityKernel(adata)
vk.compute_transition_matrix()
estimator = cr.estimators.GPCCA(vk) ## We used vk here due to we want to benchmark on velocity
estimator.compute_macrostates(n_states=5, cluster_key="cell_type")
estimator.set_terminal_states(TERMINAL_STATES)
estimator.compute_fate_probabilities()
estimator.adata = adata_full.copy()
scv_ranking = estimator.compute_lineage_drivers(return_drivers=True, cluster_key="cell_type")
scv_ranking = scv_ranking.loc[:, ["Ery_corr", "Mon_corr", "Ery_pval", "Mon_pval"]]
WARNING: Unable to import `petsc4py` or `slepc4py`. Using `method='brandts'`
WARNING: For `method='brandts'`, dense matrix is required. Densifying
WARNING: Unable to import petsc4py. For installation, please refer to: https://petsc4py.readthedocs.io/en/stable/install.html.
Defaulting to `'gmres'` solver.
ERROR: Unable to duplicate function call using shadow `anndata.AnnData` object. Reason: `value.index does not match parent’s var names:
Index are different
Index length are different
[left]: 2000, Index(['DPM1', 'GCLC', 'NFYA', 'ANKIB1', 'LASP1', 'ALS2', 'CFLAR', 'AK2',
'RBM6', 'SLC25A13',
...
'HERC2P2', 'STAG3L2', 'PMS2P14', 'PSMB3', 'DDX52', 'MYO19', 'PMS2P2',
'ACACA', 'MRPL45', 'WASH9P'],
dtype='object', length=2000)
[right]: 159, Index(['ADCY6', 'ALS2', 'ANKRD36C', 'ANXA1', 'ARHGAP11A', 'ARHGAP30', 'ARID5B',
'ASPM', 'ATF6', 'BACE2',
...
'UFL1', 'VASH1', 'VIM', 'VWF', 'ZEB1', 'ZFHX3', 'ZFPM1', 'ZNF263',
'ZNF274', 'ZYX'],
dtype='object', length=159)`
Save dataset#
Recalculate PCA for downstream CBC computation, as velocity is derived from the moment matrices.
sc.tl.pca(adata, layer="Ms")
Save adata with velocity layer
if SAVE_DATA:
adata.write_h5ad(DATA_DIR / DATASET / "processed" / "adata_run_scvelo.h5ad")
Save uncertainty and gene ranking results
if SAVE_DATA:
scv_ranking.to_csv(DATA_DIR / DATASET / "results" / "scv_ranking.csv")