Metabolic-labeling based RNA velocity inference#
Metabolic-labeling based RNA velocity inference as proposed in
CellRank 2: unified fate mapping in multiview single-cell data; the
required data can be downloaded here from FigShare and is expected to
be in data/rpe1/processed/, i.e.,
mkdir -p ../../data/rpe/processed/
wget https://figshare.com/ndownloader/files/56614223 -O ../../data/rpe1/processed/adata.h5ad
Library imports#
from scipy.sparse import csr_matrix
import anndata as ad
import scanpy as sc
import scvelo as scv
from scvelo.inference import (
get_labeling_time_mask,
get_labeling_times,
get_n_neighbors,
get_obs_dist_argsort,
get_parameters,
)
from crp import DATA_DIR, FIG_DIR
General settings#
sc.settings.verbosity = 3
scv.settings.verbosity = 3
SAVE_DATA = True
SAVE_FIGURES = False
Constants#
DATASET_ID = "rpe1"
if SAVE_DATA:
(DATA_DIR / DATASET_ID / "results").mkdir(parents=True, exist_ok=True)
if SAVE_FIGURES:
(FIG_DIR / DATASET_ID).mkdir(parents=True, exist_ok=True)
FIGURE_FORMAT = "pdf"
N_JOBS = 8
Function definitions#
Data loading#
adata = ad.io.read_h5ad(DATA_DIR / DATASET_ID / "processed" / "adata.h5ad")
adata
Data preprocessing#
adata = adata[adata.obs["labeling_time"] != "dmso", :].copy()
adata.obs["labeling_time"] = adata.obs["labeling_time"].astype(float) / 60
adata.obs.drop(["well_id", "plate_id", "log10_gfp", "log10_gfp"], axis=1, inplace=True)
del (
adata.layers["labeled_unspliced"],
adata.layers["labeled_spliced"],
adata.layers["unlabeled_unspliced"],
adata.layers["unlabeled_spliced"],
)
scv.pp.filter_and_normalize(adata, min_counts=50, layers_normalize=["X", "labeled", "unlabeled", "total"], log=False)
adata
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, n_top_genes=2000, subset=True)
adata
sc.tl.pca(adata)
sc.pp.neighbors(adata, n_neighbors=30, n_pcs=30)
adata.layers["labeled_smoothed"] = csr_matrix.dot(adata.obsp["connectivities"], adata.layers["labeled"]).A
adata.layers["unlabeled_smoothed"] = csr_matrix.dot(adata.obsp["connectivities"], adata.layers["unlabeled"]).A
adata.layers["total_smoothed"] = csr_matrix.dot(adata.obsp["connectivities"], adata.layers["total"]).A
Parameter inference#
time_key = "labeling_time"
labeling_times = get_labeling_times(adata=adata, time_key="labeling_time")
labeling_time_mask = get_labeling_time_mask(adata=adata, time_key=time_key, labeling_times=labeling_times)
obs_dist_argsort = get_obs_dist_argsort(adata=adata, labeling_time_mask=labeling_time_mask)
n_neighbors = get_n_neighbors(
adata,
labeling_time_mask=labeling_time_mask,
obs_dist_argsort=obs_dist_argsort,
n_nontrivial_counts=20,
use_rep="labeled_smoothed",
n_jobs=N_JOBS,
)
alpha, gamma, r0, success, opt_res = get_parameters(
adata=adata,
use_rep="labeled_smoothed",
time_key="labeling_time",
experiment_key="experiment",
n_neighbors=n_neighbors,
x0=None,
n_jobs=N_JOBS,
)
adata.layers["transcription_rate"] = alpha.loc[adata.obs_names, adata.var_names]
adata.layers["degradation_rate"] = gamma.loc[adata.obs_names, adata.var_names]
adata.layers["r0"] = r0.loc[adata.obs_names, adata.var_names]
# When initializing the VelocityKernel, specify `vkey="velocity_labeled"`
adata.layers["velocity_labeled"] = (alpha - gamma * adata.layers["labeled_smoothed"]).values