Inferring transition probabilities from time-resolved single-cell data

Inferring transition probabilities from time-resolved single-cell data#

Here, we show how to infer cell-cell transition probabilities, using the RealTimeKernel. The required data can be downloaded here from FigShare. The data is expected to be in data/larry/processed/, i.e.,

mkdir -p ../../data/larry/processed/
wget https://figshare.com/ndownloader/files/56386622 -O ../../data/larry/processed/adata.zarr.zip

Library imports#

import matplotlib.pyplot as plt
import mplscience

import anndata as ad
import cellrank as cr
import scanpy as sc
import scvelo as scv
from moscot.problems.time import TemporalProblem

from crp import DATA_DIR, FIG_DIR

General settings#

sc.settings.verbosity = 2
cr.settings.verbosity = 4
scv.settings.verbosity = 3
scv.settings.set_figure_params("scvelo", dpi_save=400, dpi=80, transparent=True, fontsize=20, color_map="viridis")

Constants#

DATASET = "larry"
SAVE_FIGURES = False
if SAVE_FIGURES:
    (FIG_DIR / DATASET).mkdir(parents=True, exist_ok=True)

FIGURE_FORMATE = "svg"

Function definitions#

Data loading#

adata = ad.io.read_zarr(DATA_DIR / DATASET / "processed" / "adata.zarr.zip")
adata
AnnData object with n_obs × n_vars = 130887 × 25289
    obs: 'day', 'cell_type'
    obsm: 'latent_rep'
sc.pp.subsample(adata, fraction=0.25, random_state=0)
adata
AnnData object with n_obs × n_vars = 32721 × 25289
    obs: 'day', 'cell_type'
    obsm: 'latent_rep'

Data preprocessing#

sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, n_top_genes=1500, subset=True)
adata
extracting highly variable genes
    finished (0:00:00)
AnnData object with n_obs × n_vars = 32721 × 1500
    obs: 'day', 'cell_type'
    var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'log1p', 'hvg'
    obsm: 'latent_rep'
sc.tl.pca(adata)
sc.pp.neighbors(adata, n_pcs=30, n_neighbors=30)
computing PCA
    with n_comps=50
    finished (0:00:00)
computing neighbors
    using 'X_pca' with n_pcs = 30
    finished (0:00:11)
with mplscience.style_context():
    fig, ax = plt.subplots(figsize=(6, 4))
    scv.pl.scatter(adata, basis="latent_rep", color="day", color_map="viridis", size=25, title="", ax=ax)
    plt.show()

    fig, ax = plt.subplots(figsize=(6, 4))
    scv.pl.scatter(adata, basis="latent_rep", color="cell_type", size=25, title="", ax=ax)
    plt.show()
Renamed 'latent_rep' to convention 'X_latent_rep' (adata.obsm).
../_images/c59cd8571be2ad5560767b124e4be147aa90b667f75b5d5a75c8b4821ad24e90.png ../_images/769df70f9da5a6e8302f9d1582ded351c8be4aba538a7c047038ce1ac381ad45.png

CellRank pipeline#

adata.obs["day"] = adata.obs["day"].astype("category")
tp = TemporalProblem(adata)
tp = tp.prepare(time_key="day")
INFO     Computing pca with `n_comps=30` for `xy` using `adata.X`                                                  
computing PCA
    with n_comps=30
    finished (0:00:00)
INFO     Computing pca with `n_comps=30` for `xy` using `adata.X`                                                  
computing PCA
    with n_comps=30
    finished (0:00:00)
tp = tp.solve(epsilon=1e-3, tau_a=0.95, scale_cost="mean")
INFO     Solving `2` problems                                                                                      
INFO     Solving problem BirthDeathProblem[stage='prepared', shape=(7004, 12136)].                                 
INFO     Solving problem BirthDeathProblem[stage='prepared', shape=(12136, 13581)].                                
rtk = cr.kernels.RealTimeKernel.from_moscot(tp)
rtk.compute_transition_matrix(self_transitions="all", conn_weight=0.2, threshold="auto")
Using automatic `threshold=1.0144462310110458e-12`
computing neighbors
    using 'X_pca' with n_pcs = 50
    finished (0:00:00)
computing neighbors
    using 'X_pca' with n_pcs = 50
    finished (0:00:01)
computing neighbors
    using 'X_pca' with n_pcs = 50
    finished (0:00:00)
RealTimeKernel[n=32721, threshold='auto', self_transitions='all']