# DPT benchmark on toy GRN

This notebook benchmarks latent time inference using diffusion pseudotime (DPT) on toy gene regulatory network (GRN) data.

## Library imports

from tqdm import tqdm

import numpy as np
import pandas as pd

import scanpy as sc

from rgv_tools import DATA_DIR
from rgv_tools.benchmarking import get_data_subset, get_time_correlation
from rgv_tools.core import read_as_dask

## Constants

# Name of the dataset sub-directory under DATA_DIR used throughout this notebook.
DATASET = "toy_grn"

# When True, results are written to disk and the output directory is created up front.
SAVE_DATA = True
if SAVE_DATA:
    DATA_DIR.joinpath(DATASET, "results").mkdir(parents=True, exist_ok=True)

## Function definitions

## Data loading

# Location of the raw dataset store; it is opened with an empty `layers` list.
adata_store = DATA_DIR / DATASET / "raw" / "adata.zarr"
adata = read_as_dask(store=adata_store, layers=[])
adata

## Pseudotime pipeline

# One score per category of `adata.obs["dataset"]`, appended in category order.
time_correlation = []

for dataset in tqdm(adata.obs["dataset"].cat.categories):
    adata_subset = get_data_subset(adata=adata, column="dataset", group=dataset, uns_keys=[])

    # Root cell for DPT: positional index of the first cell whose true time is exactly zero.
    root_candidates = np.flatnonzero(adata_subset.obs["true_time"] == 0)
    adata_subset.uns["iroot"] = root_candidates[0]

    # Neighbor graph -> diffusion map -> diffusion pseudotime, each with default settings.
    sc.pp.neighbors(adata_subset)
    sc.tl.diffmap(adata_subset)
    sc.tl.dpt(adata_subset)

    # Compare the inferred pseudotime against the known time of each cell.
    ground_truth = adata_subset.obs["true_time"]
    estimated = adata_subset.obs["dpt_pseudotime"].values
    time_correlation.append(get_time_correlation(ground_truth=ground_truth, estimated=estimated))

## Data saving

if SAVE_DATA:
    # Store the per-dataset scores as a single-column ("time") table in the results directory.
    correlation_df = pd.DataFrame({"time": time_correlation})
    output_path = DATA_DIR / DATASET / "results" / "dpt_correlation.parquet"
    correlation_df.to_parquet(path=output_path)