DPT benchmark on toy GRN#
This notebook benchmarks latent time inference with diffusion pseudotime (DPT) on the toy GRN data.
Library imports#
from tqdm import tqdm
import numpy as np
import pandas as pd
import scanpy as sc
from rgv_tools import DATA_DIR
from rgv_tools.benchmarking import get_data_subset, get_time_correlation
from rgv_tools.core import read_as_dask
Constants#
# Name of the dataset folder under DATA_DIR that this notebook works on.
DATASET = "toy_grn"

# Switch controlling whether results are written to disk.
SAVE_DATA = True
if SAVE_DATA:
    # Create the results folder up front (including missing parents); a
    # pre-existing folder is not an error.
    results_dir = DATA_DIR / DATASET / "results"
    results_dir.mkdir(parents=True, exist_ok=True)
Function definitions#
Data loading#
# Open the raw toy GRN store; `layers=[]` requests no layers.
# NOTE(review): presumably `read_as_dask` returns a dask-backed AnnData -- confirm
# against rgv_tools.core.
raw_store = DATA_DIR / DATASET / "raw" / "adata.zarr"
adata = read_as_dask(store=raw_store, layers=[])
# Bare expression so the notebook displays the object's summary.
adata
Pseudotime pipeline#
# One correlation score per entry of the `dataset` categorical, in category order.
time_correlation = []
for dataset in tqdm(adata.obs["dataset"].cat.categories):
    adata_subset = get_data_subset(adata=adata, column="dataset", group=dataset, uns_keys=[])

    # Root the pseudotime at the first cell whose ground-truth time is exactly 0.
    # Indexing with [0] raises IndexError if the subset contains no such cell.
    is_root = adata_subset.obs["true_time"] == 0
    adata_subset.uns["iroot"] = np.flatnonzero(is_root)[0]

    # Neighbor graph -> diffusion map -> diffusion pseudotime, each called with
    # its default arguments and applied to the subset in this order.
    for step in (sc.pp.neighbors, sc.tl.diffmap, sc.tl.dpt):
        step(adata_subset)

    # Compare the inferred pseudotime against the ground-truth time.
    true_time = adata_subset.obs["true_time"]
    dpt_time = adata_subset.obs["dpt_pseudotime"].values
    time_correlation.append(get_time_correlation(ground_truth=true_time, estimated=dpt_time))
Data saving#
# Persist the per-dataset correlations as a single-column ("time") parquet table.
if SAVE_DATA:
    correlation_df = pd.DataFrame({"time": time_correlation})
    output_path = DATA_DIR / DATASET / "results" / "dpt_correlation.parquet"
    correlation_df.to_parquet(path=output_path)