CellOracle benchmark on dyngen data

CellOracle benchmark on dyngen data#

This notebook benchmarks gene regulatory network (GRN) inference with CellOracle on dyngen-generated data.

Library imports#

import numpy as np
import pandas as pd
import torch
from sklearn.metrics import roc_auc_score

import anndata as ad
import celloracle as co
import scvi

from rgv_tools import DATA_DIR

General settings#

# Set the global scvi-tools seed to 0 (the log output below confirms "Seed set to 0").
scvi.settings.seed = 0
INFO: [rank: 0] Seed set to 0
2025-04-28 22:17:36,979 - INFO - [rank: 0] Seed set to 0

Constants#

# Names of the benchmark data to use; both are path components below DATA_DIR.
DATASET = "dyngen"
COMPLEXITY = "complexity_1"

# When saving is enabled, create the results folder (and any missing parents) up front.
SAVE_DATA = True
if SAVE_DATA:
    DATA_DIR.joinpath(DATASET, COMPLEXITY, "results").mkdir(exist_ok=True, parents=True)

GRN inference pipeline#

# One entry per processed simulation: the mean per-cell AUROC of the CellOracle
# coefficients against the cell-specific ground-truth GRN.
grn_correlation = []

cnt = 0
# NOTE(review): `iterdir()` yields files in arbitrary order and the simulation id is not
# stored next to the score, so result rows cannot be mapped back to a dataset afterwards.
# The order is kept as-is here; confirm whether downstream code relies on it.
for filename in (DATA_DIR / DATASET / COMPLEXITY / "processed").iterdir():
    torch.cuda.empty_cache()
    if filename.suffix != ".zarr":
        continue

    simulation_id = int(filename.stem.removeprefix("simulation_"))
    print(f"Run {cnt}, dataset {simulation_id}.")
    adata = ad.io.read_zarr(filename)

    # Names and positional indices of the genes flagged as transcription factors.
    TF = adata.var_names[adata.var["is_tf"]]
    TF_ind = [adata.var_names.get_loc(tf) for tf in TF]

    # Keep only the TF columns of the static skeleton and of the per-cell GRN
    # (the latter is indexed by cell along its third axis below).
    grn_true = adata.uns["true_skeleton"][:, TF_ind]
    grn_sc_true = adata.uns["true_sc_grn"][:, TF_ind]

    # All-ones base GRN: one row per TF, one column per gene, so no candidate link is
    # excluded a priori. CellOracle expects the two leading metadata columns.
    base_grn = np.ones((len(TF), adata.n_vars))
    base_grn = pd.DataFrame(base_grn, index=TF, columns=adata.var_names)
    base_grn["peak_id"] = ["peak_" + i for i in TF]
    base_grn["gene_short_name"] = TF
    base_grn = base_grn[["peak_id", "gene_short_name"] + adata.var_names.to_list()]

    net = co.Net(gene_expression_matrix=adata.to_df(layer="Ms"), TFinfo_matrix=base_grn, verbose=False)
    net.fit_All_genes(bagging_number=100, alpha=1, verbose=False)
    net.updateLinkList(verbose=False)

    # Dense target x source matrix of mean coefficients; links absent from the list become 0.
    # NOTE(review): `pd.pivot` orders rows/columns by sorted label, whereas the skeleton mask
    # below follows the order of `adata.uns["true_skeleton"]` — confirm the two orderings
    # (and shapes) coincide.
    grn_estimate = pd.pivot(net.linkList[["source", "target", "coef_mean"]], index="target", columns="source")
    grn_estimate = grn_estimate.fillna(0).values

    # Everything below up to the cell loop is identical for every cell of a dataset, so it
    # is computed once instead of once per cell.
    skeleton_mask = np.array(grn_true.T) == 1
    estimated = np.abs(grn_estimate[skeleton_mask])
    number = min(10000, len(estimated))
    # Rank the skeleton edges by absolute coefficient; `index` maps ranks back to edges.
    estimated, index = torch.topk(torch.tensor(estimated), number)

    grn_auroc = []
    for cell_id in range(adata.n_obs):
        ground_truth = grn_sc_true[:, :, cell_id]

        # Cells without any active ground-truth interaction contribute no score.
        if ground_truth.sum() > 0:
            # Boolean indexing returns a copy, so binarising in place leaves `adata.uns` untouched.
            ground_truth = ground_truth.T[skeleton_mask]
            ground_truth[ground_truth != 0] = 1

            # AUROC is undefined when the selected labels contain a single class.
            if len(np.unique(ground_truth[index])) < 2:
                print("Skipping cell due to single-class ground truth")
                grn_auroc.append(np.nan)
            else:
                grn_auroc.append(roc_auc_score(ground_truth[index], estimated))

    # Average over the cells that produced a score. A plain `np.mean` would turn the whole
    # dataset score into NaN as soon as one cell was skipped; the explicit guard avoids the
    # empty-slice warning when no cell produced a score at all.
    if np.isfinite(grn_auroc).any():
        grn_correlation.append(np.nanmean(grn_auroc))
    else:
        grn_correlation.append(np.nan)
    cnt += 1
Run 0, dataset 29.
Run 1, dataset 14.
Run 2, dataset 24.
Run 3, dataset 28.
Run 4, dataset 6.
Run 5, dataset 21.
Run 6, dataset 15.
Run 7, dataset 9.
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Run 8, dataset 12.
Run 9, dataset 19.
Run 10, dataset 4.
Run 11, dataset 13.
Run 12, dataset 2.
Run 13, dataset 16.
Run 14, dataset 1.
Run 15, dataset 18.
Run 16, dataset 5.
Run 17, dataset 10.
Run 18, dataset 8.
Run 19, dataset 11.
Run 20, dataset 27.
Run 21, dataset 23.
Run 22, dataset 17.
Run 23, dataset 30.
Run 24, dataset 22.
Run 25, dataset 25.
Run 26, dataset 20.
Run 27, dataset 7.
Run 28, dataset 3.
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Skipping cell due to single-class ground truth
Run 29, dataset 26.
# Inspection cell: shape of the ground-truth slice for cell index 1 of the last loaded dataset.
grn_sc_true[:, :, 1].shape
(494, 37)
# `ground_truth` is whatever the final iteration of the loop above left behind.
ground_truth.shape
(494, 37)
# Shape of the TF-restricted skeleton of the last loaded dataset.
grn_true.shape
(494, 37)
# Leftover from the final loop iteration; it is still 2-D here, i.e. it was not reduced to skeleton edges.
ground_truth
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])
# Leftover `torch.topk` values: absolute coefficients of the skeleton edges, largest first.
estimated
tensor([0.0475, 0.0389, 0.0381, 0.0322, 0.0313, 0.0265, 0.0262, 0.0218, 0.0216,
        0.0209, 0.0188, 0.0182, 0.0174, 0.0169, 0.0163, 0.0160, 0.0158, 0.0150,
        0.0147, 0.0142, 0.0137, 0.0137, 0.0131, 0.0128, 0.0125, 0.0114, 0.0103,
        0.0096, 0.0094, 0.0094, 0.0090, 0.0086, 0.0085, 0.0083, 0.0081, 0.0072,
        0.0069, 0.0069, 0.0068, 0.0067, 0.0066, 0.0066, 0.0065, 0.0064, 0.0064,
        0.0063, 0.0062, 0.0062, 0.0059, 0.0056, 0.0054, 0.0052, 0.0051, 0.0048,
        0.0047, 0.0046, 0.0045, 0.0043, 0.0043, 0.0043, 0.0043, 0.0042, 0.0034,
        0.0029, 0.0029, 0.0028, 0.0025, 0.0022, 0.0018, 0.0012, 0.0009, 0.0007,
        0.0007, 0.0004, 0.0003, 0.0000, 0.0000])
# `ground_truth` is 2-D at this point, so `index` selects whole rows rather than per-edge labels.
ground_truth[index]
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

Data saving#

if SAVE_DATA:
    # One row per processed simulation, in loop order; the single "grn" column holds the
    # scores accumulated in `grn_correlation`.
    output_file = DATA_DIR / DATASET / COMPLEXITY / "results" / "celloracle_correlation.parquet"
    score_table = pd.DataFrame({"grn": grn_correlation})
    score_table.to_parquet(path=output_file)