Correlation benchmark on toy GRN#

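This notebook benchmarks GRN inference on the toy GRN data using a correlation-based scheme: for each simulated dataset, the absolute pairwise gene-gene correlation is used as the GRN estimate and scored against the ground truth with AUROC.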

Library imports#

from tqdm import tqdm

import pandas as pd

from rgv_tools import DATA_DIR
from rgv_tools.benchmarking import get_data_subset, get_grn_auroc
from rgv_tools.core import read_as_dask

Constants#

# Name of the dataset sub-directory under DATA_DIR used throughout this notebook.
DATASET = "toy_grn"
# When True, benchmark results are written to disk at the end of the notebook.
SAVE_DATA = True
if SAVE_DATA:
    # Create the results directory (and any missing parents) up front;
    # exist_ok=True makes re-running the notebook safe.
    (DATA_DIR / DATASET / "results").mkdir(parents=True, exist_ok=True)

Function definitions#

Data loading#

# Load the raw toy GRN data from its Zarr store.
# NOTE(review): `layers=[]` presumably requests that no layers be loaded
# (only X, obs, uns, ...) — confirm against `read_as_dask`.
adata = read_as_dask(store=DATA_DIR / DATASET / "raw" / "adata.zarr", layers=[])
# Bare expression: displays the object's summary as the notebook cell output.
adata
AnnData object with n_obs × n_vars = 150000 × 6
    obs: 'true_time', 'dataset'
    uns: '0', '1', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '2', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '3', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '4', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '5', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '6', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '7', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '8', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '9', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99'

Correlation-based GRN inference#

# One score per simulated dataset, in the order of the "dataset" categories.
grn_correlation = []

for dataset in tqdm(adata.obs["dataset"].cat.categories):
    # Restrict to the observations of the current dataset and carry along its
    # "true_K" entry from `uns`, which is used as the ground truth below.
    adata_subset = get_data_subset(
        adata=adata,
        column="dataset",
        group=dataset,
        uns_keys=["true_K"],
    )

    # GRN estimate: absolute pairwise correlation between the variables
    # (pandas' default correlation method), as a plain array.
    expression_df = adata_subset.to_df()
    correlation_df = expression_df.corr()
    grn_estimate = correlation_df.abs().values

    # Score the estimate against the ground truth and record the result.
    auroc = get_grn_auroc(ground_truth=adata_subset.uns["true_K"], estimated=grn_estimate)
    grn_correlation.append(auroc)
100%|██████████| 100/100 [00:01<00:00, 90.77it/s]

Data saving#

if SAVE_DATA:
    # Persist the per-dataset scores as a single-column ("grn") table in the
    # results directory of this dataset.
    grn_scores = pd.DataFrame({"grn": grn_correlation})
    grn_scores.to_parquet(path=DATA_DIR / DATASET / "results" / "correlation_correlation.parquet")