GRNBoost2 benchmark on toy GRN#
This notebook benchmarks gene regulatory network (GRN) inference with GRNBoost2 on the toy GRN dataset.
Library imports#
from tqdm import tqdm
import pandas as pd
from arboreto.algo import grnboost2
from rgv_tools import DATA_DIR
from rgv_tools.benchmarking import get_data_subset, get_grn_auroc
from rgv_tools.core import read_as_dask
General settings#
# Disabled setting: the code below sits inside a bare string literal, so it is
# never executed. If enabled, it would set the dask configuration option
# "distributed.scheduler.worker-ttl" to None.
"""
from dask import config as cfg
cfg.set({"distributed.scheduler.worker-ttl": None})
"""
Constants#
# Sub-directory of DATA_DIR that holds this benchmark's inputs and outputs.
DATASET = "toy_grn"

# When True, results are written to disk and the output folder is created up front.
SAVE_DATA = True

if SAVE_DATA:
    results_dir = DATA_DIR / DATASET / "results"
    # parents/exist_ok make this safe to re-run and on a fresh checkout.
    results_dir.mkdir(parents=True, exist_ok=True)
Function definitions#
Data loading#
# Read the raw toy-GRN store with the project's dask reader, requesting no layers.
raw_store = DATA_DIR / DATASET / "raw" / "adata.zarr"
adata = read_as_dask(store=raw_store, layers=[])

# Bare expression: shows the object's summary when run as a notebook cell.
adata
GRN inference pipeline#
# Run GRNBoost2 on every simulated dataset and score the inferred network
# against the ground-truth matrix stored in ``uns["true_K"]``.

# Every gene is offered as a candidate regulator; the list does not change
# between datasets, so it is built once outside the loop.
gene_names = adata.var_names.to_list()

# One entry per dataset: the value returned by ``get_grn_auroc``.
grn_correlation = []
for dataset in tqdm(adata.obs["dataset"].cat.categories):
    adata_subset = get_data_subset(adata=adata, column="dataset", group=dataset, uns_keys=["true_K"])
    network = grnboost2(expression_data=adata_subset.to_df(), tf_names=gene_names)

    # ``network`` is a long table with columns TF / target / importance that
    # only contains the edges GRNBoost2 reports. A plain pivot would therefore
    # (a) drop any gene that never shows up as TF or as target and
    # (b) order rows/columns by sorted label instead of by gene order.
    # Reindexing on the gene list yields a full genes x genes matrix
    # (rows = targets, columns = TFs) with missing edges set to 0.
    # NOTE(review): assumes ``true_K`` rows/columns follow ``adata.var_names``
    # order -- confirm against the simulation code.
    grn_estimate = (
        network.pivot(index="target", columns="TF", values="importance")
        .reindex(index=gene_names, columns=gene_names)
        .fillna(0)
        .values
    )

    grn_correlation.append(get_grn_auroc(ground_truth=adata_subset.uns["true_K"], estimated=grn_estimate))
Data saving#
# Persist the per-dataset scores as a one-column ("grn") parquet table.
if SAVE_DATA:
    output_file = DATA_DIR / DATASET / "results" / "grnboost2_correlation.parquet"
    scores = pd.DataFrame({"grn": grn_correlation})
    scores.to_parquet(path=output_file)