CellOracle benchmark on cell cycle#
Notebook benchmarks GRN inference using CellOracle on cell cycling dataset
Library imports#
import numpy as np
import pandas as pd
import anndata as ad
import celloracle as co
from rgv_tools import DATA_DIR
from rgv_tools.benchmarking import get_grn_auroc_cc
/home/icb/weixu.wang/miniconda3/envs/regvelo_test/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_csv from `anndata` is deprecated. Import anndata.io.read_csv instead.
warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/regvelo_test/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_excel from `anndata` is deprecated. Import anndata.io.read_excel instead.
warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/regvelo_test/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_hdf from `anndata` is deprecated. Import anndata.io.read_hdf instead.
warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/regvelo_test/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_loom from `anndata` is deprecated. Import anndata.io.read_loom instead.
warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/regvelo_test/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_mtx from `anndata` is deprecated. Import anndata.io.read_mtx instead.
warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/regvelo_test/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_text from `anndata` is deprecated. Import anndata.io.read_text instead.
warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/regvelo_test/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_umi_tools from `anndata` is deprecated. Import anndata.io.read_umi_tools instead.
warnings.warn(msg, FutureWarning)
Constants#
DATASET = "cell_cycle"
SAVE_DATA = True
if SAVE_DATA:
(DATA_DIR / DATASET / "results").mkdir(parents=True, exist_ok=True)
Data loading#
adata = ad.io.read_h5ad(DATA_DIR / DATASET / "processed" / "adata_processed.h5ad")
adata
AnnData object with n_obs × n_vars = 1146 × 395
obs: 'phase', 'fucci_time', 'initial_size_unspliced', 'initial_size_spliced', 'initial_size', 'n_counts'
var: 'ensum_id', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'velocity_gamma', 'velocity_qreg_ratio', 'velocity_r2', 'velocity_genes'
uns: 'log1p', 'neighbors', 'pca', 'umap', 'velocity_params'
obsm: 'X_pca', 'X_umap'
varm: 'PCs', 'true_skeleton'
layers: 'Ms', 'Mu', 'spliced', 'total', 'unspliced', 'velocity'
obsp: 'connectivities', 'distances'
GRN pipeline#
base_grn = np.ones((adata.n_vars, adata.n_vars))
base_grn = pd.DataFrame(base_grn, columns=adata.var_names)
base_grn["peak_id"] = "peak_" + adata.var_names
base_grn["gene_short_name"] = adata.var_names
base_grn = base_grn[["peak_id", "gene_short_name"] + adata.var_names.to_list()]
net = co.Net(gene_expression_matrix=adata.to_df(layer="Ms"), TFinfo_matrix=base_grn, verbose=False)
net.fit_All_genes(bagging_number=100, alpha=1, verbose=False)
net.updateLinkList(verbose=False)
grn_estimate = pd.pivot(net.linkList[["source", "target", "coef_mean"]], index="target", columns="source")
grn_estimate = grn_estimate.fillna(0).abs().values
grn_correlation = [get_grn_auroc_cc(ground_truth=adata.varm["true_skeleton"].toarray(), estimated=grn_estimate.T)]
if SAVE_DATA:
pd.DataFrame({"grn": grn_correlation}).to_parquet(
path=DATA_DIR / DATASET / "results" / "celloracle_correlation.parquet"
)