GRNBoost2 benchmark on cell cycle

GRNBoost2 benchmark on cell cycle#

This notebook benchmarks gene regulatory network (GRN) inference with GRNBoost2 on the cell cycle dataset.

Library imports#

import pandas as pd

import anndata as ad
from arboreto.algo import grnboost2

from rgv_tools import DATA_DIR
from rgv_tools.benchmarking import get_grn_auroc_cc
/home/icb/weixu.wang/miniconda3/envs/regvelo_test/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_csv from `anndata` is deprecated. Import anndata.io.read_csv instead.
  warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/regvelo_test/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_excel from `anndata` is deprecated. Import anndata.io.read_excel instead.
  warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/regvelo_test/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_hdf from `anndata` is deprecated. Import anndata.io.read_hdf instead.
  warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/regvelo_test/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_loom from `anndata` is deprecated. Import anndata.io.read_loom instead.
  warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/regvelo_test/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_mtx from `anndata` is deprecated. Import anndata.io.read_mtx instead.
  warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/regvelo_test/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_text from `anndata` is deprecated. Import anndata.io.read_text instead.
  warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/regvelo_test/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_umi_tools from `anndata` is deprecated. Import anndata.io.read_umi_tools instead.
  warnings.warn(msg, FutureWarning)

Constants#

# Name of the dataset sub-directory below DATA_DIR.
DATASET = "cell_cycle"

# Switch controlling whether results are written to disk.
SAVE_DATA = True

# Create the results directory up front so later writes cannot fail on a missing folder.
if SAVE_DATA:
    DATA_DIR.joinpath(DATASET, "results").mkdir(exist_ok=True, parents=True)

Data loading#

# Read the preprocessed AnnData object of the selected dataset.
adata = ad.io.read_h5ad(DATA_DIR.joinpath(DATASET, "processed", "adata_processed.h5ad"))

# Trailing expression: shows the object summary when run as a notebook cell.
adata
AnnData object with n_obs × n_vars = 1146 × 395
    obs: 'phase', 'fucci_time', 'initial_size_unspliced', 'initial_size_spliced', 'initial_size', 'n_counts'
    var: 'ensum_id', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'velocity_gamma', 'velocity_qreg_ratio', 'velocity_r2', 'velocity_genes'
    uns: 'log1p', 'neighbors', 'pca', 'umap', 'velocity_params'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs', 'true_skeleton'
    layers: 'Ms', 'Mu', 'spliced', 'total', 'unspliced', 'velocity'
    obsp: 'connectivities', 'distances'

GRN pipeline#

# Run GRNBoost2 on the "Ms" layer; every gene in `adata.var_names` is offered as a
# candidate regulator. The result is a long-format table of (TF, target, importance) edges.
network = grnboost2(expression_data=adata.to_df(layer="Ms"), tf_names=adata.var_names.to_list())

# Turn the edge list into a dense target x TF importance matrix.
# `pd.pivot` orders its row/column labels by sorted gene name and only contains genes
# that occur in at least one returned edge, so the raw pivot is not guaranteed to match
# the gene order (or even the shape) of the AnnData object. Reindexing both axes to
# `adata.var_names` aligns the estimate with the ground truth; missing edges become 0.
# NOTE(review): assumes both axes of `adata.varm["true_skeleton"]` follow
# `adata.var_names` order -- confirm against the preprocessing notebook.
grn_estimate = (
    pd.pivot(network, index="target", columns="TF", values="importance")
    .reindex(index=adata.var_names, columns=adata.var_names)
    .fillna(0)
    .to_numpy()
)

# Score the (transposed, i.e. TF x target) estimate against the ground-truth skeleton.
# Kept as a one-element list so it can be placed directly into a DataFrame column.
grn_correlation = [get_grn_auroc_cc(ground_truth=adata.varm["true_skeleton"].toarray(), estimated=grn_estimate.T)]
2024-12-10 21:28:11,403 - INFO - To route to workers diagnostics web server please install jupyter-server-proxy: python -m pip install jupyter-server-proxy
2024-12-10 21:28:11,450 - INFO - State start
2024-12-10 21:28:11,456 - INFO -   Scheduler at:      tcp://127.0.0.1:8051
2024-12-10 21:28:11,457 - INFO -   dashboard at:  http://127.0.0.1:8493/status
2024-12-10 21:28:11,457 - INFO - Registering Worker plugin shuffle
2024-12-10 21:28:11,473 - INFO -         Start Nanny at: 'tcp://127.0.0.1:8117'
2024-12-10 21:28:11,522 - INFO -         Start Nanny at: 'tcp://127.0.0.1:8461'
2024-12-10 21:28:11,526 - INFO -         Start Nanny at: 'tcp://127.0.0.1:8485'
2024-12-10 21:28:12,750 - INFO - Register worker <WorkerState 'tcp://127.0.0.1:8053', name: 0, status: init, memory: 0, processing: 0>
2024-12-10 21:28:12,753 - INFO - Starting worker compute stream, tcp://127.0.0.1:8053
2024-12-10 21:28:12,754 - INFO - Starting established connection to tcp://127.0.0.1:8212
2024-12-10 21:28:12,913 - INFO - Register worker <WorkerState 'tcp://127.0.0.1:8143', name: 1, status: init, memory: 0, processing: 0>
2024-12-10 21:28:12,914 - INFO - Starting worker compute stream, tcp://127.0.0.1:8143
2024-12-10 21:28:12,915 - INFO - Starting established connection to tcp://127.0.0.1:8220
2024-12-10 21:28:12,935 - INFO - Register worker <WorkerState 'tcp://127.0.0.1:8273', name: 2, status: init, memory: 0, processing: 0>
2024-12-10 21:28:12,936 - INFO - Starting worker compute stream, tcp://127.0.0.1:8273
2024-12-10 21:28:12,937 - INFO - Starting established connection to tcp://127.0.0.1:8228
2024-12-10 21:28:12,965 - INFO - Receive client connection: Client-475422a7-b735-11ef-81d3-00001049fe80
2024-12-10 21:28:12,966 - INFO - Starting established connection to tcp://127.0.0.1:8240
2024-12-10 21:29:21,534 - INFO - Remove client Client-475422a7-b735-11ef-81d3-00001049fe80
2024-12-10 21:29:21,534 - INFO - Received 'close-stream' from tcp://127.0.0.1:8240; closing.
2024-12-10 21:29:21,535 - INFO - Remove client Client-475422a7-b735-11ef-81d3-00001049fe80
2024-12-10 21:29:21,537 - INFO - Close client connection: Client-475422a7-b735-11ef-81d3-00001049fe80
2024-12-10 21:29:21,540 - INFO - Closing Nanny at 'tcp://127.0.0.1:8117'. Reason: nanny-close
2024-12-10 21:29:21,540 - INFO - Nanny asking worker to close. Reason: nanny-close
2024-12-10 21:29:21,541 - INFO - Closing Nanny at 'tcp://127.0.0.1:8461'. Reason: nanny-close
2024-12-10 21:29:21,542 - INFO - Nanny asking worker to close. Reason: nanny-close
2024-12-10 21:29:21,542 - INFO - Closing Nanny at 'tcp://127.0.0.1:8485'. Reason: nanny-close
2024-12-10 21:29:21,544 - INFO - Nanny asking worker to close. Reason: nanny-close
2024-12-10 21:29:21,547 - INFO - Received 'close-stream' from tcp://127.0.0.1:8212; closing.
2024-12-10 21:29:21,548 - INFO - Received 'close-stream' from tcp://127.0.0.1:8220; closing.
2024-12-10 21:29:21,550 - INFO - Remove worker <WorkerState 'tcp://127.0.0.1:8053', name: 0, status: closing, memory: 0, processing: 0> (stimulus_id='handle-worker-cleanup-1733862561.5502915')
2024-12-10 21:29:21,551 - INFO - Remove worker <WorkerState 'tcp://127.0.0.1:8143', name: 1, status: closing, memory: 0, processing: 0> (stimulus_id='handle-worker-cleanup-1733862561.5514474')
2024-12-10 21:29:21,553 - INFO - Received 'close-stream' from tcp://127.0.0.1:8228; closing.
2024-12-10 21:29:21,554 - INFO - Remove worker <WorkerState 'tcp://127.0.0.1:8273', name: 2, status: closing, memory: 0, processing: 0> (stimulus_id='handle-worker-cleanup-1733862561.5540197')
2024-12-10 21:29:21,554 - INFO - Lost all workers
2024-12-10 21:29:22,101 - INFO - Scheduler closing due to unknown reason...
2024-12-10 21:29:22,102 - INFO - Scheduler closing all comms
# Persist the benchmark score as a single-column parquet table when saving is enabled.
if SAVE_DATA:
    results_file = DATA_DIR / DATASET / "results" / "grnboost2_correlation.parquet"
    score_table = pd.DataFrame({"grn": grn_correlation})
    score_table.to_parquet(path=results_file)