scVelo benchmark on dyngen data#
Notebook benchmarks velocity and latent time inference using scVelo on dyngen-generated data.
Library imports#
import numpy as np
import pandas as pd
import anndata as ad
import scvelo as scv
from rgv_tools import DATA_DIR
from rgv_tools.benchmarking import get_time_correlation, get_velocity_correlation
General settings#
scv.settings.verbosity = 3
Constants#
DATASET = "dyngen"
COMPLEXITY = "complexity_1"
SAVE_DATA = True
if SAVE_DATA:
(DATA_DIR / DATASET / COMPLEXITY / "results").mkdir(parents=True, exist_ok=True)
SAVE_DATASETS = True
if SAVE_DATASETS:
(DATA_DIR / DATASET / COMPLEXITY / "trained_scvelo").mkdir(parents=True, exist_ok=True)
Velocity pipeline#
velocity_correlation = []
time_correlation = []
cnt = 0
for filename in (DATA_DIR / DATASET / COMPLEXITY / "processed").iterdir():
if filename.suffix != ".zarr":
continue
simulation_id = int(filename.stem.removeprefix("simulation_"))
print(f"Run {cnt}, dataset {simulation_id}.")
adata = ad.io.read_zarr(filename)
# Parameter inference
scv.tl.recover_dynamics(adata, fit_scaling=False, var_names=adata.var_names, n_jobs=1)
# Velocity inference
adata.var["fit_scaling"] = 1.0
scv.tl.velocity(adata, mode="dynamical", min_likelihood=-np.inf, min_r2=None)
# save data
adata.write_zarr(DATA_DIR / DATASET / COMPLEXITY / "trained_scvelo" / f"trained_{simulation_id}.zarr")
velocity_correlation.append(
get_velocity_correlation(
ground_truth=adata.layers["true_velocity"], estimated=adata.layers["velocity"], aggregation=np.mean
)
)
## calculate per gene latent time correlation
time_corr = [
get_time_correlation(ground_truth=adata.obs["true_time"], estimated=adata.layers["fit_t"][:, i])
for i in range(adata.layers["fit_t"].shape[1])
]
time_correlation.append(np.mean(time_corr))
cnt += 1
Run 0, dataset 29.
recovering dynamics (using 1/128 cores)
finished (0:00:25) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 1, dataset 14.
recovering dynamics (using 1/128 cores)
finished (0:00:46) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 2, dataset 24.
recovering dynamics (using 1/128 cores)
finished (0:00:56) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 3, dataset 28.
recovering dynamics (using 1/128 cores)
finished (0:00:40) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 4, dataset 6.
recovering dynamics (using 1/128 cores)
finished (0:00:55) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 5, dataset 21.
recovering dynamics (using 1/128 cores)
finished (0:00:33) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 6, dataset 15.
recovering dynamics (using 1/128 cores)
finished (0:00:31) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 7, dataset 9.
recovering dynamics (using 1/128 cores)
finished (0:00:26) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 8, dataset 12.
recovering dynamics (using 1/128 cores)
finished (0:00:36) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 9, dataset 19.
recovering dynamics (using 1/128 cores)
finished (0:00:19) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 10, dataset 4.
recovering dynamics (using 1/128 cores)
finished (0:00:34) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 11, dataset 13.
recovering dynamics (using 1/128 cores)
finished (0:00:35) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 12, dataset 2.
recovering dynamics (using 1/128 cores)
finished (0:00:51) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 13, dataset 16.
recovering dynamics (using 1/128 cores)
finished (0:00:38) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 14, dataset 1.
recovering dynamics (using 1/128 cores)
finished (0:00:26) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 15, dataset 18.
recovering dynamics (using 1/128 cores)
finished (0:00:26) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 16, dataset 5.
recovering dynamics (using 1/128 cores)
finished (0:00:20) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 17, dataset 10.
recovering dynamics (using 1/128 cores)
finished (0:00:41) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 18, dataset 8.
recovering dynamics (using 1/128 cores)
finished (0:00:32) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 19, dataset 11.
recovering dynamics (using 1/128 cores)
finished (0:00:26) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 20, dataset 27.
recovering dynamics (using 1/128 cores)
finished (0:00:27) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 21, dataset 23.
recovering dynamics (using 1/128 cores)
finished (0:00:26) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 22, dataset 17.
recovering dynamics (using 1/128 cores)
finished (0:00:32) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 23, dataset 30.
recovering dynamics (using 1/128 cores)
finished (0:00:33) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 24, dataset 22.
recovering dynamics (using 1/128 cores)
finished (0:00:30) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 25, dataset 25.
recovering dynamics (using 1/128 cores)
finished (0:00:41) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 26, dataset 20.
recovering dynamics (using 1/128 cores)
finished (0:00:20) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 27, dataset 7.
recovering dynamics (using 1/128 cores)
finished (0:00:36) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 28, dataset 3.
recovering dynamics (using 1/128 cores)
finished (0:00:16) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Run 29, dataset 26.
recovering dynamics (using 1/128 cores)
finished (0:00:35) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
Data saving#
if SAVE_DATA:
pd.DataFrame({"velocity": velocity_correlation, "time": time_correlation}).to_parquet(
path=DATA_DIR / DATASET / COMPLEXITY / "results" / "scvelo_correlation.parquet" # omit ori if no NaN values
)