Performance comparison of inference on dyngen data

Performance comparison of inference on dyngen data#

Notebook compares metrics for velocity, latent time and GRN inference across different methods applied to dyngen-generated data.

Library imports#

import pandas as pd
from scipy.stats import ttest_ind

import matplotlib.pyplot as plt
import mplscience
import seaborn as sns

from rgv_tools import DATA_DIR, FIG_DIR
from rgv_tools.core import METHOD_PALETTE
from rgv_tools.plotting._significance import add_significance, get_significance

General settings#

DATASET = "dyngen"
SAVE_FIG = True
if SAVE_FIG:
    (FIG_DIR / DATASET).mkdir(parents=True, exist_ok=True)

Constants#

VELOCITY_METHODS = ["regvelo", "velovi", "scvelo", "unitvelo", "velovae_vae", "velovae_fullvb", "tfvelo", "cell2fate"]
TIME_METHODS = ["regvelo", "velovi", "scvelo"]
GRN_METHODS = ["regvelo", "correlation", "grnboost2", "celloracle", "tfvelo", "splicejac"]

Data loading#

correlation_df = []
time_df = []
grn_df = []

for method in VELOCITY_METHODS:
    df = pd.read_parquet(DATA_DIR / DATASET / "results" / f"{method}_correlation.parquet")
    df.columns = f"{method}_" + df.columns
    correlation_df.append(df)

for method in TIME_METHODS:
    df = pd.read_parquet(DATA_DIR / DATASET / "results" / f"{method}_correlation.parquet")
    df.columns = f"{method}_" + df.columns
    time_df.append(df)

for method in GRN_METHODS:
    df = pd.read_parquet(DATA_DIR / DATASET / "results" / f"{method}_correlation.parquet")
    df.columns = f"{method}_" + df.columns
    grn_df.append(df)
correlation_df = pd.concat(correlation_df, axis=1)
time_df = pd.concat(time_df, axis=1)
grn_df = pd.concat(grn_df, axis=1)

Analysis#

Time#

df = time_df.loc[:, time_df.columns.str.contains("time")]
df.columns = df.columns.str.removesuffix("_time")
df = pd.melt(df, var_name="method", value_name="correlation")

with mplscience.style_context():
    sns.set_style(style="whitegrid")
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.violinplot(
        data=df, x="correlation", y="method", hue="method", order=TIME_METHODS, palette=METHOD_PALETTE, ax=ax
    )

    ttest_res = ttest_ind(
        time_df["regvelo_time"],
        time_df["velovi_time"],
        equal_var=False,
        alternative="greater",
    )
    significance = get_significance(pvalue=ttest_res.pvalue)
    add_significance(
        ax=ax,
        left=0,
        right=1,
        significance=significance,
        lw=1,
        bracket_level=1.05,
        c="k",
        level=0,
        orientation="vertical",
    )

    ttest_res = ttest_ind(
        correlation_df["regvelo_time"],
        correlation_df["scvelo_time"],
        equal_var=False,
        alternative="greater",
    )
    significance = get_significance(pvalue=ttest_res.pvalue)
    add_significance(
        ax=ax,
        left=0,
        right=2,
        significance=significance,
        lw=1,
        bracket_level=1.05,
        c="k",
        level=0,
        orientation="vertical",
    )

    ax.set(
        xlabel="Spearman correlation",
        ylabel="Method",
        yticks=ax.get_yticks(),
        yticklabels=["RegVelo", "veloVI", "scVelo"],
    )

    if SAVE_FIG:
        fig.savefig(
            FIG_DIR / DATASET / "time_benchmark.svg",
            format="svg",
            transparent=True,
            bbox_inches="tight",
        )

    plt.show()
../_images/05ef81277d66b7504d48431dbd99e0ab6fe6afea8e13fa0efc4ab48ab940f43f.png

Velocity#

default_color = "#949494"
# Update METHOD_PALETTE with new methods if not present
for method in [method for method in VELOCITY_METHODS if method not in METHOD_PALETTE]:
    METHOD_PALETTE.setdefault(method, default_color)
df = correlation_df.loc[:, correlation_df.columns.str.contains("velocity")]
df.columns = df.columns.str.removesuffix("_velocity")
df = pd.melt(df, var_name="method", value_name="correlation")
df["correlation"] = (df["correlation"] + 1) / 2

with mplscience.style_context():
    sns.set_style(style="whitegrid")
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.violinplot(
        data=df, x="correlation", y="method", hue="method", order=VELOCITY_METHODS, palette=METHOD_PALETTE, ax=ax
    )

    ttest_res = ttest_ind(
        correlation_df["regvelo_velocity"],
        correlation_df["velovi_velocity"],
        equal_var=False,
        alternative="greater",
    )
    significance = get_significance(pvalue=ttest_res.pvalue)
    add_significance(
        ax=ax,
        left=0,
        right=1,
        significance=significance,
        lw=1,
        bracket_level=1.05,
        c="k",
        level=0,
        orientation="vertical",
    )

    ttest_res = ttest_ind(
        correlation_df["regvelo_velocity"],
        correlation_df["scvelo_velocity"],
        equal_var=False,
        alternative="greater",
    )
    significance = get_significance(pvalue=ttest_res.pvalue)
    add_significance(
        ax=ax,
        left=0,
        right=2,
        significance=significance,
        lw=1,
        bracket_level=1.05,
        c="k",
        level=0,
        orientation="vertical",
    )

    ax.set(
        xlabel="Pearson correlation",
        ylabel="Method",
        yticks=ax.get_yticks(),
        yticklabels=[
            "RegVelo",
            "veloVI",
            "scVelo",
            "UniTVelo",
            "veloVAE (VAE)",
            "veloVAE (fullvb)",
            "TFvelo",
            "cell2fate",
        ],
    )

    if SAVE_FIG:
        fig.savefig(
            FIG_DIR / DATASET / "velocity_benchmark.svg",
            format="svg",
            transparent=True,
            bbox_inches="tight",
        )

    plt.show()
../_images/174767fbe4740f20cb425db13b89932756cd5dbee82dd81630f2983306b706f9.png

GRN#

default_color = "#949494"
# Update METHOD_PALETTE with new methods if not present
for method in [method for method in GRN_METHODS if method not in METHOD_PALETTE]:
    METHOD_PALETTE.setdefault(method, default_color)
df = grn_df.loc[:, grn_df.columns.str.contains("grn")]
df.columns = df.columns.str.removesuffix("_grn")
df = pd.melt(df, var_name="method").rename(columns={"value": "correlation"})

with mplscience.style_context():
    sns.set_style(style="whitegrid")
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.violinplot(data=df, x="correlation", y="method", hue="method", order=GRN_METHODS, palette=METHOD_PALETTE, ax=ax)

    ttest_res = ttest_ind(
        grn_df["regvelo_grn"],
        grn_df["correlation_grn"],
        equal_var=False,
        alternative="greater",
    )
    significance = get_significance(pvalue=ttest_res.pvalue)
    add_significance(
        ax=ax,
        left=0,
        right=1,
        significance=significance,
        lw=1,
        bracket_level=1.05,
        c="k",
        level=0,
        orientation="vertical",
    )

    ax.set(
        xlabel="AUROC",
        ylabel="Method",
        yticks=ax.get_yticks(),
        yticklabels=["RegVelo", "Correlation", "GRNBoost2", "CellOracle", "TFvelo", "SpliceJAC"],
    )

    if SAVE_FIG:
        fig.savefig(
            FIG_DIR / DATASET / "grn_benchmark.svg",
            format="svg",
            transparent=True,
            bbox_inches="tight",
        )

    plt.show()
../_images/025368ab9d017d0d863d534ee9053ca055b6e3fd8751b3777e687944cce81a72.png