Performance comparison of inference on toy GRN data

Performance comparison of inference on toy GRN data#

This notebook compares velocity, latent time, and GRN inference metrics across different methods applied to toy GRN data.

Library imports#

import pandas as pd
from scipy.stats import ttest_ind

import matplotlib.pyplot as plt
import mplscience
import seaborn as sns

from rgv_tools import DATA_DIR, FIG_DIR
from rgv_tools.core import METHOD_PALETTE
from rgv_tools.plotting._significance import add_significance, get_significance

General settings#

# File format setting for exported figures.
FIGURE_FORMATE = "svg"

# Toggle for writing figures to disk; the output directory is only created when enabled.
SAVE_FIGURES = False
if SAVE_FIGURES:
    (FIG_DIR / "toy_grn").mkdir(exist_ok=True, parents=True)

Constants#

# Methods compared in the velocity benchmark (also the row order of that plot).
VELOCITY_METHODS = [
    "regvelo",
    "velovi",
    "scvelo",
]
# Latent time benchmark: every velocity method plus DPT.
TIME_METHODS = [*VELOCITY_METHODS, "dpt"]
# Methods compared in the GRN benchmark.
GRN_METHODS = [
    "regvelo",
    "correlation",
    "grnboost2",
    "celloracle",
]

Data loading#

# Every method's results file is read exactly once, even when the method appears in
# several task lists. `dict.fromkeys` de-duplicates while keeping first-seen order, so
# the resulting column order follows the velocity, time, then GRN method lists.
methods = list(dict.fromkeys(VELOCITY_METHODS + TIME_METHODS + GRN_METHODS))

# Prefix each column with the method it came from (e.g. "velocity" -> "regvelo_velocity")
# and place all methods side by side in a single frame.
correlation_df = pd.concat(
    [
        pd.read_parquet(DATA_DIR / "toy_grn" / "results" / f"{method}_correlation.parquet").add_prefix(f"{method}_")
        for method in methods
    ],
    axis=1,
)
correlation_df.head()
regvelo_velocity regvelo_time regvelo_grn velovi_velocity velovi_time scvelo_velocity scvelo_time dpt_time correlation_grn grnboost2_grn celloracle_grn
0 0.555112 0.564590 0.530 0.523382 -0.462360 0.461017 0.466543 -0.081997 0.59 0.475 0.470
1 0.747088 0.861632 0.725 0.565394 0.810599 0.621778 0.779928 -0.008727 0.56 0.530 0.555
2 0.747022 0.786513 0.550 0.719559 0.613784 0.710150 0.833685 0.012410 0.71 0.565 0.520
3 0.795365 0.604589 0.530 0.706493 0.125379 0.765667 0.678434 0.824321 0.62 0.620 0.500
4 0.696613 0.914910 0.830 0.680337 0.199152 0.646673 0.718848 -0.102730 0.69 0.550 0.595

Analysis#

Velocity#

# Long-format table of the velocity correlations, one row per (method, value) pair.
# Columns are matched on the "_velocity" suffix rather than by substring so that a
# method whose name happens to contain the metric string is never selected by accident.
df = correlation_df.loc[:, correlation_df.columns.str.endswith("_velocity")].copy()
df.columns = df.columns.str.removesuffix("_velocity")
df = pd.melt(df, var_name="method", value_name="correlation")

with mplscience.style_context():
    sns.set_style(style="whitegrid")
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.violinplot(
        data=df, x="correlation", y="method", hue="method", order=VELOCITY_METHODS, palette=METHOD_PALETTE, ax=ax
    )

    # One-sided Welch t-tests (unequal variances): is RegVelo's mean correlation greater
    # than each baseline's? Bracket endpoints are the methods' positions in the plot order.
    # NOTE(review): both brackets share the same `bracket_level` and `level`, so they may
    # overlap -- confirm against `add_significance`.
    for baseline in ["velovi", "scvelo"]:
        ttest_res = ttest_ind(
            correlation_df["regvelo_velocity"],
            correlation_df[f"{baseline}_velocity"],
            equal_var=False,
            alternative="greater",
        )
        significance = get_significance(pvalue=ttest_res.pvalue)
        add_significance(
            ax=ax,
            left=VELOCITY_METHODS.index("regvelo"),
            right=VELOCITY_METHODS.index(baseline),
            significance=significance,
            lw=1,
            bracket_level=1.05,
            c="k",
            level=0,
            orientation="vertical",
        )

    ax.set(
        xlabel="Pearson correlation",
        ylabel="Method",
        yticks=ax.get_yticks(),
        yticklabels=["RegVelo", "veloVI", "scVelo"],
    )

    if SAVE_FIGURES:
        # Save into the directory created in the settings cell ("toy_grn"); the previous
        # "toy_GRN" spelling points at a different folder on case-sensitive filesystems.
        fig.savefig(
            FIG_DIR / "toy_grn" / f"velocity_benchmark.{FIGURE_FORMATE}",
            format=FIGURE_FORMATE,
            transparent=True,
            bbox_inches="tight",
        )

    plt.show()
../_images/7367cce389e808bfe1a3884bdeb63110d3a5142b630acbf63ab462b3c2e5ea4a.png

Latent time#

# Long-format table of the latent time correlations, one row per (method, value) pair.
# Columns are matched on the "_time" suffix rather than by substring so that a method
# whose name happens to contain the metric string is never selected by accident.
df = correlation_df.loc[:, correlation_df.columns.str.endswith("_time")].copy()
df.columns = df.columns.str.removesuffix("_time")
df = pd.melt(df, var_name="method", value_name="correlation")

with mplscience.style_context():
    sns.set_style(style="whitegrid")
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.violinplot(
        data=df, x="correlation", y="method", hue="method", order=TIME_METHODS, palette=METHOD_PALETTE, ax=ax
    )

    # One-sided Welch t-tests (unequal variances): is RegVelo's mean correlation greater
    # than each baseline's? Every bracket starts at RegVelo, so RegVelo must be the first
    # sample of every test; the second test previously compared veloVI against scVelo
    # while its bracket was still drawn between RegVelo and scVelo.
    # NOTE(review): both brackets share the same `bracket_level` and `level`, so they may
    # overlap -- confirm against `add_significance`.
    for baseline in ["velovi", "scvelo"]:
        ttest_res = ttest_ind(
            correlation_df["regvelo_time"],
            correlation_df[f"{baseline}_time"],
            equal_var=False,
            alternative="greater",
        )
        significance = get_significance(pvalue=ttest_res.pvalue)
        add_significance(
            ax=ax,
            left=TIME_METHODS.index("regvelo"),
            right=TIME_METHODS.index(baseline),
            significance=significance,
            lw=1,
            bracket_level=1.05,
            c="k",
            level=0,
            orientation="vertical",
        )

    ax.set(
        xlabel="Spearman correlation",
        ylabel="Method",
        yticks=ax.get_yticks(),
        yticklabels=["RegVelo", "veloVI", "scVelo", "DPT"],
    )

    if SAVE_FIGURES:
        # Save into the directory created in the settings cell ("toy_grn"); the previous
        # "toy_GRN" spelling points at a different folder on case-sensitive filesystems.
        fig.savefig(
            FIG_DIR / "toy_grn" / f"time_benchmark.{FIGURE_FORMATE}",
            format=FIGURE_FORMATE,
            transparent=True,
            bbox_inches="tight",
        )

    plt.show()
../_images/ef77cd52790057e9040214825c9b56ae87520dff5198f70aed90eab907efa896.png

GRN#

# Long-format table of the GRN inference scores, one row per (method, value) pair.
# Columns are matched on the "_grn" suffix: a plain substring match on "grn" would also
# hit any column belonging to the "grnboost2" method, whatever metric it holds.
df = correlation_df.loc[:, correlation_df.columns.str.endswith("_grn")].copy()
df.columns = df.columns.str.removesuffix("_grn")
df = pd.melt(df, var_name="method", value_name="correlation")

with mplscience.style_context():
    sns.set_style(style="whitegrid")
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.violinplot(data=df, x="correlation", y="method", hue="method", order=GRN_METHODS, palette=METHOD_PALETTE, ax=ax)

    # One-sided Welch t-test (unequal variances): is RegVelo's mean score greater than
    # that of the correlation baseline? Bracket endpoints follow the plot order.
    ttest_res = ttest_ind(
        correlation_df["regvelo_grn"],
        correlation_df["correlation_grn"],
        equal_var=False,
        alternative="greater",
    )
    significance = get_significance(pvalue=ttest_res.pvalue)
    add_significance(
        ax=ax,
        left=GRN_METHODS.index("regvelo"),
        right=GRN_METHODS.index("correlation"),
        significance=significance,
        lw=1,
        bracket_level=1.05,
        c="k",
        level=0,
        orientation="vertical",
    )

    ax.set(
        xlabel="AUROC",
        ylabel="Method",
        yticks=ax.get_yticks(),
        yticklabels=["RegVelo", "Correlation", "GRNBoost2", "CellOracle"],
    )

    if SAVE_FIGURES:
        # Save into the directory created in the settings cell ("toy_grn"); the previous
        # "toy_GRN" spelling points at a different folder on case-sensitive filesystems.
        fig.savefig(
            FIG_DIR / "toy_grn" / f"grn_benchmark.{FIGURE_FORMATE}",
            format=FIGURE_FORMATE,
            transparent=True,
            bbox_inches="tight",
        )

    plt.show()
../_images/124cffc4fc5505d1f96055664261af803571a4000dec89a89695ea2c815b3f57.png