Performance comparison of inference on cell cycle

Performance comparison of inference on cell cycle#

This notebook compares metrics for velocity, latent time, and GRN inference across different methods applied to cell cycle data.

Library imports#

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import mplscience
import seaborn as sns

from rgv_tools import DATA_DIR, FIG_DIR

General settings#

# Dataset identifier; also names the figure sub-directory created below.
DATASET = "cell_cycle"
# Figure file format. NOTE(review): not referenced in the visible cells, which
# hard-code "svg"; the spelling is kept in case other code uses this name.
FIGURE_FORMATE = "svg"

# Toggle for writing figures to disk; the output directory is only created
# when saving is enabled.
SAVE_FIGURES = True
if SAVE_FIGURES:
    (FIG_DIR / DATASET).mkdir(exist_ok=True, parents=True)

Constants#

# Scale values for which result files are loaded and plotted.
NN_SCALE = [10, 30, 50, 70, 90, 100]

# Methods under comparison, in plotting order, and the colour used for each.
VELO_METHODS = ["regvelo", "velovi"]
VELO_METHOD_PALETTE = dict(zip(VELO_METHODS, ["#0173b2", "#de8f05"]))

Velocity loading#

## Velocity
# Read the regvelo velocity-consistency table for every scale, tag each table
# with the scale it belongs to, and stack them row-wise into one frame.
confi_df = pd.concat(
    [
        pd.read_parquet(DATA_DIR / "results" / f"regvelo_confidence_velocity_{scale}.parquet").assign(scale=scale)
        for scale in NN_SCALE
    ],
    axis=0,
)

Confidence#

# Display the stacked regvelo velocity-consistency table (one `scale` value per source file).
confi_df
velocity_consistency Dataset Method scale
0 0.941898 Cell cycle regvelo 10
1 0.951289 Cell cycle regvelo 10
2 0.904966 Cell cycle regvelo 10
3 0.541206 Cell cycle regvelo 10
4 0.886754 Cell cycle regvelo 10
... ... ... ... ...
1141 0.981466 Cell cycle regvelo 100
1142 0.918879 Cell cycle regvelo 100
1143 0.921603 Cell cycle regvelo 100
1144 0.963663 Cell cycle regvelo 100
1145 0.952200 Cell cycle regvelo 100

6876 rows × 4 columns

# Violin plot of velocity consistency for each scale in NN_SCALE.
with mplscience.style_context():
    sns.set_style(style="whitegrid")
    fig, ax = plt.subplots(figsize=(4, 3))
    sns.violinplot(
        data=confi_df,
        ax=ax,
        x="scale",
        y="velocity_consistency",
        color="grey",
        order=NN_SCALE,
    )
    ax.set_yticks([0, 0.25, 0.5, 0.75, 1])
    ax.set_yticklabels([0, 0.25, 0.5, 0.75, 1])
    ax.set_ylabel("Velocity confidence", fontsize=14)
    ax.set_xlabel("")

    # Save before showing, and show exactly once: some backends release the
    # figure on show(), so saving first is the order that works everywhere.
    # The file goes into the dataset folder created in the settings cell.
    if SAVE_FIGURES:
        fig.savefig(
            FIG_DIR / DATASET / "velocity_confidence_compare.svg",
            format="svg",
            transparent=True,
            bbox_inches="tight",
        )
    plt.show()
../_images/541385274a3ad8f2adad8d38fe5d1cb656cdb6f4f34c46d2b9617f0cf66e9f88.png

Compare on each level#

# Load the velocity-consistency tables of every method at every scale and
# stack them into a single long frame with a "Scale" column.
# Dedicated temporary names are used so the `confi_df` table built in the
# previous section is not overwritten by per-scale intermediates.
per_scale_frames = []
for scale in NN_SCALE:
    method_frames = [
        pd.read_parquet(DATA_DIR / "results" / f"{method}_confidence_velocity_{scale}_nn30.parquet")
        for method in VELO_METHODS
    ]
    per_scale_frames.append(pd.concat(method_frames, axis=0).assign(Scale=scale))
confi_dfs = pd.concat(per_scale_frames)
# Violin plot comparing the methods' velocity consistency at each scale.
with mplscience.style_context():
    sns.set_style(style="whitegrid")
    fig, ax = plt.subplots(figsize=(8, 2.5))
    sns.violinplot(
        data=confi_dfs,
        y="velocity_consistency",
        x="Scale",
        hue="Method",
        hue_order=VELO_METHODS,
        palette=VELO_METHOD_PALETTE,
        ax=ax,
    )

    # "Scale" is plotted on x and the consistency score on y, so label the
    # axes accordingly. Re-applying the current y ticks pins their positions
    # before the limits are changed below.
    ax.set(
        xlabel="Scale",
        ylabel="Velocity consistency",
        yticks=ax.get_yticks(),
    )
    ax.set_ylim(0, 1.1)

    # Honour the global switch and write into the dataset folder created in
    # the settings cell.
    if SAVE_FIGURES:
        fig.savefig(
            FIG_DIR / DATASET / "velocity_confidence.svg",
            format="svg",
            transparent=True,
            bbox_inches="tight",
        )

    plt.show()
../_images/b9d54c178c8b3112c84df2cc5d54d81aa17f5f64d6238b03871cbc1b9ad3b258.png
# Keep the velocity table under its own name, because `confi_dfs` is reused
# for the latent-time table right below.
confi_dfs_velo = confi_dfs.copy()

# Load the time-consistency tables of every method at every scale and stack
# them into a single long frame with a "Scale" column.
# Dedicated temporary names are used so the `confi_df` table built in an
# earlier section is not overwritten by per-scale intermediates.
per_scale_frames = []
for scale in NN_SCALE:
    method_frames = [
        pd.read_parquet(DATA_DIR / "results" / f"{method}_confidence_time_{scale}_nn30.parquet")
        for method in VELO_METHODS
    ]
    per_scale_frames.append(pd.concat(method_frames, axis=0).assign(Scale=scale))
confi_dfs = pd.concat(per_scale_frames)
# Violin plot comparing the methods' latent-time consistency at each scale.
with mplscience.style_context():
    sns.set_style(style="whitegrid")
    fig, ax = plt.subplots(figsize=(8, 2.5))
    sns.violinplot(
        data=confi_dfs,
        y="fit_t_consistency",
        x="Scale",
        hue="Method",
        hue_order=VELO_METHODS,
        palette=VELO_METHOD_PALETTE,
        ax=ax,
    )

    # "Scale" is plotted on x and the consistency score on y, so label the
    # axes accordingly. Re-applying the current y ticks pins their positions
    # before the limits are changed below.
    ax.set(
        xlabel="Scale",
        ylabel="Time consistency",
        yticks=ax.get_yticks(),
    )
    ax.set_ylim(0, 1.1)

    # Honour the global switch and write into the dataset folder created in
    # the settings cell.
    if SAVE_FIGURES:
        fig.savefig(
            FIG_DIR / DATASET / "time_confidence.svg",
            format="svg",
            transparent=True,
            bbox_inches="tight",
        )

    plt.show()
../_images/c4db5df0155f1e8dd4ebc38ca734745b189c6d7c193d783000559d5a092a5944.png
def _consistency_ratio(
    frame: pd.DataFrame,
    value_col: str,
    label: str,
    numerator: str = "regvelo",
    denominator: str = "velovi",
) -> pd.DataFrame:
    """Return per-scale ratios of `value_col` between two methods in long format.

    For every distinct value in ``frame["Scale"]`` the `numerator` rows are
    divided by the `denominator` rows. The division is aligned on the index
    labels, so both methods are expected to carry matching labels within a
    scale.

    Parameters
    ----------
    frame
        Long table with "Scale" and "Method" columns plus `value_col`.
    value_col
        Column holding the score to compare.
    label
        Value written to the "Class" column of the result.
    numerator, denominator
        Entries of the "Method" column forming the ratio.

    Returns
    -------
    DataFrame with columns "Scale", "Ratio" and "Class"; scales appear in
    ascending order.
    """
    ratio_frames = []
    for scale in np.unique(frame["Scale"]):
        at_scale = frame["Scale"] == scale
        top = frame.loc[at_scale & (frame["Method"] == numerator), value_col]
        bottom = frame.loc[at_scale & (frame["Method"] == denominator), value_col]
        # The ratio Series supplies the row index; the scalars are broadcast
        # to its length, so no row count has to be derived by hand.
        ratio_frames.append(pd.DataFrame({"Scale": scale, "Ratio": top / bottom, "Class": label}))
    return pd.concat(ratio_frames)


# Keep the latent-time table under its own name, mirroring `confi_dfs_velo`.
confi_dfs_time = confi_dfs.copy()

# Ratios for both kinds of consistency, stacked into one table for plotting.
confi_dfs_velo_ratio = _consistency_ratio(confi_dfs_velo, "velocity_consistency", "velocity")
confi_dfs_time_ratio = _consistency_ratio(confi_dfs_time, "fit_t_consistency", "time")
confi_df_all = pd.concat([confi_dfs_velo_ratio, confi_dfs_time_ratio])
confi_df_all
Scale Ratio Class
0 10 1.111715 velocity
1 10 1.087845 velocity
2 10 1.113454 velocity
3 10 0.976719 velocity
4 10 1.001859 velocity
... ... ... ...
1141 100 1.124350 time
1142 100 1.139394 time
1143 100 1.109670 time
1144 100 1.142055 time
1145 100 1.176460 time

13752 rows × 3 columns

# Turn the scale into string labels and replace the ratio by its base-2
# logarithm, computed on float32 values.
# NOTE(review): this overwrites "Ratio" in place, so running the cell twice
# applies the logarithm twice.
confi_df_all["Scale"] = confi_df_all["Scale"].astype(str)
confi_df_all["Ratio"] = np.log2(confi_df_all["Ratio"].astype(np.float32))
# Colours for the two "Class" values in the ratio table.
custom_palette = {
    "velocity": "#7D7C78",  # neutral grey
    "time": "#BCAE6C",  # muted gold
}

# Violin plot of the log2 consistency ratio per scale, split by class.
with mplscience.style_context():
    sns.set_style(style="whitegrid")
    fig, ax = plt.subplots(figsize=(4, 3))
    sns.violinplot(
        data=confi_df_all,
        ax=ax,
        y="Ratio",
        x="Scale",
        hue="Class",
        palette=custom_palette,
    )
    ax.set_ylim(-0.5, 0.5)
    # The ratio is plotted on the y-axis (already log2-transformed); the
    # x-axis carries the scale labels and stays untitled.
    ax.set_ylabel("Confidence ratio (log2)", fontsize=14)
    ax.set_xlabel("")

    # Save before showing, and show exactly once: some backends release the
    # figure on show(), so saving first is the order that works everywhere.
    # The file goes into the dataset folder created in the settings cell.
    if SAVE_FIGURES:
        fig.savefig(
            FIG_DIR / DATASET / "confi_all_kinetics.svg",
            format="svg",
            transparent=True,
            bbox_inches="tight",
        )
    plt.show()
../_images/d68c70082456a4adad7f1874aa603038109f30174f3707225ba71ce19f97e6a6.png