Perturb-seq data analysis#

Notebook for analyzing zebrafish Perturb-seq data.

Library imports#

import numpy as np
import pandas as pd
import scipy

import matplotlib.pyplot as plt
import mplscience
import seaborn as sns

import scanpy as sc
import scvelo as scv

from rgv_tools import DATA_DIR, FIG_DIR
/home/icb/weixu.wang/miniconda3/envs/regvelo_test/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_csv from `anndata` is deprecated. Import anndata.io.read_csv instead.
  warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/regvelo_test/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_excel from `anndata` is deprecated. Import anndata.io.read_excel instead.
  warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/regvelo_test/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_hdf from `anndata` is deprecated. Import anndata.io.read_hdf instead.
  warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/regvelo_test/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_loom from `anndata` is deprecated. Import anndata.io.read_loom instead.
  warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/regvelo_test/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_mtx from `anndata` is deprecated. Import anndata.io.read_mtx instead.
  warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/regvelo_test/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_text from `anndata` is deprecated. Import anndata.io.read_text instead.
  warnings.warn(msg, FutureWarning)
/home/icb/weixu.wang/miniconda3/envs/regvelo_test/lib/python3.10/site-packages/anndata/utils.py:429: FutureWarning: Importing read_umi_tools from `anndata` is deprecated. Import anndata.io.read_umi_tools instead.
  warnings.warn(msg, FutureWarning)

General setting#

plt.rcParams["svg.fonttype"] = "none"
sns.reset_defaults()
sns.reset_orig()
scv.settings.set_figure_params("scvelo", dpi_save=400, dpi=80, transparent=True, fontsize=14, color_map="viridis")

Constants#

DATASET = "zebrafish"
SAVE_FIGURES = True
if SAVE_FIGURES:
    (FIG_DIR / DATASET).mkdir(parents=True, exist_ok=True)

Data loading#

adata = sc.read_h5ad(DATA_DIR / DATASET / "raw" / "seu_NC_clustered_normalized.h5ad")
ss3 = sc.read_h5ad(DATA_DIR / DATASET / "processed" / "adata_preprocessed.h5ad")
metadata = pd.read_csv(DATA_DIR / DATASET / "raw" / "metadata.csv", index_col=0)
adata = adata[metadata.index.tolist()]
adata.obs = metadata

Visualize via UMAP#

# Using MELD to calculate sample-associated density estimates and relative likelihood
metadata = adata.obs
metadata["genotype_name"] = metadata["sgRNA_group"]
metadata["genotype"] = metadata["sgRNA_group"]
metadata["replicate"] = "A"
metadata
orig.ident nCount_RNA nFeature_RNA sample percent.mt RNA_snn_res.1 seurat_clusters sgRNA_group DoubletFinder proj RNA_snn_res.0.8 cell_anno latent_time_imputed latent_time_imputed_scvi term_state_imputed cell_anno_old genotype_name genotype replicate
p1_AAACGGGAGCTAGTTC-1 SeuratProject 9374.373090 3125 p1 0.601578 14 14 fli1a_erf_erfl3 Singlet perturbseq 6 mNC_head_mesenchymal 0.599100 0.567514 NaN mNC_head_mesenchymal fli1a_erf_erfl3 fli1a_erf_erfl3 A
p1_AAACGGGCAAAGAATC-1 SeuratProject 16617.288939 3908 p1 0.752648 2 2 fli1a_erf_erfl3 Singlet perturbseq 9 Pigment_gch2_high 0.761031 0.783371 NaN Pigment fli1a_erf_erfl3 fli1a_erf_erfl3 A
p1_AAACGGGCAATGTAAG-1 SeuratProject 10564.695306 3262 p1 0.888390 17 17 mitfa_tfec_inhouse Singlet perturbseq 4 mNC_hox34 0.364584 0.289242 NaN unclassified2 mitfa_tfec_inhouse mitfa_tfec_inhouse A
p1_AAACGGGCAGCAGTTT-1 SeuratProject 17754.127675 3934 p1 0.579913 19 19 fli1a_erf_erfl3 Singlet perturbseq 7 mNC_arch2 0.236565 0.428227 NaN mNC_arch2 fli1a_erf_erfl3 fli1a_erf_erfl3 A
p1_AAACGGGGTAGCGTAG-1 SeuratProject 15028.838200 3638 p1 0.467387 6 6 fli1a_erf_erfl3 Singlet perturbseq 5 unclassified1 0.268428 0.410988 NaN unclassified1 fli1a_erf_erfl3 fli1a_erf_erfl3 A
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
p11_TTTGCGCAGAGTAATC-1 SeuratProject 6202.170676 2499 p11 0.776390 8 8 ebf3a Singlet perturbseq 0 mNC_vagal 0.387833 0.428689 NaN Pigment_sox6_high_vagel ebf3a ebf3a A
p11_TTTGGTTTCAACTCTT-1 SeuratProject 3644.718485 1857 p11 1.794744 5 5 elf1 Singlet perturbseq 6 mNC_head_mesenchymal 0.538755 0.549123 NaN mNC_head_mesenchymal elf1 elf1 A
p11_TTTGGTTTCCTATTCA-1 SeuratProject 8422.826902 3344 p11 0.510019 25 25 nr2f2 Singlet perturbseq 5 mNC_arch1 0.412674 0.363753 NaN mNC_arch1 nr2f2 nr2f2 A
p11_TTTGGTTTCTAACTCT-1 SeuratProject 6612.583387 2661 p11 0.449287 6 6 ebf3a Singlet perturbseq 15 unclassified1 0.259958 0.374038 NaN unclassified1 ebf3a ebf3a A
p11_TTTGTCAAGGACAGCT-1 SeuratProject 11372.933636 3694 p11 0.594269 14 14 multiplet Singlet perturbseq 6 mNC_head_mesenchymal 0.489412 0.520055 NaN mNC_head_mesenchymal multiplet multiplet A

12393 rows × 19 columns

## Update pigment cell annotation
adata.obs["cell_anno_new"] = adata.obs["cell_anno"].copy()
adata.obs["cell_anno_new"][adata.obs["cell_anno_new"] == "Pigment_gch2_high"] = "Pigment"
perturbseq = adata[
    adata.obs["sgRNA_group"].isin(
        [
            "mitfa_tfec_inhouse",
            "negative",
            "control",
            "mitfa",
            "tfec",
            "tfec_mitfa_bhlhe40",
            "mitfa_tfec",
            "mitfa_tfec_tfeb",
            "elf1",
            "nr2f2",
            "nr2f5",
        ]
    )
].copy()

highlight_cell_types = [
    "mNC_hox34",
    "mNC_head_mesenchymal",
    "mNC_arch2",
    "Pigment",
    "Other",
]  # replace with your cell types of interest

# Create a new column for colors
perturbseq.obs["highlight"] = perturbseq.obs["cell_anno_new"].copy()
palette = dict(zip(ss3.obs["cell_type"].cat.categories, ss3.uns["cell_type_colors"]))
# Set cell types to grey if they are not in the highlight list
perturbseq.obs["highlight"] = perturbseq.obs["highlight"].apply(lambda x: x if x in highlight_cell_types else "Other")

# Create a color map where 'Other' is grey
colors = ["grey" if ct == "Other" else palette[ct] for i, ct in enumerate(highlight_cell_types)]
color_map = {ct: color for ct, color in zip(highlight_cell_types, colors)}
color_map["Other"] = "grey"
with mplscience.style_context():
    fig, ax = plt.subplots(figsize=(7, 5))
    sc.pl.embedding(
        perturbseq, color="highlight", basis="phate", legend_loc=None, palette=color_map, frameon=False, size=30, ax=ax
    )

    if SAVE_FIGURES:
        fig.savefig(FIG_DIR / DATASET / "INTRO_figure_perturb.svg", format="svg", transparent=True, bbox_inches="tight")
    plt.show()
../_images/dc6417570024e253749e61079c279c2f2eba842b0db4e549f839e2e49534e908.png

Visualize nr2f2, tfec, mitfa and elf1 perturbation effect#

sgRNA_groups = ["nr2f2", "tfec", "mitfa", "elf1"]
# Visualize the perturbation on terminal states
cell_types = [
    "mNC_hox34",
    "mNC_head_mesenchymal",
    "mNC_arch2",
    "Pigment",
]

for group in sgRNA_groups:
    sample_likelihoods = pd.read_csv(DATA_DIR / DATASET / "raw" / f"likelihood_{group}.csv", index_col=0)
    condition = (metadata["sgRNA_group"] == "control") | (metadata["sgRNA_group"] == group)
    experimental_samples = [f"{group}A", f"{group}B"]

    df = pd.DataFrame(sample_likelihoods[experimental_samples])
    df[group] = df.mean(1)
    # Calculate the percentiles values

    down = df[group].quantile(0.2)
    up = df[group].quantile(0.8)

    # Apply the condition to set values outside the 5th and 95th percentiles to NaN
    df["quantile_value"] = np.where((df[group] > up) | (df[group] < down), df[group], np.nan)
    df["quantile_value"] = df["quantile_value"] - np.mean(df[group])

    vec = adata[condition].obs["cell_anno_new"].apply(lambda x: 1 if x in cell_types else np.nan)
    df["quantile_value"] = np.array(df["quantile_value"]) * np.array(vec)

    adata_sub = adata[condition].copy()
    adata_sub.obs[f"{group}_score"] = np.array(df["quantile_value"]).copy()

    ## Plot the figure
    overall_score = pd.DataFrame({"score": np.array([np.nan] * adata_sub.shape[0])})
    overall_score.index = adata_sub.obs.index.tolist()
    overall_score.loc[adata_sub.obs.index.tolist(), "score"] = np.array(adata_sub.obs[f"{group}_score"])

    adata_sub.obs[f"{group}_score"] = np.array(overall_score["score"])
    adata_sub.obs["highlight"] = (~np.isnan(adata_sub.obs[f"{group}_score"])).astype(str)

    highlighted_points = adata_sub[adata_sub.obs["highlight"] == "True"]
    x_highlight = highlighted_points.obsm["X_phate"][:, 0]
    y_highlight = highlighted_points.obsm["X_phate"][:, 1]
    color_values = highlighted_points.obs[f"{group}_score"]  # Adjust to your continuous variable

    with mplscience.style_context():  # Use the mplscience style context
        fig, ax = plt.subplots(figsize=(6, 4))

        sc.pl.embedding(adata_sub, basis="phate", color=f"{group}_score", vcenter=0, show=False, ax=ax)
        plt.scatter(
            x_highlight,
            y_highlight,
            c=color_values,
            cmap="viridis",  # Choose a gradient color map, e.g., 'viridis', 'plasma', 'inferno', etc.
            s=20,  # Larger size for highlighted points
            # edgecolor='black',  # Optional: border for emphasis
        )

        if SAVE_FIGURES:
            fig.savefig(
                FIG_DIR / DATASET / f"{group}_perturbation.svg", format="svg", transparent=True, bbox_inches="tight"
            )
        plt.show()
../_images/37ea0d92fc0f0cb1c6e22b224aba02a24eb69edda3d9881c98e8758c23c64b1a.png ../_images/f0bbaf1c26586205607d3b329091e93a800ea2047a134e66e75b6e2b6c23ec38.png ../_images/7e7a6d8b2c593cb02f2635c76454d0a45920093e16ef6f6540f6b572ac86d855.png ../_images/86f14e8c2aba238ed502b513ecbee4362b68b355f29ff51d22d4132421b6e154.png

Visualize differential expressed genes in tfec perturbation panel#

perturbseq = adata.copy()
perturbseq = perturbseq[
    ~perturbseq.obs["cell_anno_new"].isin(["unclassified2", "unclassified1", "Mutant_hox23", "Mutant"]),
]
seu_small = perturbseq[perturbseq.obs["sgRNA_group"].isin(["control", "tfec"]),]
genotype = seu_small.obs["sgRNA_group"].tolist()
with mplscience.style_context():  # Use the mplscience style context
    sns.set_style(style="whitegrid")
    fig, ax = plt.subplots(figsize=(3, 3))

    # Create the scatter plot
    sc.pl.violin(seu_small, ["cdkn1ca"], multi_panel=False, groupby="sgRNA_group", stripplot=True, ax=ax, show=False)
    ax.set_ylim(bottom=-0.1, top=4)

    if SAVE_FIGURES:
        fig.savefig(FIG_DIR / DATASET / "cdkn1ca.svg", format="svg", transparent=True, bbox_inches="tight")

    # Display the plot
    plt.show()
../_images/42924cdaadb34da9e0726c06714f4ab7d752a6d4e1457fe7183ba95d079746c6.png
with mplscience.style_context():  # Use the mplscience style context
    sns.set_style(style="whitegrid")
    fig, ax = plt.subplots(figsize=(3, 3))

    # Create the scatter plot
    sc.pl.violin(seu_small, ["atp6ap2"], multi_panel=False, groupby="sgRNA_group", stripplot=True, ax=ax, show=False)
    ax.set_ylim(bottom=-0.1, top=4)

    if SAVE_FIGURES:
        fig.savefig(FIG_DIR / DATASET / "atp6ap2.svg", format="svg", transparent=True, bbox_inches="tight")

    # Display the plot
    plt.show()
../_images/831b0d57cd858d7d4803577b40cd77aa4f0c7fc2bd35a8b703b50ae33e265f43.png

test significance#

scipy.stats.ttest_ind(
    seu_small[np.array(genotype) == "tfec", "cdkn1ca"].X.toarray().reshape(-1),
    seu_small[np.array(genotype) != "tfec", "cdkn1ca"].X.toarray().reshape(-1),
    equal_var=False,
    alternative="less",
)
TtestResult(statistic=-2.0416193271649687, pvalue=0.021191380003823368, df=219.9875671451483)
scipy.stats.ttest_ind(
    seu_small[np.array(genotype) == "tfec", "atp6ap2"].X.toarray().reshape(-1),
    seu_small[np.array(genotype) != "tfec", "atp6ap2"].X.toarray().reshape(-1),
    equal_var=False,
    alternative="less",
)
TtestResult(statistic=-5.547499181077916, pvalue=3.4438545499844835e-08, df=270.9981680245929)

Visualize differential expressed genes in elf1 perturbation panel#

perturbseq = adata.copy()
perturbseq = perturbseq[
    ~perturbseq.obs["cell_anno_new"].isin(["unclassified2", "unclassified1", "Mutant_hox23", "Mutant"]),
]
seu_small = perturbseq[perturbseq.obs["sgRNA_group"].isin(["control", "elf1"]),]
genotype = seu_small.obs["sgRNA_group"].tolist()
with mplscience.style_context():  # Use the mplscience style context
    sns.set_style(style="whitegrid")
    fig, ax = plt.subplots(figsize=(3, 3))

    # Create the scatter plot
    sc.pl.violin(seu_small, ["mitfa"], multi_panel=False, groupby="sgRNA_group", stripplot=True, ax=ax, show=False)
    ax.set_ylim(bottom=-0.1, top=4)

    if SAVE_FIGURES:
        fig.savefig(
            FIG_DIR / DATASET / "mitfa_expression_elf1_knockout.svg",
            format="svg",
            transparent=True,
            bbox_inches="tight",
        )

    # Display the plot
    plt.show()
../_images/7e8ac568d4a38804e162cd9fc957197c7e9080bb9d0c60c02ddcfd91ba6a0f05.png
with mplscience.style_context():  # Use the mplscience style context
    sns.set_style(style="whitegrid")
    fig, ax = plt.subplots(figsize=(3, 3))

    # Create the scatter plot
    sc.pl.violin(seu_small, ["sox10"], multi_panel=False, groupby="sgRNA_group", stripplot=True, ax=ax, show=False)
    ax.set_ylim(bottom=-0.1, top=4)

    if SAVE_FIGURES:
        fig.savefig(
            FIG_DIR / DATASET / "sox10_expression_elf1_knockout.svg",
            format="svg",
            transparent=True,
            bbox_inches="tight",
        )

    # Display the plot
    plt.show()
../_images/b56a6ec72f31c840f82c4767e98d34530f78521ad83293e3c6e7c1fce2c55729.png
with mplscience.style_context():  # Use the mplscience style context
    sns.set_style(style="whitegrid")
    fig, ax = plt.subplots(figsize=(3, 3))

    # Create the scatter plot
    sc.pl.violin(seu_small, ["fli1a"], multi_panel=False, groupby="sgRNA_group", stripplot=True, ax=ax, show=False)

    if SAVE_FIGURES:
        fig.savefig(
            FIG_DIR / DATASET / "fli1a_expression_elf1_knockout.svg",
            format="svg",
            transparent=True,
            bbox_inches="tight",
        )

    # Display the plot
    plt.show()
../_images/49866fc5eed286dd0e2a3e30019fc8592b2846a619bfd0f7d67e0150b093cfdd.png

test significance#

scipy.stats.ttest_ind(
    seu_small[np.array(genotype) == "elf1", "mitfa"].X.toarray().reshape(-1),
    seu_small[np.array(genotype) != "elf1", "mitfa"].X.toarray().reshape(-1),
    equal_var=False,
    alternative="less",
)
TtestResult(statistic=-3.429003795549063, pvalue=0.0003452279743702678, df=300.522449116822)
scipy.stats.ttest_ind(
    seu_small[np.array(genotype) == "elf1", "sox10"].X.toarray().reshape(-1),
    seu_small[np.array(genotype) != "elf1", "sox10"].X.toarray().reshape(-1),
    equal_var=False,
    alternative="less",
)
TtestResult(statistic=-4.108995730134873, pvalue=2.6789127768845648e-05, df=255.2575590451451)
scipy.stats.ttest_ind(
    seu_small[np.array(genotype) == "elf1", "fli1a"].X.toarray().reshape(-1),
    seu_small[np.array(genotype) != "elf1", "fli1a"].X.toarray().reshape(-1),
    equal_var=False,
    alternative="greater",
)
TtestResult(statistic=1.3860128076275413, pvalue=0.0835617324921002, df=223.63128948496157)

Visualize differential expressed genes in pigment cell lineage of elf1 perturbation panel#

perturbseq = adata.copy()
perturbseq = perturbseq[perturbseq.obs["cell_anno_new"].isin(["Pigment"]),]
seu_small = perturbseq[perturbseq.obs["sgRNA_group"].isin(["control", "elf1"]),]
genotype = seu_small.obs["sgRNA_group"].tolist()
with mplscience.style_context():  # Use the mplscience style context
    sns.set_style(style="whitegrid")
    fig, ax = plt.subplots(figsize=(3, 3))

    # Create the scatter plot
    sc.pl.violin(seu_small, ["pmp22a"], multi_panel=False, groupby="sgRNA_group", stripplot=True, ax=ax, show=False)

    if SAVE_FIGURES:
        fig.savefig(
            FIG_DIR / DATASET / "pmp22a_expression_elf1_knockout.svg",
            format="svg",
            transparent=True,
            bbox_inches="tight",
        )

    # Display the plot
    plt.show()
../_images/06d5ae9098da2c819311f12b6ff55a000854cf41b3b238c2e5901f3edaba2735.png
with mplscience.style_context():  # Use the mplscience style context
    sns.set_style(style="whitegrid")
    fig, ax = plt.subplots(figsize=(3, 3))

    # Create the scatter plot
    sc.pl.violin(seu_small, ["cited1"], multi_panel=False, groupby="sgRNA_group", stripplot=True, ax=ax, show=False)

    if SAVE_FIGURES:
        fig.savefig(
            FIG_DIR / DATASET / "cited1_expression_elf1_knockout.svg",
            format="svg",
            transparent=True,
            bbox_inches="tight",
        )

    # Display the plot
    plt.show()
../_images/cd91b8a2f1830e983cd043f4dd52d70237ba1fec47086deec16929a8a32e3a28.png
with mplscience.style_context():  # Use the mplscience style context
    sns.set_style(style="whitegrid")
    fig, ax = plt.subplots(figsize=(3, 3))

    # Create the scatter plot
    sc.pl.violin(seu_small, ["hmgn2"], multi_panel=False, groupby="sgRNA_group", stripplot=True, ax=ax, show=False)

    if SAVE_FIGURES:
        fig.savefig(
            FIG_DIR / DATASET / "hmgn2_expression_elf1_knockout.svg",
            format="svg",
            transparent=True,
            bbox_inches="tight",
        )

    # Display the plot
    plt.show()
../_images/ea57db1348ec037f5f956d0f566d3845abb19608b2d01d3ad156f7b30740ef2b.png
with mplscience.style_context():  # Use the mplscience style context
    sns.set_style(style="whitegrid")
    fig, ax = plt.subplots(figsize=(3, 3))

    # Create the scatter plot
    sc.pl.violin(seu_small, ["arl6ip1"], multi_panel=False, groupby="sgRNA_group", stripplot=True, ax=ax, show=False)

    if SAVE_FIGURES:
        fig.savefig(
            FIG_DIR / DATASET / "arl6ip1_expression_elf1_knockout.svg",
            format="svg",
            transparent=True,
            bbox_inches="tight",
        )

    # Display the plot
    plt.show()
../_images/7be389076d324c2b0744099d184b9f1071a1e9a9300928e88a38eca9e7a16e83.png

Visualize elf1 gene expression change when knockout tfec and fli1a#

knockout tfec#

perturbseq = adata.copy()
perturbseq = perturbseq[
    ~perturbseq.obs["cell_anno_new"].isin(["unclassified2", "unclassified1", "Mutant_hox23", "Mutant"]),
]
seu_small = perturbseq[perturbseq.obs["sgRNA_group"].isin(["control", "tfec"]),]
genotype = seu_small.obs["sgRNA_group"].tolist()
with mplscience.style_context():  # Use the mplscience style context
    sns.set_style(style="whitegrid")
    fig, ax = plt.subplots(figsize=(3, 3))

    # Create the scatter plot
    sc.pl.violin(seu_small, ["elf1"], multi_panel=False, groupby="sgRNA_group", stripplot=True, ax=ax, show=False)

    if SAVE_FIGURES:
        fig.savefig(
            FIG_DIR / DATASET / "elf1_expression_tfec_knockout.svg", format="svg", transparent=True, bbox_inches="tight"
        )

    # Display the plot
    plt.show()
../_images/cd11c12ae7d090c54cc0ede5baa71fdff9bd8b005ad23619dbc3148f0e848edd.png

test significance#

scipy.stats.ttest_ind(
    seu_small[np.array(genotype) == "tfec", "elf1"].X.toarray().reshape(-1),
    seu_small[np.array(genotype) != "tfec", "elf1"].X.toarray().reshape(-1),
    equal_var=False,
    alternative="less",
)
TtestResult(statistic=-5.366020270431321, pvalue=8.610173685738961e-08, df=272.49193453818)

knockout fli1a#

perturbseq = adata.copy()
perturbseq = perturbseq[
    ~perturbseq.obs["cell_anno_new"].isin(["unclassified2", "unclassified1", "Mutant_hox23", "Mutant"]),
]
seu_small = perturbseq[perturbseq.obs["sgRNA_group"].isin(["control", "fli1a"]),]
genotype = seu_small.obs["sgRNA_group"].tolist()
with mplscience.style_context():  # Use the mplscience style context
    sns.set_style(style="whitegrid")
    fig, ax = plt.subplots(figsize=(3, 3))

    # Create the scatter plot
    sc.pl.violin(seu_small, ["elf1"], multi_panel=False, groupby="sgRNA_group", stripplot=True, ax=ax, show=False)

    if SAVE_FIGURES:
        fig.savefig(
            FIG_DIR / DATASET / "elf1_expression_fli1a_knockout.svg",
            format="svg",
            transparent=True,
            bbox_inches="tight",
        )

    # Display the plot
    plt.show()
../_images/9b9c3ae546e3cccba0e78a64e9a6721b5daaaef56b704e3e6d973d993762597d.png

test significance#

scipy.stats.ttest_ind(
    seu_small[np.array(genotype) == "fli1a", "elf1"].X.toarray().reshape(-1),
    seu_small[np.array(genotype) != "fli1a", "elf1"].X.toarray().reshape(-1),
    equal_var=False,
    alternative="greater",
)
TtestResult(statistic=3.059975496268048, pvalue=0.0011362106276185085, df=1002.4934192631699)