📥 Download all notebooks

3.2. Validation: Pathway signatures

📘 Overview

This notebook uses DESeq2 differential-expression results to perform pathway enrichment analysis with WikiPathways gene sets, producing –log₁₀ p-values that quantify pathway activation for each compound.

Inputs
DESeq2 differential expression data
Output
An AnnData file with –log₁₀ p-values for each pathway–compound pair
[ ]:
%%capture

!conda install -c bioconda gseapy
[1]:
import numpy as np


import dilimap as dmap
[2]:
# Reload imported modules automatically before each cell runs, so edits to
# local source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
[3]:
# Record provenance: prints the dilimap version, Python version and a timestamp.
dmap.logging.print_version()
Running dilimap 1.0.2 (python 3.10.16) on 2025-06-29 15:29.

Run pathway enrichment analysis

[4]:
# Load the DESeq2 results (an .h5ad file) from the dilimap S3 data package.
adata_deseq = dmap.s3.read('validation_data_deseq2.h5ad')
Package: s3://dilimap/public/data. Top hash: dd0504e0f5
[5]:
# Pull the 'FDR' layer (DESeq2 adjusted p-values) out as a DataFrame.
FDR = adata_deseq.to_df(layer='FDR')
[6]:
# Run the pathway enrichment on the FDR table. Per the notebook overview this
# uses WikiPathways gene sets and yields -log10 p-values per pathway.
# NOTE(review): the helper's internals are not visible here; confirm its defaults.
adata_wiki = dmap.pp.pathway_signatures(FDR)
[7]:
# Carry the sample metadata over from the DESeq2 object. A copy is taken so
# that later edits to adata_wiki.obs do not modify adata_deseq.obs.
# NOTE(review): assumes both objects list observations in the same order; confirm.
adata_wiki.obs = adata_deseq.obs.copy()

Mapping metadata and clinical annotations

[ ]:
## Simplify keys

# Map the upper-case metadata columns to the shorter names used from here on.
column_aliases = {
    'CONCENTRATION_UM': 'dose_uM',
    'DOSE_LEVEL': 'dose_level',
    'COMPOUND': 'compound_name',
}
adata_wiki.obs.rename(columns=column_aliases, inplace=True)

# Where no compound name is recorded, fall back to the observation name.
# NOTE(review): this fallback runs before the 'CPZ' substitution below, so a
# row filled here keeps the unexpanded name; confirm that is intended.
compound = adata_wiki.obs['compound_name']
adata_wiki.obs['compound_name'] = np.where(
    compound.isna(), adata_wiki.obs_names, compound
)

# Spell out the 'CPZ' abbreviation in the observation names.
expanded_names = adata_wiki.obs_names.str.replace('CPZ', 'Chlorpromazine')
adata_wiki.obs_names = expanded_names

# Set SPLIT to 'training' for every row whose compound is Ibrutinib.
is_ibrutinib = adata_wiki.obs['compound_name'] == 'Ibrutinib'
adata_wiki.obs.loc[is_ibrutinib, 'SPLIT'] = 'training'
[ ]:
## Cmax annotations
# Every lookup below is keyed on lower-cased observation names, and each
# annotation table's index is lower-cased too, so matching ignores case.
obs_names = adata_wiki.obs_names.str.lower()

df_CMAX = dmap.s3.read('compound_Cmax_values.csv')
df_CMAX.index = df_CMAX.index.str.lower()
cmax_by_name = df_CMAX['Cmax_median']
adata_wiki.obs['Cmax_uM'] = obs_names.map(cmax_by_name)

## DILI annotations
df_DILI = dmap.s3.read('compound_DILI_labels.csv')
df_DILI.index = df_DILI.index.str.lower()
# Copy every column of the DILI table onto obs under the same column name.
for col in df_DILI.columns:
    dili_values = obs_names.map(df_DILI[col])
    adata_wiki.obs[col] = dili_values

# In these two columns, missing entries become empty strings instead of NaN.
for label_col in ('DILIrank', 'livertox_score'):
    adata_wiki.obs[label_col] = adata_wiki.obs[label_col].replace(np.nan, '')

## Viability (LDH) IC10 annotations
df_LDH = dmap.s3.read('compound_cell_viability_IC10.csv')
df_LDH.index = df_LDH.index.str.lower()
# Same mapping as above, but the obs columns are prefixed with 'LDH_'.
for k in df_LDH.columns:
    ldh_values = obs_names.map(df_LDH[k])
    adata_wiki.obs[f'LDH_{k}'] = ldh_values

## Number DEGs
# Per observation, count the entries with FDR below 0.05 and store the count
# on both AnnData objects.
is_significant = adata_deseq.layers['FDR'] < 0.05
n_deg = is_significant.sum(axis=1)
adata_wiki.obs['n_DEG'] = n_deg
adata_deseq.obs['n_DEG'] = n_deg
[ ]:
# Display the observation names that lack a Cmax value and/or a DILI label.
lacks_annotation = adata_wiki.obs[['Cmax_uM', 'DILI_label']].isna().any(axis=1)
set(adata_wiki.obs_names[lacks_annotation])
[ ]:
# Keep only observations that have both a Cmax value and a DILI label.
has_cmax = adata_wiki.obs['Cmax_uM'].notna()
has_dili_label = adata_wiki.obs['DILI_label'].notna()
adata_wiki = adata_wiki[has_cmax & has_dili_label].copy()
[ ]:
# Replace NaN entries of X with 0. Note that np.nan_to_num, with its default
# arguments, also replaces +/-inf with the largest/smallest finite float.
adata_wiki.X = np.nan_to_num(adata_wiki.X, nan=0.0)

Push file to S3

[ ]:
# The upload is left disabled (commented out); uncomment the call below to
# write the annotated AnnData to the S3 package.
# dmap.s3.write(adata_wiki, 'validation_data_pathways.h5ad', package_name='public/data')