Source code for maayanlab_bioinformatics.enrichment.gsea2003

import numpy as np
import pandas as pd

def GSEA2003(geneset_membership: pd.Series, gene_difference_metric: pd.Series):
    """
    Compute the GSEA (2003) running-sum curve for one gene set.

    Implementation of the algorithm described here:
    https://pubmed.ncbi.nlm.nih.gov/12808457/

    Genes are ranked by decreasing difference metric. Walking down the
    ranking, the running sum goes up by sqrt((N - G) / G) for every gene in
    the set and down by sqrt(G / (N - G)) for every gene outside it, where N
    is the number of ranked genes and G the number of those that are in the
    set. With these weights the curve ends back at (approximately) zero.

    :param geneset_membership: (pd.Series) True if in set, False if not,
      index: all genes. Values are cast to bool, so 0/1 flags work as well.
      Must contain an entry for every gene in ``gene_difference_metric``.
    :param gene_difference_metric: (pd.Series) Difference metric between two
      classes, e.g. SNR difference, indexed by gene.
    :return: (Tuple[np.array, np.array]) x and y arrays ready to be plotted,
      both of length N + 1. ES = y.max(). When the set contains none or all
      of the ranked genes the weights are undefined and y is 0 followed by
      NaN.
    :raises ValueError: if the membership of a ranked gene is missing (NaN).
    """
    # R_1, ... R_N: genes ordered by decreasing difference metric
    ranked = gene_difference_metric.sort_values(ascending=False)

    # S: membership flags re-ordered to follow the ranking
    S = geneset_membership.loc[ranked.index]
    if S.isna().any():
        raise ValueError(
            'geneset_membership has missing values for ranked genes'
        )
    # Force a boolean dtype: on integer 0/1 flags `~` would be a bitwise
    # NOT (~1 == -2) rather than a logical one.
    S = S.astype(bool)

    # Count over the ranked genes only, so that the weights and the length
    # of x always agree with the running sum even when geneset_membership
    # lists genes that have no difference metric.
    N = len(S)
    G = int(S.sum())

    if G == 0 or G == N:
        # One of the two weights would divide by zero; the statistic is
        # undefined, which is reported as NaN rather than raised.
        running_sum = np.full(N, np.nan)
    else:
        hit = np.sqrt((N - G) / G)    # X_i when member of S
        miss = np.sqrt(G / (N - G))   # X_i magnitude when not member of S
        running_sum = np.cumsum(np.where(S.to_numpy(), hit, -miss))

    # 0 added to beginning for plotting, doesn't affect sum
    x = np.arange(N + 1)
    y = np.concatenate([[0], running_sum])
    return x, y