Source code for maayanlab_bioinformatics.dge.ttest
import pandas as pd
import scipy.stats
from maayanlab_bioinformatics.normalization import log2_normalize
[docs]
def ttest_differential_expression(controls_mat: pd.DataFrame, cases_mat: pd.DataFrame, equal_var=False, alternative='two-sided', log2norm=True):
''' Given two separate dataframes (controls, cases) with a shared index (genes),
we compute the ttest differential expression for all genes. Benjamini-Hochberg Adjusted p-value.
:param controls_mat: (pd.DataFrame) the control samples (samples as columns and genes as rows)
:param cases_mat: (pd.DataFrame) the case samples (samples as columns and genes as rows)
:param equal_var: (bool) Should t-test assume equal variance (default: False)
:param alternative: (str) Alternative hypothesis (see scipy.stats.ttest_ind) (default: two-sided)
:param log2norm: (bool) Apply log2norm, typically keep with raw counts but disable if you have normalized data (default: True)
:return: A data frame with the results
'''
assert (controls_mat.index == cases_mat.index).all(), 'Index between controls and cases must be the same'
if log2norm:
cases_mat = log2_normalize(cases_mat)
controls_mat = log2_normalize(controls_mat)
results = scipy.stats.ttest_ind(cases_mat.T, controls_mat.T, equal_var=equal_var, alternative=alternative)
df_results = pd.DataFrame({
'Statistic': results.statistic,
'Pval': results.pvalue,
}, index=controls_mat.index)
df_results['AdjPval'] = scipy.stats.false_discovery_control(df_results['Pval'].fillna(1.), method='bh')
df_results.sort_values('AdjPval', inplace=True)
return df_results