Source code for maayanlab_bioinformatics.utils.describe

''' Descriptive statistics on things that aren't pandas data frames.
This can often be a lot more efficient.
'''
import numpy as np
import typing as t

[docs] def np_describe(x, axis=0, *, percentiles=[25, 50, 75]) -> t.Dict[str, np.array]: ''' Like pandas Series.describe() but operating on numpy arrays / matrices. This can be a lot faster especially when working with h5py or sparse data frames. :params x: The numpy array to describe :params axis: The axis for which to perform describe against :returns: A dictionary mapping metric name to results ''' results = { 'count': (~np.isnan(x)).sum(axis=axis), 'mean': x.mean(axis=axis), 'std': x.std(axis=axis), 'min': x.min(axis=axis), 'max': x.max(axis=axis), } if percentiles: percentile = np.percentile(x, percentiles, axis=axis) results.update({ f"{p}%": percentile[i] for i, p in enumerate(percentiles) }) return results