Source code for maayanlab_bioinformatics.harmonization.homologs

import pandas as pd

from maayanlab_bioinformatics.utils.fetch_save_read import fetch_save_read

[docs] def mouse_human_homologs(uppercase=False): ''' Returns a dataframe with mouse/human gene mappings based on MGI. See: http://www.informatics.jax.org/homology.shtml @param uppercase: bool should mappings be uppercase (i.e. for case insensitive mapping) @returns pd.DataFrame ``` |mouse|human| |-----|-----| |sp140|SP140| ``` ''' mouse_human_sequence = fetch_save_read( 'http://www.informatics.jax.org/downloads/reports/HOM_MouseHumanSequence.rpt', 'HOM_MouseHumanSequence.rpt', sep='\t', ) mouse_human_sequence_simplified = pd.DataFrame([ dict( mouse=d.loc[d['Common Organism Name'].str.contains('mouse'), 'Symbol'].values, human=d.loc[d['Common Organism Name'].str.contains('human'), 'Symbol'].values, ) for _, d in mouse_human_sequence.groupby('DB Class Key') ]).explode('mouse').explode('human').dropna() if uppercase: mouse_human_sequence_simplified['mouse'] = mouse_human_sequence_simplified['mouse'].str.upper() mouse_human_sequence_simplified['human'] = mouse_human_sequence_simplified['human'].str.upper() return mouse_human_sequence_simplified
[docs] def human_expression_to_mouse(human_expression, strategy='sum', uppercase=False): ''' Given a human expression matrix, produce a mouse-compatible expression matrix by mapping homologs. @param human_expression: pd.DataFrame(columns=samples, index=human_genes, values=counts) @param strategy: 'sum' -- the strategy to use when aggregating duplicates @returns pd.DataFrame(columns=samples, index=mouse_genes, values=counts) ''' if strategy == 'sum': mouse_expression = pd.merge( left=human_expression.set_index(human_expression.index.str.upper()), left_index=True, right=mouse_human_homologs(uppercase=uppercase), right_on='human' ).groupby('mouse').sum() else: raise NotImplementedError return mouse_expression
[docs] def mouse_expression_to_human(mouse_expression, strategy='sum', uppercase=False): ''' Given a mouse expression matrix, produce a human-compatible expression matrix by mapping homologs. @param mouse_expression: pd.DataFrame(columns=samples, index=mouse_genes, values=counts) @param strategy: 'sum' -- the strategy to use when aggregating duplicates @returns pd.DataFrame(columns=samples, index=human_genes, values=counts) ''' if strategy == 'sum': human_expression = pd.merge( left=mouse_expression.set_index(mouse_expression.index.str.upper()), left_index=True, right=mouse_human_homologs(uppercase=uppercase), right_on='mouse' ).groupby('human').sum() else: raise NotImplementedError return human_expression