logo Dexamethasone Benchmark Resource
  • About
  • Code
  • Data
  • Figures
  • Stats

Retrieving L1000 Data with SigComLINCS API

import json
import requests
import pandas as pd
# Base URL of the SigCom LINCS metadata API; the endpoint paths
# ("/libraries/find", "/signatures/find") are appended to it below.
metadata_api = "https://maayanlab.cloud/sigcom-lincs/metadata-api"

Chemical Perturbation Signatures

# Enter perturbagen of interest
chempert = 'dexamethasone'
# Full-text search used to locate the chemical perturbation library record.
payload = {
    "filter": {
        "where": {
            "meta": {
                "fullTextSearch": "Chemical Perturbation 2021"
            }
        }
    }
}

# A timeout keeps the notebook from hanging indefinitely on a stalled connection.
res = requests.post(metadata_api + "/libraries/find", json=payload, timeout=60)
# Fail here on an HTTP error instead of later on an unexpected response body.
res.raise_for_status()
res.json()
[{'$validator': '/dcic/signature-commons-schema/v5/core/library.json',
  'id': '54198d6e-fe17-5ef8-91ac-02b425761653',
  'resource': 'f2bace34-022d-4147-9ca4-7b6e450c794d',
  'dataset': 'l1000_cp',
  'dataset_type': 'rank_matrix',
  'meta': {'date': '2021-06-10',
   'icon': './static/images/lincs/CMap.png',
   'size': '35.57 GB',
   'assay': 'L1000 mRNA profiling assay',
   'center': 'LINCS Center for Transcriptomics (Broad Institute)',
   '$validator': 'https://raw.githubusercontent.com/MaayanLab/sigcom-lincs/main/validators/lincs_datasets.json',
   'total_size': 35565630496,
   'url_prefix': 'https://lincs-dcic.s3.amazonaws.com/LINCS-sigs-2021/cd/cp',
   'datalevel_5': {'id': 'L1000_cp',
    'date': '2021-06-10',
    'link': 'https://clue.io/data/CMap2020#LINCS2020',
    'size': '35.57 GB',
    'version': '1',
    'file_url': 'https://lincs-dcic.s3.amazonaws.com/LINCS-sigs-2021/gctx/cd-coefficient/cp_coeff_mat.gctx',
    'filesize': 35565630496,
    'datalevel': 5,
    'description': 'LINCS L1000 Chemical Perturbations (2021)'},
   'description': 'LINCS L1000 Chemical Perturbations (2021)',
   '$download_counter': 12}}]
# Take the first library returned by the library search and keep its
# dataset name and id.
chem_pert_dataset = res.json()[0]
chem_dataset = chem_pert_dataset['dataset']
libid = chem_pert_dataset["id"]
# Restrict the signature search to that library and to the chosen perturbagen.
payload = {
    "filter": {
        "where": {
            "meta.pert_name": chempert,
            "library": libid
        }
    }
}

# A timeout keeps the notebook from hanging indefinitely on a stalled connection.
chem_res = requests.post(metadata_api + "/signatures/find", json=payload, timeout=60)
# Fail here on an HTTP error instead of treating an error body as signatures.
chem_res.raise_for_status()
chem_signatures = chem_res.json()
len(chem_signatures)
467
# Display the first returned signature record to inspect its fields.
chem_signatures[0]
{'$validator': '/dcic/signature-commons-schema/v5/core/signature.json',
 'id': '0151d673-edfa-58eb-a0f4-9d163eda17ca',
 'library': '54198d6e-fe17-5ef8-91ac-02b425761653',
 'meta': {'md5': 'f96c2a71ca38b0ec02cc03e99b0fcf12',
  'sha256': '405756e3568ae98df502ecb5e7c3ec7386b0bc8527b0a00907066fe10fdb9317',
  'tissue': 'skeletal muscle organ',
  'anatomy': 'UBERON:0014892',
  'cmap_id': 'CPC015_SKB_24H:BRD-K47635719-001-03-9:10',
  'version': 1,
  'filename': 'L1000_LINCS_DCIC_CPC015_SKB_24H_C04_dexamethasone_10uM.tsv',
  'local_id': 'CPC015_SKB_24H_C04_dexamethasone_10uM',
  'cell_line': 'SKB',
  'pert_dose': '10 uM',
  'pert_name': 'dexamethasone',
  'pert_time': '24 h',
  'pert_type': 'Chemical',
  '$validator': 'https://raw.githubusercontent.com/MaayanLab/sigcom-lincs/main/validators/l1000_signatures.json',
  'data_level': 5,
  'pubchem_id': 5743,
  'creation_time': '2021-05-15',
  'persistent_id': 'https://lincs-dcic.s3.amazonaws.com/LINCS-sigs-2021/cd/cp/L1000_LINCS_DCIC_CPC015_SKB_24H_C04_dexamethasone_10uM.tsv',
  'size_in_bytes': 217239,
  'uncompressed_size_in_bytes': 217239}}
# Metadata fields kept in the exported table, in output order.
chem_columns = [
    'tissue', 'disease', 'cell_line', 'pert_dose', 'pert_name',
    'pert_time', 'pert_type', 'data_level', 'creation_time', 'persistent_id',
]
# Build the frame from every 'meta' record in a single call instead of
# concatenating one-row frames in a loop (which re-copies the table on each
# iteration and fails on an empty result). reindex() keeps the listed columns
# and fills any field that no record provides with NaN rather than raising.
chem_table = pd.DataFrame(
    [signature['meta'] for signature in chem_signatures]
).reindex(columns=chem_columns)
# Rewrite each URL to the gzipped file under the LINCS-data-2020 prefix.
chem_table['persistent_id'] = chem_table['persistent_id'].apply(
    lambda url: url.replace('.tsv', '.tsv.gz').replace(
        'LINCS-sigs-2021/cd/cp', 'LINCS-data-2020/L1000/compound'
    )
)
# Batch label: the first three underscore-separated tokens after 'DCIC_'
# in the file name, e.g. 'CPC015_SKB_24H'.
chem_table['batch'] = chem_table['persistent_id'].apply(
    lambda url: '_'.join(url.split('DCIC_')[1].split('_')[:3])
)
chem_table.head()
tissue disease cell_line pert_dose pert_name pert_time pert_type data_level creation_time persistent_id batch
0 skeletal muscle organ NaN SKB 10 uM dexamethasone 24 h Chemical 5 2021-05-15 https://lincs-dcic.s3.amazonaws.com/LINCS-data... CPC015_SKB_24H
1 prostate gland prostate adenocarcinoma VCAP 10 uM dexamethasone 6 h Chemical 5 2021-05-11 https://lincs-dcic.s3.amazonaws.com/LINCS-data... CPC009_VCAP_6H
2 prostate gland prostate adenocarcinoma PC3 10 uM dexamethasone 6 h Chemical 5 2021-05-10 https://lincs-dcic.s3.amazonaws.com/LINCS-data... CPC009_PC3_6H
3 lung lung cancer HCC15 10 uM dexamethasone 6 h Chemical 5 2021-05-08 https://lincs-dcic.s3.amazonaws.com/LINCS-data... CPC006_HCC15_6H
4 kidney NaN HA1E 10 uM dexamethasone 6 h Chemical 5 2021-05-08 https://lincs-dcic.s3.amazonaws.com/LINCS-data... CPC006_HA1E_6H
# Write the chemical perturbation table as a tab-separated file named after
# the capitalized perturbagen, without the row index.
chem_outfile = f"{chempert.capitalize()}_L1000_ChemPert_data.tsv"
chem_table.to_csv(chem_outfile, sep='\t', index=False)

Overexpression

# Set overexpression perturbagen
oepert = 'NR3C1'
# Full-text search used to locate the overexpression library record.
payload = {
    "filter": {
        "where": {
            "meta": {
                "fullTextSearch": "Overexpression Perturbations 2021"
            }
        }
    }
}

# A timeout keeps the notebook from hanging indefinitely on a stalled connection.
res = requests.post(metadata_api + "/libraries/find", json=payload, timeout=60)
# Fail here on an HTTP error instead of later on an unexpected response body.
res.raise_for_status()
res.json()
[{'$validator': '/dcic/signature-commons-schema/v5/core/library.json',
  'id': 'ef9389a8-53d3-50db-90cc-57e7d150b76c',
  'resource': 'f2bace34-022d-4147-9ca4-7b6e450c794d',
  'dataset': 'l1000_oe',
  'dataset_type': 'rank_matrix',
  'meta': {'date': '2021-06-10',
   'icon': './static/images/lincs/CMap.png',
   'size': '1.69 GB',
   'assay': 'L1000 mRNA profiling assay',
   'center': 'LINCS Center for Transcriptomics (Broad Institute)',
   '$validator': 'https://raw.githubusercontent.com/MaayanLab/sigcom-lincs/main/validators/lincs_datasets.json',
   'total_size': 1693041160,
   'url_prefix': 'https://lincs-dcic.s3.amazonaws.com/LINCS-sigs-2021/cd/oe',
   'datalevel_5': {'id': 'L1000_oe',
    'date': '2021-06-10',
    'link': 'https://clue.io/data/CMap2020#LINCS2020',
    'size': '1.69 GB',
    'version': '1',
    'file_url': 'https://lincs-dcic.s3.amazonaws.com/LINCS-sigs-2021/gctx/cd-coefficient/oe_coeff_mat.gctx',
    'filesize': 1693041160,
    'datalevel': 5,
    'description': 'LINCS L1000 Overexpression Perturbations (2021)'},
   'description': 'LINCS L1000 Overexpression Perturbations (2021)',
   '$download_counter': 5}}]
# Take the first library returned by the library search and keep its
# dataset name and id.
oe_pert_dataset = res.json()[0]
oe_dataset = oe_pert_dataset['dataset']
libid = oe_pert_dataset["id"]
# Restrict the signature search to that library and to the chosen perturbagen.
payload = {
    "filter": {
        "where": {
            "library": libid,
            "meta.pert_name": oepert
        }
    }
}

# A timeout keeps the notebook from hanging indefinitely on a stalled connection.
oe_res = requests.post(metadata_api + "/signatures/find", json=payload, timeout=60)
# Fail here on an HTTP error instead of treating an error body as signatures.
oe_res.raise_for_status()
oe_signatures = oe_res.json()
len(oe_signatures)
18
# Metadata fields kept in the exported table, in output order.
oe_columns = [
    'tissue', 'disease', 'cell_line', 'pert_name', 'pert_time',
    'pert_type', 'data_level', 'creation_time', 'persistent_id', 'pert_dose',
]
# Build the frame from every 'meta' record in a single call instead of
# concatenating one-row frames in a loop (which re-copies the table on each
# iteration and fails on an empty result). reindex() keeps the listed columns
# and fills any field that no record provides with NaN rather than raising.
oe_table = pd.DataFrame(
    [signature['meta'] for signature in oe_signatures]
).reindex(columns=oe_columns)
# Rewrite each URL to the gzipped file under the LINCS-data-2020 prefix.
oe_table['persistent_id'] = oe_table['persistent_id'].apply(
    lambda url: url.replace('.tsv', '.tsv.gz').replace(
        'LINCS-sigs-2021/cd/oe', 'LINCS-data-2020/L1000/oe'
    )
)
# Batch label: the first three underscore-separated tokens after 'DCIC_'
# in the file name, e.g. 'OEB005_PC3_96H'.
oe_table['batch'] = oe_table['persistent_id'].apply(
    lambda url: '_'.join(url.split('DCIC_')[1].split('_')[:3])
)
oe_table.head()
tissue disease cell_line pert_name pert_time pert_type data_level creation_time persistent_id pert_dose batch
0 prostate gland prostate adenocarcinoma PC3 NR3C1 96 h Overexpression 5 2021-05-19 https://lincs-dcic.s3.amazonaws.com/LINCS-data... NaN OEB005_PC3_96H
1 breast breast adenocarcinoma MCF7 NR3C1 96 h Overexpression 5 2021-05-19 https://lincs-dcic.s3.amazonaws.com/LINCS-data... NaN OEB005_MCF7_96H
2 intestine colon adenocarcinoma HT29 NR3C1 96 h Overexpression 5 2021-05-19 https://lincs-dcic.s3.amazonaws.com/LINCS-data... NaN OEB005_HT29_96H
3 liver carcinoma HEPG2 NR3C1 96 h Overexpression 5 2021-05-19 https://lincs-dcic.s3.amazonaws.com/LINCS-data... NaN OEB005_HEPG2_96H
4 lung lung cancer HCC515 NR3C1 96 h Overexpression 5 2021-05-19 https://lincs-dcic.s3.amazonaws.com/LINCS-data... NaN OEB005_HCC515_96H
# Write the overexpression table as a tab-separated file named after the
# perturbagen, without the row index.
oe_outfile = f"{oepert}_L1000_OE_data.tsv"
oe_table.to_csv(oe_outfile, sep='\t', index=False)

shRNA Knockdown

# Set shRNA knockdown perturbagen
shrnapert = 'NR3C1'
# Full-text search used to locate the shRNA library record.
payload = {
    "filter": {
        "where": {
            "meta": {
                "fullTextSearch": "shRNA Perturbations 2021"
            }
        }
    }
}

# A timeout keeps the notebook from hanging indefinitely on a stalled connection.
res = requests.post(metadata_api + "/libraries/find", json=payload, timeout=60)
# Fail here on an HTTP error instead of later on an unexpected response body.
res.raise_for_status()
res.json()
[{'$validator': '/dcic/signature-commons-schema/v5/core/library.json',
  'id': '8f1ff550-ece8-591d-a213-2763f854c008',
  'resource': 'f2bace34-022d-4147-9ca4-7b6e450c794d',
  'dataset': 'l1000_shRNA',
  'dataset_type': 'rank_matrix',
  'meta': {'date': '2021-06-10',
   'icon': './static/images/lincs/CMap.png',
   'size': '7.83 GB',
   'assay': 'L1000 mRNA profiling assay',
   'center': 'LINCS Center for Transcriptomics (Broad Institute)',
   '$validator': 'https://raw.githubusercontent.com/MaayanLab/sigcom-lincs/main/validators/lincs_datasets.json',
   'total_size': 7826067080,
   'url_prefix': 'https://lincs-dcic.s3.amazonaws.com/LINCS-sigs-2021/cd/shRNA',
   'datalevel_5': {'id': 'L1000_shRNA',
    'date': '2021-06-10',
    'link': 'https://clue.io/data/CMap2020#LINCS2020',
    'size': '7.83 GB',
    'version': '1',
    'file_url': 'https://lincs-dcic.s3.amazonaws.com/LINCS-sigs-2021/gctx/cd-coefficient/shRNA_coeff_mat.gctx',
    'filesize': 7826067080,
    'datalevel': 5,
    'description': 'LINCS L1000 shRNA Perturbations (2021)'},
   'description': 'LINCS L1000 shRNA Perturbations (2021)',
   '$download_counter': 16}}]
# Take the first library returned by the library search and keep its
# dataset name and id.
shrna_pert_dataset = res.json()[0]
shrna_dataset = shrna_pert_dataset['dataset']
libid = shrna_pert_dataset["id"]
# Restrict the signature search to that library and to the chosen perturbagen.
payload = {
    "filter": {
        "where": {
            "library": libid,
            "meta.pert_name": shrnapert
        }
    }
}

# A timeout keeps the notebook from hanging indefinitely on a stalled connection.
shrna_res = requests.post(metadata_api + "/signatures/find", json=payload, timeout=60)
# Fail here on an HTTP error instead of treating an error body as signatures.
shrna_res.raise_for_status()
shrna_signatures = shrna_res.json()
len(shrna_signatures)
96
# Metadata fields kept in the exported table, in output order.
shrna_columns = [
    'tissue', 'disease', 'cell_line', 'pert_name', 'pert_time',
    'pert_type', 'data_level', 'creation_time', 'persistent_id',
]
# Build the frame from every 'meta' record in a single call instead of
# concatenating one-row frames in a loop (which re-copies the table on each
# iteration and fails on an empty result). reindex() keeps the listed columns
# and fills any field that no record provides with NaN rather than raising.
shrna_table = pd.DataFrame(
    [signature['meta'] for signature in shrna_signatures]
).reindex(columns=shrna_columns)
# Rewrite each URL to the gzipped file under the LINCS-data-2020 prefix.
shrna_table['persistent_id'] = shrna_table['persistent_id'].apply(
    lambda url: url.replace('.tsv', '.tsv.gz').replace(
        'LINCS-sigs-2021/cd/shRNA', 'LINCS-data-2020/L1000/shRNA'
    )
)
# Batch label: the first three underscore-separated tokens after 'DCIC_'
# in the file name, e.g. 'KDC007_VCAP_120H'.
shrna_table['batch'] = shrna_table['persistent_id'].apply(
    lambda url: '_'.join(url.split('DCIC_')[1].split('_')[:3])
)
shrna_table.head()
tissue disease cell_line pert_name pert_time pert_type data_level creation_time persistent_id batch
0 prostate gland prostate adenocarcinoma VCAP NR3C1 120 h shRNA 5 2021-05-13 https://lincs-dcic.s3.amazonaws.com/LINCS-data... KDC007_VCAP_120H
1 prostate gland prostate adenocarcinoma PC3 NR3C1 96 h shRNA 5 2021-05-13 https://lincs-dcic.s3.amazonaws.com/LINCS-data... KDC007_PC3_96H
2 prostate gland prostate adenocarcinoma VCAP NR3C1 120 h shRNA 5 2021-05-15 https://lincs-dcic.s3.amazonaws.com/LINCS-data... KDC007_VCAP_120H
3 prostate gland prostate adenocarcinoma VCAP NR3C1 120 h shRNA 5 2021-05-15 https://lincs-dcic.s3.amazonaws.com/LINCS-data... KDC007_VCAP_120H
4 breast breast adenocarcinoma MCF7 NR3C1 96 h shRNA 5 2021-05-13 https://lincs-dcic.s3.amazonaws.com/LINCS-data... KDC007_MCF7_96H
# Write the shRNA knockdown table as a tab-separated file named after the
# perturbagen, without the row index.
shrna_outfile = f"{shrnapert}_L1000_shRNA_data.tsv"
shrna_table.to_csv(shrna_outfile, sep='\t', index=False)

CRISPR Knockout

# Set CRISPR knockout perturbagen
crisprkopert = 'NR1I2'
# Full-text search used to locate the CRISPR library record.
payload = {
    "filter": {
        "where": {
            "meta": {
                "fullTextSearch": "CRISPR Perturbations 2021"
            }
        }
    }
}

# A timeout keeps the notebook from hanging indefinitely on a stalled connection.
res = requests.post(metadata_api + "/libraries/find", json=payload, timeout=60)
# Fail here on an HTTP error instead of later on an unexpected response body.
res.raise_for_status()
res.json()
[{'$validator': '/dcic/signature-commons-schema/v5/core/library.json',
  'id': '96c7b8c5-1eca-5764-88e4-e4ccaee6603f',
  'resource': 'f2bace34-022d-4147-9ca4-7b6e450c794d',
  'dataset': 'l1000_xpr',
  'dataset_type': 'rank_matrix',
  'meta': {'date': '2021-06-10',
   'icon': './static/images/lincs/CMap.png',
   'size': '6.98 GB',
   'assay': 'L1000 mRNA profiling assay',
   'center': 'LINCS Center for Transcriptomics (Broad Institute)',
   '$validator': 'https://raw.githubusercontent.com/MaayanLab/sigcom-lincs/main/validators/lincs_datasets.json',
   'total_size': 6980690856,
   'url_prefix': 'https://lincs-dcic.s3.amazonaws.com/LINCS-sigs-2021/cd/xpr',
   'datalevel_5': {'id': 'L1000_xpr',
    'date': '2021-06-10',
    'link': 'https://clue.io/data/CMap2020#LINCS2020',
    'size': '6.98 GB',
    'version': '1',
    'file_url': 'https://lincs-dcic.s3.amazonaws.com/LINCS-sigs-2021/gctx/cd-coefficient/xpr_coeff_mat.gctx',
    'filesize': 6980690856,
    'datalevel': 5,
    'description': 'LINCS L1000 CRISPR Perturbations (2021)'},
   'description': 'LINCS L1000 CRISPR Perturbations (2021)',
   '$download_counter': 16}}]
# Take the first library returned by the library search and keep its
# dataset name and id.
crisprko_pert_dataset = res.json()[0]
crisprko_dataset = crisprko_pert_dataset['dataset']
libid = crisprko_pert_dataset["id"]
# Restrict the signature search to that library and to the chosen perturbagen.
payload = {
    "filter": {
        "where": {
            "library": libid,
            "meta.pert_name": crisprkopert
        }
    }
}

# A timeout keeps the notebook from hanging indefinitely on a stalled connection.
crisprko_res = requests.post(metadata_api + "/signatures/find", json=payload, timeout=60)
# Fail here on an HTTP error instead of treating an error body as signatures.
crisprko_res.raise_for_status()
crisprko_signatures = crisprko_res.json()
len(crisprko_signatures)
20
# Metadata fields kept in the exported table, in output order.
crisprko_columns = [
    'tissue', 'disease', 'cell_line', 'pert_name', 'pert_time',
    'pert_type', 'data_level', 'creation_time', 'persistent_id',
]
# Build the frame from every 'meta' record in a single call instead of
# concatenating one-row frames in a loop (which re-copies the table on each
# iteration and fails on an empty result). reindex() keeps the listed columns
# and fills any field that no record provides with NaN rather than raising.
crisprko_table = pd.DataFrame(
    [signature['meta'] for signature in crisprko_signatures]
).reindex(columns=crisprko_columns)
# Rewrite each URL to the gzipped file under the LINCS-data-2020 prefix.
crisprko_table['persistent_id'] = crisprko_table['persistent_id'].apply(
    lambda url: url.replace('.tsv', '.tsv.gz').replace(
        'LINCS-sigs-2021/cd/xpr', 'LINCS-data-2020/L1000/xpr'
    )
)
# Batch label: the first three underscore-separated tokens after 'DCIC_'
# in the file name, e.g. 'XPR010_U251MG.311_96H'.
crisprko_table['batch'] = crisprko_table['persistent_id'].apply(
    lambda url: '_'.join(url.split('DCIC_')[1].split('_')[:3])
)
crisprko_table.head()
tissue disease cell_line pert_name pert_time pert_type data_level creation_time persistent_id batch
0 brain astrocytoma U251MG NR1I2 96 h CRISPR Knockout 5 2021-05-23 https://lincs-dcic.s3.amazonaws.com/LINCS-data... XPR010_U251MG.311_96H
1 brain astrocytoma U251MG NR1I2 96 h CRISPR Knockout 5 2021-05-23 https://lincs-dcic.s3.amazonaws.com/LINCS-data... XPR010_U251MG.311_96H
2 pancreas pancreatic carcinoma YAPC NR1I2 96 h CRISPR Knockout 5 2021-05-23 https://lincs-dcic.s3.amazonaws.com/LINCS-data... XPR010_YAPC.311_96H
3 pancreas pancreatic carcinoma YAPC NR1I2 96 h CRISPR Knockout 5 2021-05-23 https://lincs-dcic.s3.amazonaws.com/LINCS-data... XPR010_YAPC.311_96H
4 prostate gland prostate adenocarcinoma PC3 NR1I2 96 h CRISPR Knockout 5 2021-05-23 https://lincs-dcic.s3.amazonaws.com/LINCS-data... XPR010_PC3.311B_96H
# Write the CRISPR knockout table as a tab-separated file named after the
# perturbagen, without the row index.
crisprko_outfile = f"{crisprkopert}_L1000_CRISPRKO_data.tsv"
crisprko_table.to_csv(crisprko_outfile, sep='\t', index=False)
View source code
Submit an issue