# Demo entry 6719678
#
# Submitted by anonymous on Mar 13, 2018 at 09:36
# Language: Python. Code size: 2.7 kB.

import os,csv
import pandas as pd
def get_molregno(fileName):
    """Return the fourth column of ``<fileName>.csv`` as a list of strings.

    Every row is read, so a header row (if the file has one) contributes
    its own fourth cell as the first element of the result.

    Args:
        fileName: Path of the CSV file without the ``.csv`` extension.

    Returns:
        The values found at column index 3, in file order.

    Raises:
        IndexError: If a row has fewer than four columns.
    """
    csv_path = fileName + '.csv'
    with open(csv_path, 'r') as handle:
        # Index 3 is the compound identifier column (chembl_compound_id).
        return [record[3] for record in csv.reader(handle)]
def get_activities(compoundList, outputPrefix=None, activitiesPath='activities.csv'):
    """Copy the activity rows of the given compounds into a new CSV file.

    Each row of ``activitiesPath`` whose fifth column (index 4) is contained
    in ``compoundList`` is written unchanged to
    ``<outputPrefix>_activities.csv``.

    Expected column layout of the activities table:
    activity_id, assay_id, doc_id, record_id, molregno, standard_relation,
    published_value, published_units, standard_value, standard_units, ...

    Args:
        compoundList: Iterable of compound identifiers (hashable, normally
            strings as read from a CSV file).
        outputPrefix: Path prefix of the output file. ``None`` (default)
            falls back to the module-level ``fileName``.
        activitiesPath: CSV file to filter; defaults to ``activities.csv``
            in the current working directory.

    Returns:
        The second column (index 1, the assay id) of every copied row, in
        file order, duplicates included.
    """
    if outputPrefix is None:
        # Backward compatible: the prefix used to come from this global only.
        outputPrefix = fileName

    # Hash-based membership instead of scanning a list for every row.
    wanted = frozenset(compoundList)
    assayid_list = []

    # Text mode with newline='' is what the csv module requires on Python 3;
    # the context manager closes both files even if a row fails to parse.
    with open(activitiesPath, 'r', newline='') as source, \
            open(outputPrefix + '_activities.csv', 'w', newline='') as target:
        writer = csv.writer(target, dialect='excel')
        for row in csv.reader(source):
            if len(row) <= 4:
                # Blank or truncated line: it has no compound column to test.
                continue
            if row[4] in wanted:
                assayid_list.append(row[1])
                writer.writerow(row)
    return assayid_list
def get_assays(assayid_list, outputPrefix=None, assaysPath='assays.csv'):
    """Copy the assay rows with the given ids into a new CSV file.

    Each row of ``assaysPath`` whose first column (index 0, the assay id) is
    contained in ``assayid_list`` is written unchanged to
    ``<outputPrefix>_assays.csv``. These rows carry the target information:
    column index 12 is ``tid``.

    Expected column layout of the assays table:
    assay_id, doc_id, description, assay_type, assay_test_type,
    assay_category, assay_organism, assay_tax_id, assay_strain,
    assay_tissue, assay_cell_type, assay_subcellular_fraction, tid, ...

    Args:
        assayid_list: Iterable of assay ids (hashable, normally strings);
            duplicates are harmless.
        outputPrefix: Path prefix of the output file. ``None`` (default)
            falls back to the module-level ``fileName``.
        assaysPath: CSV file to filter; defaults to ``assays.csv`` in the
            current working directory.

    Returns:
        None. The result is the file written to disk.
    """
    if outputPrefix is None:
        # Backward compatible: the prefix used to come from this global only.
        outputPrefix = fileName

    # Hash-based membership instead of scanning a list for every row.
    wanted = frozenset(assayid_list)

    # The output used to be left open, so buffered rows could be missing
    # when the file was read back; the context manager flushes and closes
    # it. Text mode with newline='' is what csv requires on Python 3.
    with open(assaysPath, 'r', newline='') as source, \
            open(outputPrefix + '_assays.csv', 'w', newline='') as target:
        writer = csv.writer(target, dialect='excel')
        for row in csv.reader(source):
            # Blank lines come back as empty rows and have no id to test.
            if row and row[0] in wanted:
                writer.writerow(row)
def combine(fileName):
    """Join the linker table with its activities, assays and targets.

    Inputs, all read with pandas: ``<fileName>.csv``,
    ``<fileName>_activities.csv``, ``<fileName>_assays.csv`` and
    ``target_dictionary.csv`` (the last one from the working directory).

    Outputs: ``linker_activitie.csv`` (the linker/activities join) and
    ``<fileName>_activities_target.csv`` (the full join). Both are written
    with the DataFrame index as the first column.

    Args:
        fileName: Common path prefix of the per-run CSV files.
    """
    # Step 1: linker + activities. The right join keeps every linker row,
    # including those without a matching activity.
    linker_frame = pd.read_csv(fileName + '.csv')
    activity_frame = pd.read_csv(fileName + '_activities.csv')
    joined = activity_frame.merge(
        linker_frame, how='right', on=['molregno', 'doc_id'])
    joined.to_csv('linker_activitie.csv')

    # Step 2: attach the assay description of each activity.
    assay_frame = pd.read_csv(fileName + '_assays.csv')
    joined = joined.merge(assay_frame, how='left', on=['assay_id'])

    # Step 3: attach the target record referenced by each assay's tid.
    target_frame = pd.read_csv('target_dictionary.csv')
    joined = joined.merge(target_frame, how='left', on=['tid'])
    joined.to_csv(fileName + '_activities_target.csv')

# Base name shared by the input and output files of this run
# ('all.csv', 'all_activities.csv', 'all_assays.csv', ...). It must stay a
# module-level name: get_activities and get_assays read it as a global.
fileName = 'all'
# Extraction steps, currently disabled. With them commented out, combine()
# below reads whatever <fileName>_activities.csv and <fileName>_assays.csv
# already exist on disk.
#mol_regno_id =   get_molregno(fileName)
#assay_compound = get_activities(mol_regno_id)
#get_assays(assay_compound)
combine(fileName)

# This snippet took 0.01 seconds to highlight.
#
# Back to the Entry List or Home.
#
# Delete this entry (admin only).