comparison cravat_annotate_mutations-18ce5c6169ef/cravat_annotate.py @ 5:6b7ce75ea2f8 draft

Uploaded
author in_silico
date Wed, 19 Jul 2017 14:49:03 -0400
parents
children
comparison
equal deleted inserted replaced
4:27c0b0f841c8 5:6b7ce75ea2f8
1 import requests
2 import json
3 import sys
4 from __builtin__ import False
# Annotates mutations with the CRAVAT web service for use as a Galaxy tool.
#
# Usage: cravat_annotate.py <input_file> <output_file>
#
# Every non-blank line of the input file is one mutation given as
# whitespace-separated genomic coordinates.  The output file is a
# tab-separated table: one header row followed by one row per mutation.
#
# The code below deliberately sticks to syntax that runs on both Python 2.7
# and Python 3 (single-argument print(), %-formatting).

#CRAVAT REST endpoint that annotates a single mutation per request
CRAVAT_QUERY_URL = 'http://staging.cravat.us/CRAVAT/rest/service/query'

#seconds to wait on the server before giving up, so a dead server cannot
#hang the Galaxy job forever
REQUEST_TIMEOUT_SECONDS = 120

#manually sets the order of the Keys to the same as CRAVAT Server
DEFAULT_COLUMNS = (
    "Chromosome", "Position", "Strand", "Reference base(s)", "Alternate base(s)", "HUGO symbol",
    "Sequence ontology transcript", "Sequence ontology protein change", "Sequence ontology",
    "Sequence ontology all transcripts", "ExAC total allele frequency",
    "ExAC allele frequency (African/African American)", "ExAC allele frequency (Latino)",
    "ExAC allele frequency (East Asian)", "ExAC allele frequency (Finnish)",
    "ExAC allele frequency (Non-Finnish European)", "ExAC allele frequency (Other)",
    "ExAC allele frequency (South Asian)", "1000 Genomes allele frequency",
    "ESP6500 allele frequency (European American)", "ESP6500 allele frequency (African American)",
    "Transcript in COSMIC", "Protein sequence change in COSMIC",
    "Occurrences in COSMIC [exact nucleotide change]", "Mappability Warning", "Driver Genes",
    "TARGET", "dbSNP", "MuPIT Link",
)


def _query_cravat(mutation):
    """Ask the CRAVAT server about one mutation and return the decoded JSON.

    mutation -- the underscore-joined genomic coordinates of the mutation.

    Raises a requests exception on timeout or on an HTTP error status, and
    ValueError when the response body is not valid JSON.
    """
    response = requests.get(CRAVAT_QUERY_URL,
                            params={'mutation': mutation},
                            timeout=REQUEST_TIMEOUT_SECONDS)
    #fail with the HTTP error itself instead of a confusing JSON parse error
    response.raise_for_status()
    return json.loads(response.text)


def _as_text(value):
    """Return value as a string, unchanged if it already is one; None becomes ''."""
    if value is None:
        return ""
    #anything with .strip() is treated as a string (str, or unicode on Python 2)
    if hasattr(value, "strip"):
        return value
    return str(value)


def _format_value(value):
    """Return value stripped, rounded to four decimals when it is numeric."""
    text = _as_text(value).strip()
    try:
        return '%.4f' % float(text)
    except ValueError:
        #not a number: keep the stripped text as it is
        return text


def annotate(in_file, out_file, query=_query_cravat):
    """Write one tab-separated annotation row per mutation line of in_file.

    in_file  -- iterable of text lines, one mutation per line.
    out_file -- object with a write() method that receives the table.
    query    -- callable taking the underscore-joined mutation string and
                returning a dict of annotations; defaults to the CRAVAT server.

    The column layout is fixed by the first annotated mutation: the known
    CRAVAT columns first, then any additional keys of that first response.
    Every later row uses exactly those columns so the table stays aligned;
    annotations a response does not provide are written as empty cells, and
    keys that only show up in later responses are not written.
    """
    columns = None

    #loops through the input file line by line
    for line in in_file:
        #strips the input line and replaces every run of whitespace with an underscore
        mutation = "_".join(line.split())
        if not mutation:
            #blank line: there is nothing to ask the server about
            continue

        json_data = query(mutation)
        #print for debugging
        print(json_data)

        #so we only print out the Keys once
        if columns is None:
            columns = list(DEFAULT_COLUMNS)
            columns.extend(key for key in json_data if key not in DEFAULT_COLUMNS)
            out_file.write('\t'.join(columns) + '\n')

        #the first two columns are written exactly as received (no stripping, no rounding)
        cells = [_as_text(json_data.get(key)) for key in columns[:2]]

        #all other values are stripped and, when numeric, rounded to four decimals
        for key in columns[2:]:
            cell = _format_value(json_data.get(key))
            cells.append(cell)
            #print for debugging
            print("key[" + key + "] value[" + cell + "]")

        out_file.write('\t'.join(cells) + '\n')


def main(argv):
    """Run the tool with the input and output file names that Galaxy passes in."""
    if len(argv) < 3:
        sys.exit("usage: cravat_annotate.py <input_file> <output_file>")
    input_filename = argv[1]
    output_filename = argv[2]

    #opens each file, in to read, out to write; both get closed even on errors
    with open(input_filename, "r") as in_file, open(output_filename, "w") as out_file:
        annotate(in_file, out_file)


if __name__ == "__main__":
    main(sys.argv)