5
|
1 import requests
|
|
2 import json
|
|
3 import sys
|
|
4 from __builtin__ import False
|
|
# Galaxy tool: sends every mutation listed in the input file to the CRAVAT
# REST service and writes the returned annotations as a tab-separated table.
# Usage: <script> <input_file> <output_file>

# Endpoint queried once per input line.
CRAVAT_QUERY_URL = 'http://staging.cravat.us/CRAVAT/rest/service/query'

# Seconds to wait on the server before giving up, so a stalled connection
# cannot hang the job forever.
REQUEST_TIMEOUT = 120

# Column order, manually set to match the CRAVAT server. Keys returned by the
# server that are not listed here are appended after these columns.
CRAVAT_KEYS = [
    "Chromosome", "Position", "Strand", "Reference base(s)",
    "Alternate base(s)", "HUGO symbol",
    "Sequence ontology transcript", "Sequence ontology protein change",
    "Sequence ontology",
    "Sequence ontology all transcripts", "ExAC total allele frequency",
    "ExAC allele frequency (African/African American)",
    "ExAC allele frequency (Latino)",
    "ExAC allele frequency (East Asian)", "ExAC allele frequency (Finnish)",
    "ExAC allele frequency (Non-Finnish European)",
    "ExAC allele frequency (Other)",
    "ExAC allele frequency (South Asian)", "1000 Genomes allele frequency",
    "ESP6500 allele frequency (European American)",
    "ESP6500 allele frequency (African American)",
    "Transcript in COSMIC", "Protein sequence change in COSMIC",
    "Occurrences in COSMIC [exact nucleotide change]", "Mappability Warning",
    "Driver Genes",
    "TARGET", "dbSNP", "MuPIT Link",
]

# The first two columns (chromosome and position) are written verbatim; every
# later column is stripped and, when numeric, rounded to four decimals.
RAW_COLUMNS = 2


def _normalize_mutation(line):
    """Return the input line with each whitespace run replaced by '_'.

    Leading/trailing whitespace (including the newline) is dropped, so a
    blank line yields an empty string.
    """
    return "_".join(line.split())


def _query_cravat(mutation):
    """Query the CRAVAT server for one mutation and return the decoded JSON.

    Raises requests.HTTPError for a 4xx/5xx answer instead of trying to parse
    an error page as JSON.
    """
    response = requests.get(CRAVAT_QUERY_URL,
                            params={'mutation': mutation},
                            timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return json.loads(response.text)


def _format_value(value, raw=False):
    """Turn one JSON value into the text of an output cell.

    None (a missing key or JSON null) becomes an empty cell. With raw=True the
    value is only converted to text. Otherwise it is stripped and, if it
    parses as a float, rendered with four decimals.
    """
    if value is None:
        return ''
    # The tuple guards against values that are themselves sequences.
    text = '%s' % (value,)
    if raw:
        return text
    text = text.strip()
    try:
        return '%.4f' % float(text)
    except ValueError:
        # Not numeric: keep the stripped text as is.
        return text


def annotate_mutations(in_file, out_file, fetch=None):
    """Annotate every mutation in in_file and write a TSV table to out_file.

    in_file  -- iterable of text lines, one mutation per line; blank lines
                are skipped.
    out_file -- object with a write() method receiving the table.
    fetch    -- callable taking the normalized mutation string and returning
                a dict of annotations; defaults to the CRAVAT REST query.

    All responses are collected before anything is written so that the single
    header line covers every key seen, keeping all rows aligned with it.
    Nothing is written when the input contains no mutations.
    Raises ValueError if a response is not a JSON object.
    """
    if fetch is None:
        fetch = _query_cravat

    columns = list(CRAVAT_KEYS)
    known = set(columns)
    records = []
    for line in in_file:
        mutation = _normalize_mutation(line)
        if not mutation:
            continue
        record = fetch(mutation)
        # Debug trace on stdout, kept from the original tool.
        print(record)
        if not isinstance(record, dict):
            raise ValueError("unexpected CRAVAT response for %r: %r"
                             % (mutation, record))
        for key in record:
            if key not in known:
                known.add(key)
                columns.append(key)
        records.append(record)

    if not records:
        return

    out_file.write('\t'.join(columns) + '\n')
    for record in records:
        cells = []
        for index, key in enumerate(columns):
            raw = index < RAW_COLUMNS
            cell = _format_value(record.get(key), raw)
            if not raw:
                # Debug trace on stdout, kept from the original tool.
                print("key[" + key + "] value[" + cell + "]")
            cells.append(cell)
        out_file.write('\t'.join(cells) + '\n')


def main(argv):
    """Run the tool with Galaxy's arguments: argv[1] input, argv[2] output."""
    if len(argv) < 3:
        prog = argv[0] if argv else "script"
        sys.exit("usage: %s <input_file> <output_file>" % prog)
    input_filename = argv[1]
    output_filename = argv[2]
    # 'with' closes both files even when a query or write fails.
    with open(input_filename, "r") as in_file, \
            open(output_filename, "w") as out_file:
        annotate_mutations(in_file, out_file)


if __name__ == "__main__":
    main(sys.argv)