annotate cravat_annotate_mutations-18ce5c6169ef/cravat_annotate.py @ 8:4d17381a30de draft

Uploaded
author in_silico
date Tue, 12 Jun 2018 10:56:40 -0400
parents 6b7ce75ea2f8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
6b7ce75ea2f8 Uploaded
in_silico
parents:
diff changeset
1 import requests
6b7ce75ea2f8 Uploaded
in_silico
parents:
diff changeset
2 import json
6b7ce75ea2f8 Uploaded
in_silico
parents:
diff changeset
3 import sys
6b7ce75ea2f8 Uploaded
in_silico
parents:
diff changeset
4 from __builtin__ import False
6b7ce75ea2f8 Uploaded
in_silico
parents:
diff changeset
# Galaxy tool script: sends every mutation listed in the input file to the
# CRAVAT REST query service and writes the returned annotations to the output
# file as a tab-separated table (one header row, then one row per mutation).
#
# Usage: cravat_annotate.py <input_file> <output_file>
#
# The code below avoids Python-2-only syntax (no print statements) so that it
# parses under both Python 2.7 and Python 3.

# Single-mutation query endpoint of the CRAVAT REST service.
CRAVAT_QUERY_URL = 'http://staging.cravat.us/CRAVAT/rest/service/query'

# Seconds to wait for the server before giving up, so that an unresponsive
# service cannot hang the job forever.
REQUEST_TIMEOUT = 60

# Column order used for the output table; any additional keys returned by the
# server are appended after these.
BASE_KEYS = [
    "Chromosome", "Position", "Strand", "Reference base(s)", "Alternate base(s)", "HUGO symbol",
    "Sequence ontology transcript", "Sequence ontology protein change", "Sequence ontology",
    "Sequence ontology all transcripts", "ExAC total allele frequency",
    "ExAC allele frequency (African/African American)", "ExAC allele frequency (Latino)",
    "ExAC allele frequency (East Asian)", "ExAC allele frequency (Finnish)",
    "ExAC allele frequency (Non-Finnish European)", "ExAC allele frequency (Other)",
    "ExAC allele frequency (South Asian)", "1000 Genomes allele frequency",
    "ESP6500 allele frequency (European American)", "ESP6500 allele frequency (African American)",
    "Transcript in COSMIC", "Protein sequence change in COSMIC",
    "Occurrences in COSMIC [exact nucleotide change]", "Mappability Warning", "Driver Genes",
    "TARGET", "dbSNP", "MuPIT Link",
]


def normalize_mutation(line):
    """Return *line* with surrounding whitespace removed and every inner run
    of whitespace replaced by a single underscore (the form the query uses).

    A blank line yields an empty string.
    """
    return "_".join(line.split())


def ordered_keys(json_data):
    """Return the column names for *json_data*.

    The result starts with BASE_KEYS in their fixed order, followed by every
    key of *json_data* that is not already listed, in iteration order.
    """
    keys = list(BASE_KEYS)
    for key in json_data:
        if key not in keys:
            keys.append(key)
    return keys


def format_value(value):
    """Return *value* as the text to put in an output cell.

    Strings are stripped. Anything that parses as a float is rendered with
    four decimals; everything else is written as-is. None becomes an empty
    cell instead of raising.
    """
    if value is None:
        return ''
    if hasattr(value, 'strip'):
        value = value.strip()
    try:
        return '%.4f' % float(value)
    except (TypeError, ValueError):
        # Not numeric: keep the text unchanged.
        return str(value)


def format_cells(json_data, keys):
    """Return the list of output cells for one server response.

    The first two columns (chromosome and position in the standard layout)
    are written verbatim so they are never reformatted as floats; all other
    columns go through format_value(). Keys missing from *json_data* produce
    empty cells so every row has the same number of columns.
    """
    cells = []
    for index, key in enumerate(keys):
        value = json_data.get(key, '')
        if index < 2:
            cells.append('' if value is None else str(value))
        else:
            cells.append(format_value(value))
    return cells


def query_cravat(mutation):
    """Query the CRAVAT service for *mutation* and return the decoded JSON.

    Raises the requests library's HTTP error when the server answers with an
    error status, and ValueError when the body is not valid JSON.
    """
    response = requests.get(CRAVAT_QUERY_URL, params={'mutation': mutation},
                            timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return json.loads(response.text)


def main(argv):
    """Annotate every mutation in argv[1] and write the table to argv[2]."""
    input_filename = argv[1]
    output_filename = argv[2]

    # Column layout; fixed when the first response arrives so that the header
    # and every following row share the same columns.
    columns = None

    # 'with' guarantees both files are closed even if a request fails.
    with open(input_filename, "r") as in_file, open(output_filename, "w") as out_file:
        for line in in_file:
            mutation = normalize_mutation(line)
            if not mutation:
                # Blank line: nothing to query.
                continue

            json_data = query_cravat(mutation)
            # Debugging trace on stdout.
            sys.stdout.write(str(json_data) + '\n')

            if columns is None:
                columns = ordered_keys(json_data)
                out_file.write('\t'.join(columns) + '\n')

            cells = format_cells(json_data, columns)
            # Debugging trace on stdout for every column after the first two.
            for key, cell in zip(columns[2:], cells[2:]):
                sys.stdout.write("key[" + key + "] value[" + cell + "]\n")

            out_file.write('\t'.join(cells) + '\n')


if __name__ == "__main__":
    main(sys.argv)