comparison cravat_submit/cravat_submit.py @ 0:399f41a4bad6 draft

Uploaded
author in_silico
date Tue, 12 Jun 2018 11:05:20 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:399f41a4bad6
1 import requests
2 import json
3 import time
4 import urllib
5 import sys
6 import csv
7 import pdb
8
# Usage: cravat_submit.py <input file> <analyses> <output file>
#
# Submits the input file to the CRAVAT staging server, waits for the job to
# succeed, downloads the two result tables and writes them to the output file
# as one combined tab-separated table.

# Base URL of the CRAVAT staging server (submit, status and result download).
CRAVAT_URL = 'http://staging.cravat.us/CRAVAT'
# E-mail address the job is submitted under.
SUBMIT_EMAIL = 'znylund@insilico.us.com'
# Seconds to sleep between two status polls.
POLL_INTERVAL = 2
# Number of leading Variant.Result columns dropped from a data row when no
# 'Input line' header row has been seen yet (the original fixed offset).
FALLBACK_SHARED_COLUMNS = 11


def submit_job(input_filename, analyses):
    """Upload *input_filename* to the submit service and return the job id.

    *analyses* is sent unchanged as the 'analyses' form field.
    Raises KeyError if the JSON reply carries no 'jobid' entry.
    """
    # Binary mode plus `with`: the handle is closed once the upload is done,
    # and requests recommends binary mode for multipart uploads.
    with open(input_filename, 'rb') as input_file:
        reply = requests.post(
            CRAVAT_URL + '/rest/service/submit',
            files={'inputfile': input_file},
            data={'email': SUBMIT_EMAIL, 'analyses': analyses})
    return json.loads(reply.text)['jobid']


def wait_for_job(jobid):
    """Poll the status service until it reports 'Success' for *jobid*."""
    # NOTE(review): as in the original script, any status other than
    # 'Success' keeps the loop polling; the strings the service uses for a
    # failed job are not visible here, so no failure exit is added.
    while True:
        reply = requests.get(CRAVAT_URL + '/rest/service/status',
                             params={'jobid': jobid})
        if json.loads(reply.text)['status'] == 'Success':
            return
        time.sleep(POLL_INTERVAL)


def download_results(jobid):
    """Download both result tables of *jobid* into the working directory.

    Returns the tuple (variant_result_path, additional_details_path).
    """
    # Local import so the script works on Python 2 and Python 3 alike.
    try:
        from urllib import urlretrieve          # Python 2
    except ImportError:
        from urllib.request import urlretrieve  # Python 3

    # One timestamp for both names, so they cannot straddle a minute change.
    stamp = time.strftime("%H:%M")
    variant_path = stamp + '_Z_Variant_Result.tsv'
    details_path = stamp + '_Z_Additional_Details.tsv'

    base_url = CRAVAT_URL + '/results/' + jobid + '/'
    urlretrieve(base_url + 'Variant.Result.tsv', variant_path)
    urlretrieve(base_url + 'Variant_Additional_Details.Result.tsv',
                details_path)
    return variant_path, details_path


def merge_rows(details_rows, result_rows):
    """Yield the combined rows of the two result tables.

    *details_rows* (Variant Additional Details) and *result_rows* (Variant
    Result) are iterables of lists of cells and are consumed in lockstep,
    one row of each per step.

    - Rows whose details row is empty or starts with '#' are skipped.
    - The row whose first details cell is 'Input line' is the header: the
      output header is every details column followed by each result column
      whose name has not been seen yet.
    - Every other row is a data row: all details cells followed by the
      result cells belonging to the result columns kept in the header.

    Raises ValueError if *result_rows* runs out before *details_rows*.
    """
    result_iter = iter(result_rows)
    # Indices of the result columns that made it into the header; None until
    # the header row has been seen.
    kept_columns = None

    for details_row in details_rows:
        result_row = next(result_iter, None)
        if result_row is None:
            raise ValueError('Variant.Result.tsv has fewer rows than '
                             'Variant_Additional_Details.Result.tsv')

        # startswith() also copes with an empty first cell, where indexing
        # the first character would raise IndexError.
        if not details_row or details_row[0].startswith('#'):
            continue

        if details_row[0] == 'Input line':
            headers = list(details_row)
            seen = set(headers)
            kept_columns = []
            for index, name in enumerate(result_row):
                if name not in seen:
                    seen.add(name)
                    headers.append(name)
                    kept_columns.append(index)
            yield headers
        elif kept_columns is None:
            # No header seen yet: fall back to the fixed offset.
            yield list(details_row) + result_row[FALLBACK_SHARED_COLUMNS:]
        else:
            # Take exactly the columns the header kept, so data cells always
            # line up with their header names.
            extra = [result_row[index] for index in kept_columns
                     if index < len(result_row)]
            yield list(details_row) + extra


def open_tsv(path, mode):
    """Open *path* the way the csv module expects on the running Python."""
    if sys.version_info[0] >= 3:
        return open(path, mode, newline='')
    # Python 2: csv writers need a binary-mode file.
    return open(path, 'wb' if mode == 'w' else mode)


def main(argv=None):
    """Run the submit / wait / download / merge pipeline.

    *argv* defaults to sys.argv; argv[1] is the input file, argv[2] the
    analyses selection and argv[3] the output file.
    """
    argv = sys.argv if argv is None else argv
    input_filename = argv[1]
    input_select_bar = argv[2]
    output_filename = argv[3]

    jobid = submit_job(input_filename, input_select_bar)
    wait_for_job(jobid)
    variant_path, details_path = download_results(jobid)

    with open_tsv(variant_path, 'r') as tsvin_1, \
            open_tsv(details_path, 'r') as tsvin_2, \
            open_tsv(output_filename, 'w') as tsvout:
        result_reader = csv.reader(tsvin_1, delimiter='\t')
        details_reader = csv.reader(tsvin_2, delimiter='\t')
        writer = csv.writer(tsvout, delimiter='\t')
        for merged_row in merge_rows(details_reader, result_reader):
            # Echo each written row to stdout, as the original script did.
            print(merged_row)
            writer.writerow(merged_row)


if __name__ == '__main__':
    main()
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124 #a = 'col1\tcol2\tcol3'
125 #header_list = a.split('\t')
126
127 #loop through the two results, when you first hit header you print out the headers in tabular form
128 #Print out each header only once
129 #Combine both headers into one output file
130 #loop through the rest of the data and assign each value to its assigned header
131 #combine this all into one output file
132
133
134
135
136