Mercurial > repos > fabio > sbtas_se
comparison query.py @ 10:027f2e9d4a25 draft
Uploaded 20180131
author | fabio |
---|---|
date | Wed, 31 Jan 2018 17:29:01 -0500 |
parents | 256d015d69f9 |
children | 039e8e1e8b1f |
comparison
equal
deleted
inserted
replaced
9:f9ba0b65e1fa | 10:027f2e9d4a25 |
---|---|
5 | 5 |
6 import os, uuid, optparse, requests, json, time | 6 import os, uuid, optparse, requests, json, time |
7 #from requests_futures.sessions import FuturesSession | 7 #from requests_futures.sessions import FuturesSession |
8 | 8 |
9 #### NN14 #### | 9 #### NN14 #### |
10 service_url = "http://nn14.galaxyproject.org:8080/"; | 10 SERVICE_URL = "http://nn14.galaxyproject.org:8080/"; |
11 #service_url = "http://127.0.0.1:8082/"; | 11 #service_url = "http://127.0.0.1:8082/"; |
12 query_url = service_url+"tree/0/query"; | 12 QUERY_URL = SERVICE_URL+"tree/0/query"; |
13 status_url = service_url+"status/<task_id>"; | 13 STATUS_URL = SERVICE_URL+"status/<task_id>"; |
14 ############## | 14 ############## |
15 # query delay in seconds | 15 # query delay in seconds |
16 query_delay = 30; | 16 QUERY_DELAY = 30; |
17 ############## | 17 ############## |
18 | |
19 VALID_CHARS = '.-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ ' | |
18 | 20 |
19 def query_request( options, args, payload ): | 21 def query_request( options, args, payload ): |
20 # add additional parameters to the payload | 22 # add additional parameters to the payload |
21 #payload["tree_id"] = str(options.treeid); | 23 #payload["tree_id"] = str(options.treeid); |
22 payload["search_mode"] = str(options.search); | 24 payload["search_mode"] = str(options.search); |
26 headers = {'Content-type': 'application/json'}; | 28 headers = {'Content-type': 'application/json'}; |
27 | 29 |
28 # create a session | 30 # create a session |
29 session = requests.Session(); | 31 session = requests.Session(); |
30 # make a synchronous post request to the query route | 32 # make a synchronous post request to the query route |
31 req = session.post(query_url, headers=headers, json=payload); | 33 req = session.post(QUERY_URL, headers=headers, json=payload); |
32 resp_code = req.status_code; | 34 resp_code = req.status_code; |
33 print(str(req.content)+"\n\n"); | 35 print(str(req.content)+"\n\n"); |
34 if resp_code == requests.codes.ok: | 36 if resp_code == requests.codes.ok: |
35 resp_content = str(req.content); | 37 resp_content = str(req.content); |
36 # convert out to json | 38 # convert out to json |
43 task_status = None; | 45 task_status = None; |
44 while task_processed is False: | 46 while task_processed is False: |
45 # create a new session | 47 # create a new session |
46 session = requests.Session(); | 48 session = requests.Session(); |
47 # make a synchronous get request to the status route | 49 # make a synchronous get request to the status route |
48 status_query_url = status_url.replace("<task_id>", task_id); | 50 status_query_url = STATUS_URL.replace("<task_id>", task_id); |
49 status_req = session.get(status_query_url); | 51 status_req = session.get(status_query_url); |
50 status_resp_content = str(status_req.content); | 52 status_resp_content = str(status_req.content); |
51 print(status_resp_content+"\n\n"); | 53 print(status_resp_content+"\n\n"); |
52 # convert out to json | 54 # convert out to json |
53 json_status_content = json.loads(status_resp_content); | 55 json_status_content = json.loads(status_resp_content); |
57 task_processed = True; | 59 task_processed = True; |
58 break; | 60 break; |
59 elif json_status_content['state'] in ['FAILURE', 'REVOKED']: | 61 elif json_status_content['state'] in ['FAILURE', 'REVOKED']: |
60 return "Task status: "+str(json_status_content['state']); | 62 return "Task status: "+str(json_status_content['state']); |
61 else: | 63 else: |
62 time.sleep(query_delay); # in seconds | 64 time.sleep(QUERY_DELAY); # in seconds |
63 | 65 |
64 # get output dir (collection) path | 66 # get output dir (collection) path |
65 output_dir_path = options.outputdir; | 67 output_dir_path = options.outputdir; |
66 if not os.path.exists(output_dir_path): | 68 if not os.path.exists(output_dir_path): |
67 os.makedirs(output_dir_path); | 69 os.makedirs(output_dir_path); |
68 out_file_format = "txt"; | 70 out_file_format = "tabular"; |
69 | 71 |
70 for block in json_status_content['results']: | 72 for block in json_status_content['results']: |
71 seq_id = block['sequence_id']; | 73 seq_id = block['sequence_id']; |
72 accessions = block['accession_numbers']; | 74 accessions = block['accession_numbers']; |
73 # put response block in the output collection | 75 # put response block in the output collection |
98 for line in content_file: | 100 for line in content_file: |
99 if line.strip() != "": | 101 if line.strip() != "": |
100 line_split = line.strip().split("\t"); # split on tab | 102 line_split = line.strip().split("\t"); # split on tab |
101 if len(line_split) == 2: # 0:id , 1:seq , otherwise skip line | 103 if len(line_split) == 2: # 0:id , 1:seq , otherwise skip line |
102 seq_id = line_split[0]; | 104 seq_id = line_split[0]; |
105 # fix seq_id using valid chars only | |
106 seq_id = ''.join(e for e in seq_id if e in VALID_CHARS) | |
103 seq_text = line_split[1]; | 107 seq_text = line_split[1]; |
104 if seq_id in multiple_data: | 108 if seq_id in multiple_data: |
105 return "Error: the id '"+seq_id+"' is duplicated"; | 109 return "Error: the id '"+seq_id+"' is duplicated"; |
106 multiple_data[seq_id] = seq_text; | 110 multiple_data[seq_id] = seq_text; |
107 if len(multiple_data) > 0: | 111 if len(multiple_data) > 0: |
122 for line in text_content: | 126 for line in text_content: |
123 if line.strip() != "": | 127 if line.strip() != "": |
124 line_split = line.strip().split("__tc__"); # split on tab | 128 line_split = line.strip().split("__tc__"); # split on tab |
125 if len(line_split) == 2: # 0:id , 1:seq , otherwise skip line | 129 if len(line_split) == 2: # 0:id , 1:seq , otherwise skip line |
126 seq_id = line_split[0]; | 130 seq_id = line_split[0]; |
131 # fix seq_id using valid chars only | |
132 seq_id = ''.join(e for e in seq_id if e in VALID_CHARS) | |
127 seq_text = line_split[1]; | 133 seq_text = line_split[1]; |
128 if seq_id in multiple_data: | 134 if seq_id in multiple_data: |
129 return "Error: the id '"+seq_id+"' is duplicated"; | 135 return "Error: the id '"+seq_id+"' is duplicated"; |
130 multiple_data[seq_id] = seq_text; | 136 multiple_data[seq_id] = seq_text; |
131 if len(multiple_data) > 0: | 137 if len(multiple_data) > 0: |