Mercurial > repos > fabio > sbtas_se
comparison query.py @ 12:039e8e1e8b1f draft
Uploaded 20180201
author | fabio |
---|---|
date | Thu, 01 Feb 2018 16:23:17 -0500 |
parents | 027f2e9d4a25 |
children | e780b47013df |
comparison
equal
deleted
inserted
replaced
11:0d0f7080b55c | 12:039e8e1e8b1f |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 | 2 |
3 # https://github.com/ross/requests-futures | 3 # https://github.com/ross/requests-futures |
4 # http://docs.python-requests.org/en/master/user/quickstart/#more-complicated-post-requests | 4 # http://docs.python-requests.org/en/master/user/quickstart/#more-complicated-post-requests |
5 | 5 |
6 import os, uuid, optparse, requests, json, time | 6 import sys, os, uuid, optparse, requests, json, time |
7 #from requests_futures.sessions import FuturesSession | 7 #from requests_futures.sessions import FuturesSession |
8 | 8 |
9 #### NN14 #### | 9 #### NN14 #### |
10 SERVICE_URL = "http://nn14.galaxyproject.org:8080/"; | 10 SERVICE_URL = "http://nn14.galaxyproject.org:8080/"; |
11 #service_url = "http://127.0.0.1:8082/"; | 11 #service_url = "http://127.0.0.1:8082/"; |
14 ############## | 14 ############## |
15 # query delay in seconds | 15 # query delay in seconds |
16 QUERY_DELAY = 30; | 16 QUERY_DELAY = 30; |
17 ############## | 17 ############## |
18 | 18 |
19 __version__ = "1.0.0"; | |
19 VALID_CHARS = '.-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ ' | 20 VALID_CHARS = '.-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ ' |
20 | 21 |
22 # in the case of collections, exitcodes equal to 0 and 1 are not considered errors | |
23 def raiseException( exitcode, message, errorfilepath ): | |
24 with open(errorfilepath, 'w') as out: | |
25 out.write(message); | |
26 sys.exit(exitcode); | |
27 | |
21 def query_request( options, args, payload ): | 28 def query_request( options, args, payload ): |
29 output_dir_path = options.outputdir; | |
22 # add additional parameters to the payload | 30 # add additional parameters to the payload |
23 #payload["tree_id"] = str(options.treeid); | 31 #payload["tree_id"] = str(options.treeid); |
24 payload["search_mode"] = str(options.search); | 32 payload["search_mode"] = str(options.search); |
25 payload["exact_algorithm"] = int(options.exact); | 33 payload["exact_algorithm"] = int(options.exact); |
26 payload["search_threshold"] = float(options.sthreshold); | 34 payload["search_threshold"] = float(options.sthreshold); |
30 # create a session | 38 # create a session |
31 session = requests.Session(); | 39 session = requests.Session(); |
32 # make a synchronous post request to the query route | 40 # make a synchronous post request to the query route |
33 req = session.post(QUERY_URL, headers=headers, json=payload); | 41 req = session.post(QUERY_URL, headers=headers, json=payload); |
34 resp_code = req.status_code; | 42 resp_code = req.status_code; |
35 print(str(req.content)+"\n\n"); | 43 #print(str(req.content)+"\n\n"); |
36 if resp_code == requests.codes.ok: | 44 if resp_code == requests.codes.ok: |
37 resp_content = str(req.content); | 45 resp_content = str(req.content); |
38 # convert out to json | 46 # convert out to json |
39 json_content = json.loads(resp_content); | 47 json_content = json.loads(resp_content); |
40 # retrieve task id | 48 # retrieve task id |
41 task_id = json_content['task_id']; | 49 task_id = json_content['task_id']; |
42 task_processed = False; | 50 task_processed = False; |
43 # results json content | 51 # results json content |
44 json_status_content = None; | 52 json_status_content = None; |
45 task_status = None; | |
46 while task_processed is False: | 53 while task_processed is False: |
47 # create a new session | 54 # create a new session |
48 session = requests.Session(); | 55 session = requests.Session(); |
49 # make a synchronous get request to the status route | 56 # make a synchronous get request to the status route |
50 status_query_url = STATUS_URL.replace("<task_id>", task_id); | 57 status_query_url = STATUS_URL.replace("<task_id>", task_id); |
51 status_req = session.get(status_query_url); | 58 status_req = session.get(status_query_url); |
52 status_resp_content = str(status_req.content); | 59 status_resp_content = str(status_req.content); |
53 print(status_resp_content+"\n\n"); | 60 #print(status_resp_content+"\n\n"); |
54 # convert out to json | 61 # convert out to json |
55 json_status_content = json.loads(status_resp_content); | 62 json_status_content = json.loads(status_resp_content); |
56 # take a look at the state | 63 # take a look at the state |
57 # state attribute is always available | 64 # state attribute is always available |
58 if json_status_content['state'] == 'SUCCESS': | 65 if json_status_content['state'] == 'SUCCESS': |
59 task_processed = True; | 66 task_processed = True; |
60 break; | 67 break; |
61 elif json_status_content['state'] in ['FAILURE', 'REVOKED']: | 68 elif json_status_content['state'] in ['FAILURE', 'REVOKED']: |
62 return "Task status: "+str(json_status_content['state']); | 69 return raiseException( 1, "Task ID: "+str(task_id)+"\nTask status: "+str(json_status_content['state']), str(options.errorfile) ); |
63 else: | 70 else: |
64 time.sleep(QUERY_DELAY); # in seconds | 71 time.sleep(QUERY_DELAY); # in seconds |
65 | 72 |
66 # get output dir (collection) path | |
67 output_dir_path = options.outputdir; | |
68 if not os.path.exists(output_dir_path): | |
69 os.makedirs(output_dir_path); | |
70 out_file_format = "tabular"; | 73 out_file_format = "tabular"; |
71 | |
72 for block in json_status_content['results']: | 74 for block in json_status_content['results']: |
73 seq_id = block['sequence_id']; | 75 seq_id = block['sequence_id']; |
74 accessions = block['accession_numbers']; | 76 accessions = block['accession_numbers']; |
75 # put response block in the output collection | 77 # put response block in the output collection |
76 output_file_path = os.path.join(output_dir_path, seq_id + "_" + out_file_format); | 78 output_file_path = os.path.join(output_dir_path, seq_id + "_" + out_file_format); |
77 accessions_list = ""; | 79 accessions_list = ""; |
78 for accession_number in accessions: | 80 for accession_number in accessions: |
79 accessions_list = accessions_list + accession_number + "\n"; | 81 accessions_list = accessions_list + accession_number + "\n"; |
80 with open(output_file_path, 'w') as out: | 82 with open(output_file_path, 'w') as out: |
81 out.write(accessions_list.strip()); | 83 out.write(accessions_list.strip()); |
84 return sys.exit(0); | |
82 else: | 85 else: |
83 return "Unable to query the remote server. Please try again in a while."; | 86 return raiseException( 1, "Unable to query the remote server. Please try again in a while.", str(options.errorfile) ); |
84 | 87 |
85 def query( options, args ): | 88 def query( options, args ): |
89 output_dir_path = options.outputdir; | |
86 multiple_data = {}; | 90 multiple_data = {}; |
87 comma_sep_file_paths = options.files; | 91 comma_sep_file_paths = options.files; |
88 #print("files: "+str(comma_sep_file_paths)+" - "+str(type(comma_sep_file_paths))); | 92 #print("files: "+str(comma_sep_file_paths)+" - "+str(type(comma_sep_file_paths))); |
89 # check if options.files contains at least one file path | 93 # check if options.files contains at least one file path |
90 if comma_sep_file_paths is not None: | 94 if comma_sep_file_paths is not None: |
104 seq_id = line_split[0]; | 108 seq_id = line_split[0]; |
105 # fix seq_id using valid chars only | 109 # fix seq_id using valid chars only |
106 seq_id = ''.join(e for e in seq_id if e in VALID_CHARS) | 110 seq_id = ''.join(e for e in seq_id if e in VALID_CHARS) |
107 seq_text = line_split[1]; | 111 seq_text = line_split[1]; |
108 if seq_id in multiple_data: | 112 if seq_id in multiple_data: |
109 return "Error: the id '"+seq_id+"' is duplicated"; | 113 return raiseException( 1, "Error: the id '"+seq_id+"' is duplicated", str(options.errorfile) ); |
110 multiple_data[seq_id] = seq_text; | 114 multiple_data[seq_id] = seq_text; |
111 if len(multiple_data) > 0: | 115 if len(multiple_data) > 0: |
112 return query_request( options, args, multiple_data ); | 116 return query_request( options, args, multiple_data ); |
113 #return echo( options, args ); | 117 #return echo( options, args ); |
114 else: | 118 else: |
115 return "An error has occurred. Please be sure that your input files are valid."; | 119 return raiseException( 1, "An error has occurred. Please be sure that your input files are valid.", str(options.errorfile) ); |
116 else: | 120 else: |
117 # try with the sequence in --sequence | 121 # try with the sequence in --sequence |
118 text_content = options.sequences; | 122 text_content = options.sequences; |
119 #print("sequences: "+text_content); | 123 #print("sequences: "+text_content); |
120 # check if options.sequences contains a list of sequences (one for each row) | 124 # check if options.sequences contains a list of sequences (one for each row) |
130 seq_id = line_split[0]; | 134 seq_id = line_split[0]; |
131 # fix seq_id using valid chars only | 135 # fix seq_id using valid chars only |
132 seq_id = ''.join(e for e in seq_id if e in VALID_CHARS) | 136 seq_id = ''.join(e for e in seq_id if e in VALID_CHARS) |
133 seq_text = line_split[1]; | 137 seq_text = line_split[1]; |
134 if seq_id in multiple_data: | 138 if seq_id in multiple_data: |
135 return "Error: the id '"+seq_id+"' is duplicated"; | 139 return raiseException( 1, "Error: the id '"+seq_id+"' is duplicated", str(options.errorfile) ); |
136 multiple_data[seq_id] = seq_text; | 140 multiple_data[seq_id] = seq_text; |
137 if len(multiple_data) > 0: | 141 if len(multiple_data) > 0: |
138 return query_request( options, args, multiple_data ); | 142 return query_request( options, args, multiple_data ); |
139 #return echo( options, args ); | 143 #return echo( options, args ); |
140 else: | 144 else: |
141 return "An error has occurred. Please be sure that your input files are valid."; | 145 return raiseException( 1, "An error has occurred. Please be sure that your input files are valid.", str(options.errorfile) ); |
142 else: | 146 else: |
143 return "You have to insert at least one row formatted as a tab delimited <id, sequence> touple"; | 147 return raiseException( 1, "You have to insert at least one row formatted as a tab delimited (ID, SEQUENCE) couple", str(options.errorfile) ); |
144 return -1; | 148 return 1; |
145 | 149 |
146 def __main__(): | 150 def __main__(): |
147 # Parse the command line options | 151 # Parse the command line options |
148 usage = "Usage: query.py --files comma_sep_file_paths --names comma_seq_file_names --sequences sequences_text --search search_mode --exact exact_alg --sthreshold threshold --outputdir output_dir_path"; | 152 usage = "Usage: query.py --files comma_sep_file_paths --names comma_seq_file_names --sequences sequences_text --search search_mode --exact exact_alg --sthreshold threshold --outputdir output_dir_path"; |
149 parser = optparse.OptionParser(usage = usage); | 153 parser = optparse.OptionParser(usage = usage); |
154 parser.add_option("-v", "--version", action="store_true", dest="version", | |
155 default=False, help="display version and exit") | |
150 parser.add_option("-f", "--files", type="string", | 156 parser.add_option("-f", "--files", type="string", |
151 action="store", dest="files", help="comma separated files path"); | 157 action="store", dest="files", help="comma separated files path"); |
152 parser.add_option("-n", "--names", type="string", | 158 parser.add_option("-n", "--names", type="string", |
153 action="store", dest="names", help="comma separated names associated to the files specified in --files"); | 159 action="store", dest="names", help="comma separated names associated to the files specified in --files"); |
154 parser.add_option("-s", "--sequences", type="string", | 160 parser.add_option("-s", "--sequences", type="string", |
159 action="store", dest="search", help="search mode"); | 165 action="store", dest="search", help="search mode"); |
160 parser.add_option("-e", "--exact", type="int", default=0, | 166 parser.add_option("-e", "--exact", type="int", default=0, |
161 action="store", dest="exact", help="exact algorithm (required if search is 1 only)"); | 167 action="store", dest="exact", help="exact algorithm (required if search is 1 only)"); |
162 parser.add_option("-t", "--sthreshold", type="float", | 168 parser.add_option("-t", "--sthreshold", type="float", |
163 action="store", dest="sthreshold", help="threshold applied to the search algrithm"); | 169 action="store", dest="sthreshold", help="threshold applied to the search algrithm"); |
164 parser.add_option("-o", "--outputdir", type="string", | 170 parser.add_option("-o", "--outputdir", type="string", default="output", |
165 action="store", dest="outputdir", help="output directory (collection) path"); | 171 action="store", dest="outputdir", help="output directory (collection) path"); |
172 parser.add_option("-r", "--errorfile", type="string", default="error.log", | |
173 action="store", dest="errorfile", help="error file name containing error messages"); | |
166 | 174 |
167 #parser.add_option("-k", "--outfile", type="string", | |
168 #action="store", dest="outfile", help="output file"); | |
169 | |
170 # TEST | 175 # TEST |
171 #--search 'rrr' | |
172 #--sthreshold 0.5 | |
173 #--exact 0 | |
174 #--sequences 'id0__tc__CAATTAATGATAAATATTTTATAAGGTGCGGAAATAAAGTGAGGAATATCTTTTAAATTCAAGTTCAATTCTGAAAGC' | |
175 #--outputdir 'collection_content' | |
176 #sequences = 'NM_001169378.2__tc__atttcggatgctttggagggaggaactctagtgctgcattgattggggcgtgtgttaatgatattcccagttcgcatggcgagcatcgattcctggtacgtatgtgggccccttgactcccacttatcgcacttgtcgttcgcaatttgcatgaattccgcttcgtctgaaacgcacttgcgccagacttctccggctggtctgatctggtctgtgatccggtctggtggggcgccagttgcgtttcgagctcatcaccagtcactccgcagtcgcattctgccagaggtctccgatcaagagcgcttctccattcgagattcaaacgcagcgcggtctgacgccgccacatcgagtgaaatccatatcgatggccacattcacacaggacgagatcgacttcctgcgcagccatggcaacgagctgtgtgccaagacctggctgggattgtgggatccgaagcgggctgtgcaccagcaggagcagcgcgaactgatgatggacaagtatgagcggaagcgatactacctggagccggccagtcctcttaagtcgctggccaatgcggtcaacctgaagtcgtctgctccggcgacgaaccacactcagaatggccaccaaaatgggtatgccagcatccatttgacgcctcctgctgcccagcggacctcggccaatggattgcagaaggtggccaactcgtcgagtaactcttctggaaagacctcatcctcgatcagtaggccacactataatcaccagaacaacagccaaaacaacaatcacgatgcctttggcctgggtggcggattgagcagcctgaacagcgccggttccacatccactggagctctttccgacaccagcagttgtgctagcaatggcttcggtgcggactgcgactttgtggctgactttggctcggccaacattttcgacgccacatcggcgcgttccacaggatcgccggcggtgtcgtccgtgtcctcagtgggttccagcaatggctacgccaaggtgcagcccatccgggcagctcatctccagcagcaacagcagttgcagcagcagctgcatcagcagcagctcctcaatggcaatggtcatcagggcactgagaactttgccgacttcgatcacgctcccatctacaatgcagtggctccaccgacttttaacgattggatcagcgactggagcaggcggggcttccacgatcccttcgacgattgcgatgactcgccaccaggtgcccgccctccagcacctgcgccagctcctgctcaagttcccgcagtatcatcaccattgccaaccgtccgagaagaaccagagcttgcgtggaatttttgggaggacgagatgcgaatagaggcgcaggaaaaggagtcccaaactaaacagccggagttgggctactccttttcgattagtactactacgcccctttccccttcgaatcccttcctgccctaccttgtcagtgaggagcagcatcgaaatcatccagagaagccctccttttcgtattcgttgttcagctccatatcaaatagttcgcaagaagatcaggcggatgatcatgagatgaatgttttaaatgccaatttccatgatttctttacgtggagtgctcccttgcagaacggccatacgaccagtccgcccaagggcggaaatgcagcgatggcgcccagtgaggatcgatatgccgctcttaaggatctcgacgagcagctgcgagaactgaaggccagcgaaagcgccacagagacgcccacgcccaccagtggcaatgttcaggccacagatgcctttggtggagccctcaacaacaatccaaatcccttcaagggccagcaacagcagcagctcagcagccatgtggtgaatccattccagcagcagcaacagcagcagcaccagcagaatctctatggccagttgacgctcataccaaatgcctacggcagcagttcccagcagcagatggggcaccatctcctccagcagcagcagcagcaacagcagagcttcttcaacttcaacaacaacgggttcgccatctcgcagggtctgcccaacggctgcggcttcggcagcatgcaacccgctcctgtgatggccaacaatccctttgcagccagcggcgccatgaacaccaacaatccattcttatgagactcaacccgggagaatccgcctcgcgccacctggcagaggcgctgagccagcgaacaaagagcagacgcggaggaaccgaaccgaaattagtccattttactaacaatagcgttaatctatgtatacataatgcacgccggagagcactctttgtgtacatagcccaaatatgtacacccgaaaggctccacgctgacgctagtcctcgcggatggcggaggcggactggggcgttgatatattcttttacatggtaactctactctaacgtttacggatacggatatttgtatttgccgtttgccctagaactctatacttgtactaagcgcccatgaacacttcatccactaacatagctactaatcctcatcctagtggaggatgcagttggtccagacactctgttatttgttttatccatcctcgtacttgtctttgtcccatttagcactttcgttgcggataagaactttgtcagttattgattgtgtggccttaataagattataaaactaaatattataacgtacgactatacatatacggatacagatacagattcagacacagttagtacagatacagatatacatatacgcttttgtacctaatgaattgcttcttgtttccattgctaatcatctgcttttcgtgtgctaattttatacactagtacgtgcgatatcggccgtgcagatagattgctcagctcgcgagtcaagcctcttttggttgcacccacggcagacatttgtacatatactgtctgattgtaagcctcgtgtaatacctccattaacaccactcccccaccacccatccatcgaaccccgaatccatgactcaattcactgctcacatgtccatgcccatgccttaacgtgtcaaacattatcgaagccttaaagttatttaaaactacgaaatttcaataaaaacaaataagaacgctatc'; | 176 #sequences = 'NM_001169378.2__tc__atttcggatgctttggagggaggaactctagtgctgcattgattggggcgtgtgttaatgatattcccagttcgcatggcgagcatcgattcctggtacgtatgtgggccccttgactcccacttatcgcacttgtcgttcgcaatttgcatgaattccgcttcgtctgaaacgcacttgcgccagacttctccggctggtctgatctggtctgtgatccggtctggtggggcgccagttgcgtttcgagctcatcaccagtcactccgcagtcgcattctgccagaggtctccgatcaagagcgcttctccattcgagattcaaacgcagcgcggtctgacgccgccacatcgagtgaaatccatatcgatggccacattcacacaggacgagatcgacttcctgcgcagccatggcaacgagctgtgtgccaagacctggctgggattgtgggatccgaagcgggctgtgcaccagcaggagcagcgcgaactgatgatggacaagtatgagcggaagcgatactacctggagccggccagtcctcttaagtcgctggccaatgcggtcaacctgaagtcgtctgctccggcgacgaaccacactcagaatggccaccaaaatgggtatgccagcatccatttgacgcctcctgctgcccagcggacctcggccaatggattgcagaaggtggccaactcgtcgagtaactcttctggaaagacctcatcctcgatcagtaggccacactataatcaccagaacaacagccaaaacaacaatcacgatgcctttggcctgggtggcggattgagcagcctgaacagcgccggttccacatccactggagctctttccgacaccagcagttgtgctagcaatggcttcggtgcggactgcgactttgtggctgactttggctcggccaacattttcgacgccacatcggcgcgttccacaggatcgccggcggtgtcgtccgtgtcctcagtgggttccagcaatggctacgccaaggtgcagcccatccgggcagctcatctccagcagcaacagcagttgcagcagcagctgcatcagcagcagctcctcaatggcaatggtcatcagggcactgagaactttgccgacttcgatcacgctcccatctacaatgcagtggctccaccgacttttaacgattggatcagcgactggagcaggcggggcttccacgatcccttcgacgattgcgatgactcgccaccaggtgcccgccctccagcacctgcgccagctcctgctcaagttcccgcagtatcatcaccattgccaaccgtccgagaagaaccagagcttgcgtggaatttttgggaggacgagatgcgaatagaggcgcaggaaaaggagtcccaaactaaacagccggagttgggctactccttttcgattagtactactacgcccctttccccttcgaatcccttcctgccctaccttgtcagtgaggagcagcatcgaaatcatccagagaagccctccttttcgtattcgttgttcagctccatatcaaatagttcgcaagaagatcaggcggatgatcatgagatgaatgttttaaatgccaatttccatgatttctttacgtggagtgctcccttgcagaacggccatacgaccagtccgcccaagggcggaaatgcagcgatggcgcccagtgaggatcgatatgccgctcttaaggatctcgacgagcagctgcgagaactgaaggccagcgaaagcgccacagagacgcccacgcccaccagtggcaatgttcaggccacagatgcctttggtggagccctcaacaacaatccaaatcccttcaagggccagcaacagcagcagctcagcagccatgtggtgaatccattccagcagcagcaacagcagcagcaccagcagaatctctatggccagttgacgctcataccaaatgcctacggcagcagttcccagcagcagatggggcaccatctcctccagcagcagcagcagcaacagcagagcttcttcaacttcaacaacaacgggttcgccatctcgcagggtctgcccaacggctgcggcttcggcagcatgcaacccgctcctgtgatggccaacaatccctttgcagccagcggcgccatgaacaccaacaatccattcttatgagactcaacccgggagaatccgcctcgcgccacctggcagaggcgctgagccagcgaacaaagagcagacgcggaggaaccgaaccgaaattagtccattttactaacaatagcgttaatctatgtatacataatgcacgccggagagcactctttgtgtacatagcccaaatatgtacacccgaaaggctccacgctgacgctagtcctcgcggatggcggaggcggactggggcgttgatatattcttttacatggtaactctactctaacgtttacggatacggatatttgtatttgccgtttgccctagaactctatacttgtactaagcgcccatgaacacttcatccactaacatagctactaatcctcatcctagtggaggatgcagttggtccagacactctgttatttgttttatccatcctcgtacttgtctttgtcccatttagcactttcgttgcggataagaactttgtcagttattgattgtgtggccttaataagattataaaactaaatattataacgtacgactatacatatacggatacagatacagattcagacacagttagtacagatacagatatacatatacgcttttgtacctaatgaattgcttcttgtttccattgctaatcatctgcttttcgtgtgctaattttatacactagtacgtgcgatatcggccgtgcagatagattgctcagctcgcgagtcaagcctcttttggttgcacccacggcagacatttgtacatatactgtctgattgtaagcctcgtgtaatacctccattaacaccactcccccaccacccatccatcgaaccccgaatccatgactcaattcactgctcacatgtccatgcccatgccttaacgtgtcaaacattatcgaagccttaaagttatttaaaactacgaaatttcaataaaaacaaataagaacgctatc'; |
177 #print(sequences); | |
178 #(options, args) = parser.parse_args(['-x', 'rrr', '-t', 0.5, '-s', sequences, '-o', 'collection_content']); | 177 #(options, args) = parser.parse_args(['-x', 'rrr', '-t', 0.5, '-s', sequences, '-o', 'collection_content']); |
179 | 178 |
180 (options, args) = parser.parse_args(); | 179 (options, args) = parser.parse_args(); |
181 return query( options, args ); | 180 if options.version: |
181 print __version__; | |
182 else: | |
183 # create output dir (collection) | |
184 output_dir_path = options.outputdir; | |
185 if not os.path.exists(output_dir_path): | |
186 os.makedirs(output_dir_path); | |
187 | |
188 return query( options, args ); | |
182 | 189 |
183 if __name__ == "__main__": __main__() | 190 if __name__ == "__main__": __main__() |