diff query.py @ 10:027f2e9d4a25 draft

Uploaded 20180131
author fabio
date Wed, 31 Jan 2018 17:29:01 -0500
parents 256d015d69f9
children 039e8e1e8b1f
line wrap: on
line diff
--- a/query.py	Wed Jan 31 16:05:25 2018 -0500
+++ b/query.py	Wed Jan 31 17:29:01 2018 -0500
@@ -7,15 +7,17 @@
 #from requests_futures.sessions import FuturesSession
 
 #### NN14 ####
-service_url = "http://nn14.galaxyproject.org:8080/";
+SERVICE_URL = "http://nn14.galaxyproject.org:8080/";
 #service_url = "http://127.0.0.1:8082/";
-query_url = service_url+"tree/0/query";
-status_url = service_url+"status/<task_id>";
+QUERY_URL = SERVICE_URL+"tree/0/query";
+STATUS_URL = SERVICE_URL+"status/<task_id>";
 ##############
 # query delay in seconds
-query_delay = 30;
+QUERY_DELAY = 30;
 ##############
 
+VALID_CHARS = '.-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ '
+
 def query_request( options, args, payload ):
     # add additional parameters to the payload
     #payload["tree_id"] = str(options.treeid);
@@ -28,7 +30,7 @@
     # create a session
     session = requests.Session();
     # make a synchronous post request to the query route
-    req = session.post(query_url, headers=headers, json=payload);
+    req = session.post(QUERY_URL, headers=headers, json=payload);
     resp_code = req.status_code;
     print(str(req.content)+"\n\n");
     if resp_code == requests.codes.ok:
@@ -45,7 +47,7 @@
             # create a new session
             session = requests.Session();
             # make a synchronous get request to the status route
-            status_query_url = status_url.replace("<task_id>", task_id);
+            status_query_url = STATUS_URL.replace("<task_id>", task_id);
             status_req = session.get(status_query_url);
             status_resp_content = str(status_req.content);
             print(status_resp_content+"\n\n");
@@ -59,13 +61,13 @@
             elif json_status_content['state'] in ['FAILURE', 'REVOKED']:
                 return "Task status: "+str(json_status_content['state']);
             else:
-                time.sleep(query_delay); # in seconds
+                time.sleep(QUERY_DELAY); # in seconds
         
         # get output dir (collection) path
         output_dir_path = options.outputdir;
         if not os.path.exists(output_dir_path):
             os.makedirs(output_dir_path);
-        out_file_format = "txt";
+        out_file_format = "tabular";
 
         for block in json_status_content['results']:
             seq_id = block['sequence_id'];
@@ -100,6 +102,8 @@
                         line_split = line.strip().split("\t"); # split on tab
                         if len(line_split) == 2: # 0:id , 1:seq , otherwise skip line
                             seq_id = line_split[0];
+                            # fix seq_id using valid chars only
+                            seq_id = ''.join(e for e in seq_id if e in VALID_CHARS)
                             seq_text = line_split[1];
                             if seq_id in multiple_data:
                                 return "Error: the id '"+seq_id+"' is duplicated";
@@ -124,6 +128,8 @@
                         line_split = line.strip().split("__tc__"); # split on tab
                         if len(line_split) == 2: # 0:id , 1:seq , otherwise skip line
                             seq_id = line_split[0];
+                            # fix seq_id using valid chars only
+                            seq_id = ''.join(e for e in seq_id if e in VALID_CHARS)
                             seq_text = line_split[1];
                             if seq_id in multiple_data:
                                 return "Error: the id '"+seq_id+"' is duplicated";