# HG changeset patch
# User fabio
# Date 1517437741 18000
# Node ID 027f2e9d4a257e57069e20bf8c5143c12eb57c81
# Parent  f9ba0b65e1faa499f7320a6c41586ed272494c62
Uploaded 20180131

diff -r f9ba0b65e1fa -r 027f2e9d4a25 ._.shed.yml
Binary file ._.shed.yml has changed
diff -r f9ba0b65e1fa -r 027f2e9d4a25 ._example.tsv
Binary file ._example.tsv has changed
diff -r f9ba0b65e1fa -r 027f2e9d4a25 ._query.py
Binary file ._query.py has changed
diff -r f9ba0b65e1fa -r 027f2e9d4a25 ._query.xml
Binary file ._query.xml has changed
diff -r f9ba0b65e1fa -r 027f2e9d4a25 query.py
--- a/query.py	Wed Jan 31 16:05:25 2018 -0500
+++ b/query.py	Wed Jan 31 17:29:01 2018 -0500
@@ -7,15 +7,17 @@
 #from requests_futures.sessions import FuturesSession
 
 #### NN14 ####
-service_url = "http://nn14.galaxyproject.org:8080/";
+SERVICE_URL = "http://nn14.galaxyproject.org:8080/";
 #service_url = "http://127.0.0.1:8082/";
-query_url = service_url+"tree/0/query";
-status_url = service_url+"status/<task_id>";
+QUERY_URL = SERVICE_URL+"tree/0/query";
+STATUS_URL = SERVICE_URL+"status/<task_id>";
 ##############
 # query delay in seconds
-query_delay = 30;
+QUERY_DELAY = 30;
 ##############
 
+VALID_CHARS = '.-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ '
+
 def query_request( options, args, payload ):
     # add additional parameters to the payload
     #payload["tree_id"] = str(options.treeid);
@@ -28,7 +30,7 @@
     # create a session
     session = requests.Session();
     # make a synchronous post request to the query route
-    req = session.post(query_url, headers=headers, json=payload);
+    req = session.post(QUERY_URL, headers=headers, json=payload);
     resp_code = req.status_code;
     print(str(req.content)+"\n\n");
     if resp_code == requests.codes.ok:
@@ -45,7 +47,7 @@
             # create a new session
             session = requests.Session();
             # make a synchronous get request to the status route
-            status_query_url = status_url.replace("<task_id>", task_id);
+            status_query_url = STATUS_URL.replace("<task_id>", task_id);
             status_req = session.get(status_query_url);
             status_resp_content = str(status_req.content);
             print(status_resp_content+"\n\n");
@@ -59,13 +61,13 @@
             elif json_status_content['state'] in ['FAILURE', 'REVOKED']:
                 return "Task status: "+str(json_status_content['state']);
             else:
-                time.sleep(query_delay); # in seconds
+                time.sleep(QUERY_DELAY); # in seconds
         
         # get output dir (collection) path
         output_dir_path = options.outputdir;
         if not os.path.exists(output_dir_path):
             os.makedirs(output_dir_path);
-        out_file_format = "txt";
+        out_file_format = "tabular";
 
         for block in json_status_content['results']:
             seq_id = block['sequence_id'];
@@ -100,6 +102,8 @@
                         line_split = line.strip().split("\t"); # split on tab
                         if len(line_split) == 2: # 0:id , 1:seq , otherwise skip line
                             seq_id = line_split[0];
+                            # fix seq_id using valid chars only
+                            seq_id = ''.join(e for e in seq_id if e in VALID_CHARS)
                             seq_text = line_split[1];
                             if seq_id in multiple_data:
                                 return "Error: the id '"+seq_id+"' is duplicated";
@@ -124,6 +128,8 @@
                         line_split = line.strip().split("__tc__"); # split on tab
                         if len(line_split) == 2: # 0:id , 1:seq , otherwise skip line
                             seq_id = line_split[0];
+                            # fix seq_id using valid chars only
+                            seq_id = ''.join(e for e in seq_id if e in VALID_CHARS)
                             seq_text = line_split[1];
                             if seq_id in multiple_data:
                                 return "Error: the id '"+seq_id+"' is duplicated";
diff -r f9ba0b65e1fa -r 027f2e9d4a25 query.xml
--- a/query.xml	Wed Jan 31 16:05:25 2018 -0500
+++ b/query.xml	Wed Jan 31 17:29:01 2018 -0500
@@ -34,10 +34,10 @@
                 <option value="1">By manually inserted text</option>
             </param>
             <when value="0">
-                <param format="tabular" name="txtfiles" type="data" label="Select files" multiple="true" optional="true" help="Select one or more tabular files containing (ID, TRANSCRIPT) touples for each line. The content of these files will be merged and the result will represent a query to the AllSome Sequence Bloom Tree Search Engine that will return a collection containing a file for each id. The content of these files as result of the tool will be a list of accession numbers." />
+                <param format="tabular" name="txtfiles" type="data" label="Select files" multiple="true" optional="true" help="Select one or more tabular files containing (ID, TRANSCRIPT) tuples for each line. The content of these files will be merged and the result will represent a query to the AllSome Sequence Bloom Tree Search Engine that will return a collection containing a file for each ID. The content of these files as result of the tool will be a list of accession numbers." />
             </when>
             <when value="1">
-                <param name="sequences" type="text" area="True" size="5x25" label="Manually insert sequences" optional="true" help="Insert a list of (ID, TRANSCRIPT) touples in a tab delimited format, one for each line. The content of this text box will represent a query to the AllSome Sequence Bloom Tree Search Engine that will return a collection containing a file for each id. The content of these files as result of the tool will be a list of accession numbers." />
+                <param name="sequences" type="text" area="True" size="5x25" label="Manually insert sequences" optional="true" help="Insert a list of (ID, TRANSCRIPT) tuples in a tab delimited format, one for each line. The content of this text box will represent a query to the AllSome Sequence Bloom Tree Search Engine that will return a collection containing a file for each ID. The content of these files as result of the tool will be a list of accession numbers." />
             </when>
         </conditional>            
         <param name="sthreshold" size="3" type="float" value="0.5" min="0.0" max="1.0" label="Search threshold" help="This threshold controls the specificity. Lower values will produce more hits to the query. Higher values are more stringent and will produce fewer hits." />
@@ -59,10 +59,13 @@
 The input for this tool is a list of (ID, TRANSCRIPT) touples, one for each line,
 in a tab delimited format::
     
-    seq_id_0  CCAACCAAAGGGAAAACTTTTTTCCGACTTTGGCCTAAAGGGTTTAACGGCCAAGTCAGAAGGGAAAAAGTTGCGCCA
-    seq_id_1  TTAATGACAGGGCCACATGATGTGAAAAAAAATCAGAAACCGAGTCAACGTGAGAAGATAGTACGTACTACCGCAAAT
+    id0  CCAACCAAAGGGAAAACTTTTTTCCGACTTTGGCCTAAAGGGTTTAACGGCCAAGTCAGAAGGGAAAAAGTTGCGCCA
+    id1  TTAATGACAGGGCCACATGATGTGAAAAAAAATCAGAAACCGAGTCAACGTGAGAAGATAGTACGTACTACCGCAAAT
     ...
-    seq_id_n  CAATTAATGATAAATATTTTATAAGGTGCGGAAATAAAGTGAGGAATATCTTTTAAATTCAAGTTCAATTCTGAAAGC
+    idn  CAATTAATGATAAATATTTTATAAGGTGCGGAAATAAAGTGAGGAATATCTTTTAAATTCAAGTTCAATTCTGAAAGC
+
+The ID can contain alphanumeric characters in addition to spaces, dots, dashes, and round and square brackets.
+Any other characters will be removed from the ID.
 
 The output of the tool is a collection that contains a file for each ID with a list of
 accession numbers representing the samples that express one particular transcript.