Mercurial > repos > fabio > sbtas_se
changeset 10:027f2e9d4a25 draft
Uploaded 20180131
author | fabio |
---|---|
date | Wed, 31 Jan 2018 17:29:01 -0500 |
parents | f9ba0b65e1fa |
children | 0d0f7080b55c |
files | ._.shed.yml ._example.tsv ._query.py ._query.xml query.py query.xml |
diffstat | 6 files changed, 22 insertions(+), 13 deletions(-) [+] |
line wrap: on
line diff
--- a/query.py Wed Jan 31 16:05:25 2018 -0500 +++ b/query.py Wed Jan 31 17:29:01 2018 -0500 @@ -7,15 +7,17 @@ #from requests_futures.sessions import FuturesSession #### NN14 #### -service_url = "http://nn14.galaxyproject.org:8080/"; +SERVICE_URL = "http://nn14.galaxyproject.org:8080/"; #service_url = "http://127.0.0.1:8082/"; -query_url = service_url+"tree/0/query"; -status_url = service_url+"status/<task_id>"; +QUERY_URL = SERVICE_URL+"tree/0/query"; +STATUS_URL = SERVICE_URL+"status/<task_id>"; ############## # query delay in seconds -query_delay = 30; +QUERY_DELAY = 30; ############## +VALID_CHARS = '.-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ ' + def query_request( options, args, payload ): # add additional parameters to the payload #payload["tree_id"] = str(options.treeid); @@ -28,7 +30,7 @@ # create a session session = requests.Session(); # make a synchronous post request to the query route - req = session.post(query_url, headers=headers, json=payload); + req = session.post(QUERY_URL, headers=headers, json=payload); resp_code = req.status_code; print(str(req.content)+"\n\n"); if resp_code == requests.codes.ok: @@ -45,7 +47,7 @@ # create a new session session = requests.Session(); # make a synchronous get request to the status route - status_query_url = status_url.replace("<task_id>", task_id); + status_query_url = STATUS_URL.replace("<task_id>", task_id); status_req = session.get(status_query_url); status_resp_content = str(status_req.content); print(status_resp_content+"\n\n"); @@ -59,13 +61,13 @@ elif json_status_content['state'] in ['FAILURE', 'REVOKED']: return "Task status: "+str(json_status_content['state']); else: - time.sleep(query_delay); # in seconds + time.sleep(QUERY_DELAY); # in seconds # get output dir (collection) path output_dir_path = options.outputdir; if not os.path.exists(output_dir_path): os.makedirs(output_dir_path); - out_file_format = "txt"; + out_file_format = "tabular"; for block in json_status_content['results']: seq_id = block['sequence_id']; @@ -100,6 +102,8 @@ line_split = line.strip().split("\t"); # split on tab if len(line_split) == 2: # 0:id , 1:seq , otherwise skip line seq_id = line_split[0]; + # fix seq_id using valid chars only + seq_id = ''.join(e for e in seq_id if e in VALID_CHARS) seq_text = line_split[1]; if seq_id in multiple_data: return "Error: the id '"+seq_id+"' is duplicated"; @@ -124,6 +128,8 @@ line_split = line.strip().split("__tc__"); # split on tab if len(line_split) == 2: # 0:id , 1:seq , otherwise skip line seq_id = line_split[0]; + # fix seq_id using valid chars only + seq_id = ''.join(e for e in seq_id if e in VALID_CHARS) seq_text = line_split[1]; if seq_id in multiple_data: return "Error: the id '"+seq_id+"' is duplicated";
--- a/query.xml Wed Jan 31 16:05:25 2018 -0500 +++ b/query.xml Wed Jan 31 17:29:01 2018 -0500 @@ -34,10 +34,10 @@ <option value="1">By manually inserted text</option> </param> <when value="0"> - <param format="tabular" name="txtfiles" type="data" label="Select files" multiple="true" optional="true" help="Select one or more tabular files containing (ID, TRANSCRIPT) touples for each line. The content of these files will be merged and the result will represent a query to the AllSome Sequence Bloom Tree Search Engine that will return a collection containing a file for each id. The content of these files as result of the tool will be a list of accession numbers." /> + <param format="tabular" name="txtfiles" type="data" label="Select files" multiple="true" optional="true" help="Select one or more tabular files containing (ID, TRANSCRIPT) touples for each line. The content of these files will be merged and the result will represent a query to the AllSome Sequence Bloom Tree Search Engine that will return a collection containing a file for each ID. The content of these files as result of the tool will be a list of accession numbers." /> </when> <when value="1"> - <param name="sequences" type="text" area="True" size="5x25" label="Manually insert sequences" optional="true" help="Insert a list of (ID, TRANSCRIPT) touples in a tab delimited format, one for each line. The content of this text box will represent a query to the AllSome Sequence Bloom Tree Search Engine that will return a collection containing a file for each id. The content of these files as result of the tool will be a list of accession numbers." /> + <param name="sequences" type="text" area="True" size="5x25" label="Manually insert sequences" optional="true" help="Insert a list of (ID, TRANSCRIPT) touples in a tab delimited format, one for each line. The content of this text box will represent a query to the AllSome Sequence Bloom Tree Search Engine that will return a collection containing a file for each ID. The content of these files as result of the tool will be a list of accession numbers." /> </when> </conditional> <param name="sthreshold" size="3" type="float" value="0.5" min="0.0" max="1.0" label="Search threshold" help="This threshold controls the specificity. Lower values will produce more hits to the query. Higher values are more stringent and will produce fewer hits." /> @@ -59,10 +59,13 @@ The input for this tool is a list of (ID, TRANSCRIPT) touples, one for each line, in a tab delimited format:: - seq_id_0 CCAACCAAAGGGAAAACTTTTTTCCGACTTTGGCCTAAAGGGTTTAACGGCCAAGTCAGAAGGGAAAAAGTTGCGCCA - seq_id_1 TTAATGACAGGGCCACATGATGTGAAAAAAAATCAGAAACCGAGTCAACGTGAGAAGATAGTACGTACTACCGCAAAT + id0 CCAACCAAAGGGAAAACTTTTTTCCGACTTTGGCCTAAAGGGTTTAACGGCCAAGTCAGAAGGGAAAAAGTTGCGCCA + id1 TTAATGACAGGGCCACATGATGTGAAAAAAAATCAGAAACCGAGTCAACGTGAGAAGATAGTACGTACTACCGCAAAT ... - seq_id_n CAATTAATGATAAATATTTTATAAGGTGCGGAAATAAAGTGAGGAATATCTTTTAAATTCAAGTTCAATTCTGAAAGC + idn CAATTAATGATAAATATTTTATAAGGTGCGGAAATAAAGTGAGGAATATCTTTTAAATTCAAGTTCAATTCTGAAAGC + +The ID can contain alphanumeric characters in addition to spaces, dots, dashes, and round and square brackets. +Any additional characters will be trimmed out. The output of the tool is a collection that contains a file for each ID with a list of accession numbers representing the samples that express one particular transcript.