# HG changeset patch # User fabio # Date 1517437741 18000 # Node ID 027f2e9d4a257e57069e20bf8c5143c12eb57c81 # Parent f9ba0b65e1faa499f7320a6c41586ed272494c62 Uploaded 20180131 diff -r f9ba0b65e1fa -r 027f2e9d4a25 ._.shed.yml Binary file ._.shed.yml has changed diff -r f9ba0b65e1fa -r 027f2e9d4a25 ._example.tsv Binary file ._example.tsv has changed diff -r f9ba0b65e1fa -r 027f2e9d4a25 ._query.py Binary file ._query.py has changed diff -r f9ba0b65e1fa -r 027f2e9d4a25 ._query.xml Binary file ._query.xml has changed diff -r f9ba0b65e1fa -r 027f2e9d4a25 query.py --- a/query.py Wed Jan 31 16:05:25 2018 -0500 +++ b/query.py Wed Jan 31 17:29:01 2018 -0500 @@ -7,15 +7,17 @@ #from requests_futures.sessions import FuturesSession #### NN14 #### -service_url = "http://nn14.galaxyproject.org:8080/"; +SERVICE_URL = "http://nn14.galaxyproject.org:8080/"; #service_url = "http://127.0.0.1:8082/"; -query_url = service_url+"tree/0/query"; -status_url = service_url+"status/"; +QUERY_URL = SERVICE_URL+"tree/0/query"; +STATUS_URL = SERVICE_URL+"status/"; ############## # query delay in seconds -query_delay = 30; +QUERY_DELAY = 30; ############## +VALID_CHARS = '.-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ ' + def query_request( options, args, payload ): # add additional parameters to the payload #payload["tree_id"] = str(options.treeid); @@ -28,7 +30,7 @@ # create a session session = requests.Session(); # make a synchronous post request to the query route - req = session.post(query_url, headers=headers, json=payload); + req = session.post(QUERY_URL, headers=headers, json=payload); resp_code = req.status_code; print(str(req.content)+"\n\n"); if resp_code == requests.codes.ok: @@ -45,7 +47,7 @@ # create a new session session = requests.Session(); # make a synchronous get request to the status route - status_query_url = status_url.replace("", task_id); + status_query_url = STATUS_URL.replace("", task_id); status_req = session.get(status_query_url); status_resp_content = str(status_req.content); print(status_resp_content+"\n\n"); @@ -59,13 +61,13 @@ elif json_status_content['state'] in ['FAILURE', 'REVOKED']: return "Task status: "+str(json_status_content['state']); else: - time.sleep(query_delay); # in seconds + time.sleep(QUERY_DELAY); # in seconds # get output dir (collection) path output_dir_path = options.outputdir; if not os.path.exists(output_dir_path): os.makedirs(output_dir_path); - out_file_format = "txt"; + out_file_format = "tabular"; for block in json_status_content['results']: seq_id = block['sequence_id']; @@ -100,6 +102,8 @@ line_split = line.strip().split("\t"); # split on tab if len(line_split) == 2: # 0:id , 1:seq , otherwise skip line seq_id = line_split[0]; + # fix seq_id using valid chars only + seq_id = ''.join(e for e in seq_id if e in VALID_CHARS) seq_text = line_split[1]; if seq_id in multiple_data: return "Error: the id '"+seq_id+"' is duplicated"; @@ -124,6 +128,8 @@ line_split = line.strip().split("__tc__"); # split on tab if len(line_split) == 2: # 0:id , 1:seq , otherwise skip line seq_id = line_split[0]; + # fix seq_id using valid chars only + seq_id = ''.join(e for e in seq_id if e in VALID_CHARS) seq_text = line_split[1]; if seq_id in multiple_data: return "Error: the id '"+seq_id+"' is duplicated"; diff -r f9ba0b65e1fa -r 027f2e9d4a25 query.xml --- a/query.xml Wed Jan 31 16:05:25 2018 -0500 +++ b/query.xml Wed Jan 31 17:29:01 2018 -0500 @@ -34,10 +34,10 @@ - + - + @@ -59,10 +59,13 @@ The input for this tool is a list of (ID, TRANSCRIPT) touples, one for each line, in a tab delimited format:: - seq_id_0 CCAACCAAAGGGAAAACTTTTTTCCGACTTTGGCCTAAAGGGTTTAACGGCCAAGTCAGAAGGGAAAAAGTTGCGCCA - seq_id_1 TTAATGACAGGGCCACATGATGTGAAAAAAAATCAGAAACCGAGTCAACGTGAGAAGATAGTACGTACTACCGCAAAT + id0 CCAACCAAAGGGAAAACTTTTTTCCGACTTTGGCCTAAAGGGTTTAACGGCCAAGTCAGAAGGGAAAAAGTTGCGCCA + id1 TTAATGACAGGGCCACATGATGTGAAAAAAAATCAGAAACCGAGTCAACGTGAGAAGATAGTACGTACTACCGCAAAT ... - seq_id_n CAATTAATGATAAATATTTTATAAGGTGCGGAAATAAAGTGAGGAATATCTTTTAAATTCAAGTTCAATTCTGAAAGC + idn CAATTAATGATAAATATTTTATAAGGTGCGGAAATAAAGTGAGGAATATCTTTTAAATTCAAGTTCAATTCTGAAAGC + +The ID can contain alphanumeric characters in addition to spaces, dots, dashes, and round and square brackets. +Any additional characters will be trimmed out. The output of the tool is a collection that contains a file for each ID with a list of accession numbers representing the samples that express one particular transcript.