Mercurial > repos > jowong > spotyping
changeset 0:7412632c7b0e draft
planemo upload
author | jowong |
---|---|
date | Tue, 13 Nov 2018 10:46:18 -0500 |
parents | |
children | fcfd1e66edf9 |
files | Galaxy-Workflow-spotyping_workflow.ga spotyping.xml spotyping_postprocess.py spotyping_postprocess.xml |
diffstat | 4 files changed, 160 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Galaxy-Workflow-spotyping_workflow.ga Tue Nov 13 10:46:18 2018 -0500 @@ -0,0 +1,1 @@ +{"uuid": "02ea1932-1245-4a6a-bed5-c2c87c1d0a0f", "tags": [], "format-version": "0.1", "name": "spotyping workflow", "steps": {"0": {"tool_id": null, "tool_version": null, "outputs": [], "workflow_outputs": [], "input_connections": {}, "tool_state": "{\"collection_type\": \"list:paired\"}", "id": 0, "uuid": "40c4132d-fefa-4e08-bf05-ae132a041e04", "errors": null, "name": "Input dataset collection", "label": null, "inputs": [], "position": {"top": 200, "left": 445.5}, "annotation": "", "content_id": null, "type": "data_collection_input"}, "1": {"tool_id": "testtoolshed.g2.bx.psu.edu/repos/jowong/spotyping/spotyping/1.0.4", "tool_version": "1.0.4", "outputs": [{"type": "txt", "name": "spotyping_results"}, {"type": "txt", "name": "spotyping_log"}, {"type": "excel.xls", "name": "sitvit_database_query"}], "workflow_outputs": [], "input_connections": {"data_input|input1": {"output_name": "output", "id": 0}}, "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"data_input\": \"{\\\"data_selector\\\": \\\"paired\\\", \\\"input1\\\": {\\\"values\\\": [{\\\"src\\\": \\\"hdca\\\", \\\"id\\\": 98}]}, \\\"__current_case__\\\": 0}\", \"advanced\": \"{\\\"min_relax\\\": \\\"6\\\", \\\"swift\\\": \\\"true\\\", \\\"seq\\\": \\\"false\\\", \\\"min\\\": \\\"5\\\"}\"}", "id": 1, "tool_shed_repository": {"owner": "jowong", "changeset_revision": "74c96eb05783", "name": "spotyping", "tool_shed": "testtoolshed.g2.bx.psu.edu"}, "uuid": "b77e24c0-e22c-4f0f-8746-00b611875fe1", "errors": null, "name": "Spoligotype Prediction", "post_job_actions": {"HideDatasetActionspotyping_log": {"output_name": "spotyping_log", "action_type": "HideDatasetAction", "action_arguments": {}}, "HideDatasetActionsitvit_database_query": {"output_name": "sitvit_database_query", "action_type": "HideDatasetAction", "action_arguments": {}}, 
"HideDatasetActionspotyping_results": {"output_name": "spotyping_results", "action_type": "HideDatasetAction", "action_arguments": {}}}, "label": null, "inputs": [], "position": {"top": 216, "left": 764}, "annotation": "", "content_id": "testtoolshed.g2.bx.psu.edu/repos/jowong/spotyping/spotyping/1.0.4", "type": "tool"}, "2": {"tool_id": "spotyping_postprocess", "tool_version": "1.0.0", "outputs": [{"type": "txt", "name": "output"}], "workflow_outputs": [{"output_name": "output", "uuid": "a8c15a67-ca48-4143-82ab-5b6afd8976aa", "label": null}], "input_connections": {"paths": {"output_name": "spotyping_results", "id": 1}}, "tool_state": "{\"paths\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"__rerun_remap_job_id__\": null, \"__page__\": null}", "id": 2, "uuid": "35b3cc55-573e-44fb-a1ae-b76852690bbd", "errors": null, "name": "Spotyping Postprocess", "post_job_actions": {}, "label": null, "inputs": [{"name": "paths", "description": "runtime parameter for tool Spotyping Postprocess"}], "position": {"top": 268, "left": 1082}, "annotation": "", "content_id": "spotyping_postprocess", "type": "tool"}}, "annotation": "", "a_galaxy_workflow": "true"} \ No newline at end of file
<!-- spotyping.xml: Galaxy tool wrapper that runs SpoTyping.py on single-end reads or on a paired collection. -->
<tool id="spotyping" name="Spoligotype Prediction" version="1.0.4">
    <description>fast and accurate in silico Mycobacterium spoligotyping from sequence reads</description>
    <requirements>
        <requirement type="package" version="2.1">spotyping</requirement>
    </requirements>
    <!--
        The command is a Cheetah template.
        * The two advanced booleans render straight to their flag text
          (truevalue / falsevalue), so they are emitted without a prefix.
        * Dataset paths are single-quoted so the shell always receives each
          path as exactly one argument.
        * The trailing cp gives the SITVIT spreadsheet (whose generated name is
          only matched by a glob) the fixed name that the
          sitvit_database_query output collects through from_work_dir.
        NOTE(review): if no SITVIT_ONLINE spreadsheet is written, cp exits
        non-zero and the whole job is reported as failed; confirm this is intended.
    -->
    <command detect_errors="aggressive"><![CDATA[
        SpoTyping.py
        $advanced.seq
        $advanced.swift
        --min=$advanced.min
        --rmin=$advanced.min_relax
        #if str( $data_input.data_selector ) == "paired"
            '$data_input.input1.forward' '$data_input.input1.reverse'
        #end if
        #if str( $data_input.data_selector ) == "single"
            '$data_input.input2'
        #end if
        && cp SITVIT_ONLINE.*.xls spotyping.xls
    ]]>
    </command>
    <inputs>
        <!-- data_selector decides which of the two input parameters exists and is placed on the command line. -->
        <conditional name="data_input">
            <param name="data_selector" type="select" label="Single or Paired-end Data" help="Select between paired and single end data to add name to dataset">
                <option value="paired">Paired</option>
                <option value="single">Single</option>
            </param>
            <when value="paired">
                <param name="input1" format="data" type="data_collection" collection_type="paired" label="Select a paired collection" help="a paired data"/>
            </when>
            <when value="single">
                <!--
                    NOTE(review): input2 is declared as a data_collection although its help
                    text describes a single dataset and the command uses it as one path;
                    confirm the intended parameter type.
                -->
                <param name="input2" format="data" type="data_collection" label="input" help="Specify dataset with single reads"/>
            </when>
        </conditional>
        <section name="advanced" title="Advanced options" expanded="false">
            <param type="boolean" argument="--seq" label="Input is assembled sequence" help="Input is either a complete genomic sequence or assembled contigs from an isolate" truevalue="--seq" falsevalue="" checked="false" />
            <param type="boolean" argument="--swift" label="Swift mode" checked="true" truevalue="--swift=on" falsevalue="--swift=off" />
            <param name="min" type="integer" value="5" label="MIN" help="minimum number of error-free hits to support presence of a spacer" />
            <param name="min_relax" type="integer" value="6" label="MIN RELAX" help="minimum number of 1-error-tolerant hits to support presence of a spacer " />
        </section>
    </inputs>
    <!-- Each output is picked up from a fixed file name in the job working directory. -->
    <outputs>
        <data name="spotyping_results" label="spoligotyping results" format="txt" from_work_dir="SpoTyping"/>
        <data name="spotyping_log" label="spoligotyping log" format="txt" from_work_dir="SpoTyping.log"/>
        <data name="sitvit_database_query" label="query" format="excel.xls" from_work_dir="spotyping.xls"/>
    </outputs>
    <help><![CDATA[
This is a modified version of IUC's wrapper of spotyping without the concatenation and renaming of the input files. The wrapper also runs properly when supplied with paired-end reads.

SpoTyping_ is a software for predicting spoligotype_ from sequencing reads, complete genomic sequences and assembled contigs.

**Input:**

- Fastq file(s) - single-end reads, or a paired collection whose forward and reverse reads are both passed to SpoTyping (no concatenation is needed)
- Fasta file of a complete genomic sequence or assembled contigs of an isolate (with --seq option)

*Note on input size*: In swift mode the sampling threshold is reached in approximately 30x coverage when using
paired end sequencing of a *M. tuberculosis* genome.

**Output:**

Count of hits from BLAST result for each spacer sequence and predicted spoligotype in the format of binary code and octal code.

**Options:**

\--seq
    Set this if input is a fasta file that contains only complete genomic sequence or assembled contigs from an isolate. [Default is off]

\-s SWIFT, --swift=SWIFT
    Swift mode, either "on" or "off" [Default: on] - swift mode samples 250 million bases to use for spoligotyping

\-m MIN, --min=MIN
    minimum number of error-free hits to support presence of a spacer [Default: 5]

\-r MIN_RELAX, --rmin=MIN_RELAX
    minimum number of 1-error-tolerant hits to support presence of a spacer [Default: 6].

**Got weird spoligotype prediction?**

Sequencing throughput is very low (<40Mbp, for example): SpoTyping may not be able to give accurate prediction due to the relatively low read depth.

**Interpreting the spoligotype**

The binary or octal spoligotype can be used to look up lineage information using a service
like `TB Lineage`_.

**SITVIT reports**

The report on the detected spoligotype that SpoTyping retrieves from the SITVIT_ database is always collected and returned
as the "query" output. No (PDF format) plot of the report is produced by this wrapper.

.. _SpoTyping: https://github.com/xiaeryu/SpoTyping
.. _spoligotype: https://www.ncbi.nlm.nih.gov/pubmed/19521871
.. _TB Lineage: http://tbinsight.cs.rpi.edu/run_tb_lineage.html
.. _SITVIT: http://www.pasteur-guadeloupe.fr:8081/SITVIT_ONLINE/
    ]]></help>
    <citations>
        <citation type="bibtex">
@misc{githubSpoTyping,
  author = {Xia, Eryu},
  year = {2016},
  title = {SpoTyping},
  publisher = {GitHub},
  journal = {GitHub repository},
  url = {https://github.com/xiaeryu/SpoTyping},
}</citation>
        <citation type="doi">10.1186/s13073-016-0270-7</citation>
    </citations>
</tool>
#!/usr/bin/env python
"""spotyping_postprocess.py: relabel Galaxy SpoTyping results with sample names.

Every line of each results file given with ``-f`` has its leading dataset
path(s) (everything up to and including the last ``.dat``) replaced by the
cleaned sample name given at the same position with ``-s``. All rewritten
lines are concatenated into ``spotyping_postprocess_output.txt`` in the
current working directory.
"""

import argparse as ap
import re
import sys

# File the rewritten lines are written to; the Galaxy wrapper collects it
# from the job working directory under this exact name.
OUTPUT_PATH = 'spotyping_postprocess_output.txt'

# Read-file suffix removed from a sample name: an optional "_1"/"_2" mate tag,
# a literal ".fastq" and any number of ".gz" extensions. The dots are escaped
# so that e.g. "myfastq" is no longer mistaken for a ".fastq" suffix.
_READ_SUFFIX = re.compile(r'(?:_[12])?\.fastq(?:\.gz)*')

# Greedy match of everything up to and including the last literal ".dat" on a
# line, i.e. the Galaxy dataset path(s) that prefix a SpoTyping result line.
_DATASET_PREFIX = re.compile(r'.*\.dat')


def build_parser():
    """Return the command line parser (options ``-s/--sample`` and ``-f/--file``)."""
    parser = ap.ArgumentParser(prog='spotyping_postprocess', conflict_handler='resolve',
                               description="Postprocess galaxy spotyping output")
    # Named input_group so the builtin input() is not shadowed.
    input_group = parser.add_argument_group('Input', '')
    input_group.add_argument('-s', '--sample', nargs='+', required=True, help="Sample names")
    input_group.add_argument('-f', '--file', nargs='+', required=True,
                             help="File of spotyping in galaxy convention")
    return parser


def clean_sample_name(raw_name):
    """Strip surrounding whitespace and the fastq read suffix from ``raw_name``."""
    return _READ_SUFFIX.sub('', raw_name.strip())


def relabel_line(line, sample):
    """Return ``line`` with its leading dataset path(s) replaced by ``sample``.

    Lines that contain no ``.dat`` are returned unchanged. The replacement is
    supplied through a function so that backslashes or group references inside
    the sample name are inserted literally instead of being interpreted as a
    regular expression replacement template.
    """
    return _DATASET_PREFIX.sub(lambda _match: sample, line)


def main(argv=None):
    """Run the postprocessing.

    :param argv: argument list without the program name; defaults to
        ``sys.argv[1:]``.
    :raises SystemExit: with status 1 when called without any argument (after
        printing the usage), with status 2 on invalid arguments or when fewer
        sample names than files are given.
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = build_parser()
    # sys.argv always contains the program name, so "no arguments" means an
    # empty list here (the former len(sys.argv) == 0 test could never be true).
    if not argv:
        parser.print_usage()
        sys.exit(1)

    args = parser.parse_args(argv)

    # Fail before the output file is touched instead of dying with an
    # IndexError half-way through. Surplus sample names are still ignored,
    # exactly as before.
    if len(args.sample) < len(args.file):
        parser.error("%d file(s) given but only %d sample name(s)"
                     % (len(args.file), len(args.sample)))

    with open(OUTPUT_PATH, 'w') as output:
        for path, raw_name in zip(args.file, args.sample):
            # The sample name is constant for the whole file: compute it once.
            sample = clean_sample_name(raw_name)
            with open(path) as handle:
                for line in handle:
                    output.write(relabel_line(line, sample))


if __name__ == '__main__':
    main()
<!-- spotyping_postprocess.xml: Galaxy tool wrapper around spotyping_postprocess.py. -->
<tool id="spotyping_postprocess" name="Spotyping Postprocess" version="1.0.0">
    <description>Postprocess spotyping galaxy output</description>
    <!--
        The script is called explicitly through python from the tool directory
        instead of relying on the deprecated "interpreter" attribute.
        The element identifiers are passed after -s and the dataset paths after -f,
        both generated from the same loop over $paths so that they line up by position.
        Every value is single-quoted: an identifier or path that contains
        whitespace would otherwise be split into several arguments and shift
        the pairing between samples and files.
    -->
    <command><![CDATA[
        python '$__tool_directory__/spotyping_postprocess.py'
        -s
        #for $path in $paths
            '$path.element_identifier'
        #end for
        -f
        #for $path in $paths
            '$path'
        #end for
    ]]></command>
    <inputs>
        <param name="paths" type="data" format="txt" label="Collection of files" help="" optional="False" multiple="True"/>
    </inputs>
    <!-- The script always writes its result to this fixed file name in the working directory. -->
    <outputs>
        <data name="output" format="txt" from_work_dir="spotyping_postprocess_output.txt"/>
    </outputs>
    <help>
This tool processes the spotyping galaxy output such that it is in line with the command line
    </help>
    <citations>
    </citations>
</tool>