changeset 0:7412632c7b0e draft

planemo upload
author jowong
date Tue, 13 Nov 2018 10:46:18 -0500
parents
children fcfd1e66edf9
files Galaxy-Workflow-spotyping_workflow.ga spotyping.xml spotyping_postprocess.py spotyping_postprocess.xml
diffstat 4 files changed, 160 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Galaxy-Workflow-spotyping_workflow.ga	Tue Nov 13 10:46:18 2018 -0500
@@ -0,0 +1,1 @@
+{"uuid": "02ea1932-1245-4a6a-bed5-c2c87c1d0a0f", "tags": [], "format-version": "0.1", "name": "spotyping workflow", "steps": {"0": {"tool_id": null, "tool_version": null, "outputs": [], "workflow_outputs": [], "input_connections": {}, "tool_state": "{\"collection_type\": \"list:paired\"}", "id": 0, "uuid": "40c4132d-fefa-4e08-bf05-ae132a041e04", "errors": null, "name": "Input dataset collection", "label": null, "inputs": [], "position": {"top": 200, "left": 445.5}, "annotation": "", "content_id": null, "type": "data_collection_input"}, "1": {"tool_id": "testtoolshed.g2.bx.psu.edu/repos/jowong/spotyping/spotyping/1.0.4", "tool_version": "1.0.4", "outputs": [{"type": "txt", "name": "spotyping_results"}, {"type": "txt", "name": "spotyping_log"}, {"type": "excel.xls", "name": "sitvit_database_query"}], "workflow_outputs": [], "input_connections": {"data_input|input1": {"output_name": "output", "id": 0}}, "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"data_input\": \"{\\\"data_selector\\\": \\\"paired\\\", \\\"input1\\\": {\\\"values\\\": [{\\\"src\\\": \\\"hdca\\\", \\\"id\\\": 98}]}, \\\"__current_case__\\\": 0}\", \"advanced\": \"{\\\"min_relax\\\": \\\"6\\\", \\\"swift\\\": \\\"true\\\", \\\"seq\\\": \\\"false\\\", \\\"min\\\": \\\"5\\\"}\"}", "id": 1, "tool_shed_repository": {"owner": "jowong", "changeset_revision": "74c96eb05783", "name": "spotyping", "tool_shed": "testtoolshed.g2.bx.psu.edu"}, "uuid": "b77e24c0-e22c-4f0f-8746-00b611875fe1", "errors": null, "name": "Spoligotype Prediction", "post_job_actions": {"HideDatasetActionspotyping_log": {"output_name": "spotyping_log", "action_type": "HideDatasetAction", "action_arguments": {}}, "HideDatasetActionsitvit_database_query": {"output_name": "sitvit_database_query", "action_type": "HideDatasetAction", "action_arguments": {}}, "HideDatasetActionspotyping_results": {"output_name": "spotyping_results", "action_type": "HideDatasetAction", "action_arguments": {}}}, "label": null, "inputs": 
[], "position": {"top": 216, "left": 764}, "annotation": "", "content_id": "testtoolshed.g2.bx.psu.edu/repos/jowong/spotyping/spotyping/1.0.4", "type": "tool"}, "2": {"tool_id": "spotyping_postprocess", "tool_version": "1.0.0", "outputs": [{"type": "txt", "name": "output"}], "workflow_outputs": [{"output_name": "output", "uuid": "a8c15a67-ca48-4143-82ab-5b6afd8976aa", "label": null}], "input_connections": {"paths": {"output_name": "spotyping_results", "id": 1}}, "tool_state": "{\"paths\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"__rerun_remap_job_id__\": null, \"__page__\": null}", "id": 2, "uuid": "35b3cc55-573e-44fb-a1ae-b76852690bbd", "errors": null, "name": "Spotyping Postprocess", "post_job_actions": {}, "label": null, "inputs": [{"name": "paths", "description": "runtime parameter for tool Spotyping Postprocess"}], "position": {"top": 268, "left": 1082}, "annotation": "", "content_id": "spotyping_postprocess", "type": "tool"}}, "annotation": "", "a_galaxy_workflow": "true"}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spotyping.xml	Tue Nov 13 10:46:18 2018 -0500
@@ -0,0 +1,111 @@
+<tool id="spotyping" name="Spoligotype Prediction" version="1.0.4">
+    <description>fast and accurate in silico Mycobacterium spoligotyping from sequence reads</description>
+    <requirements>
+        <requirement type="package" version="2.1">spotyping</requirement>
+    </requirements>
+    <command detect_errors="aggressive"><![CDATA[
+        ## Dataset paths are single-quoted so the shell passes each one as exactly one argument.
+        SpoTyping.py
+        $advanced.seq
+        $advanced.swift
+        --min=$advanced.min
+        --rmin=$advanced.min_relax
+        #if str( $data_input.data_selector ) == "paired"
+            '$data_input.input1.forward' '$data_input.input1.reverse'
+        #elif str( $data_input.data_selector ) == "single"
+            '$data_input.input2'
+        #end if
+        ## The report name varies (SITVIT_ONLINE.*.xls); copy it to the fixed name read by the sitvit_database_query output.
+        && cp SITVIT_ONLINE.*.xls spotyping.xls
+        ]]></command>
+    <inputs>
+        <conditional name="data_input">
+            <param name="data_selector" type="select" label="Single or Paired-end Data" help="Select between paired and single end data to add name to dataset">
+                <option value="paired">Paired</option>
+                <option value="single">Single</option>
+            </param>
+            <when value="paired">
+                <param name="input1" format="data" type="data_collection" collection_type="paired" label="Select a paired collection" help="a paired data"/>
+            </when>
+            <when value="single"><!-- one dataset, not a collection: the command passes input2 as a single file path -->
+                <param name="input2" format="data" type="data" label="input" help="Specify dataset with single reads"/>
+            </when>
+        </conditional>
+        <section name="advanced" title="Advanced options" expanded="false">
+            <param type="boolean" argument="--seq" label="Input is assembled sequence" help="Input is either a complete genomic sequence or assembled contigs from an isolate" truevalue="--seq" falsevalue="" checked="false" />
+            <param type="boolean" argument="--swift" label="Swift mode" checked="true" truevalue="--swift=on" falsevalue="--swift=off" />
+            <param name="min" type="integer" value="5" label="MIN" help="minimum number of error-free hits to support presence of a spacer" />
+            <param name="min_relax" type="integer" value="6" label="MIN RELAX" help="minimum number of 1-error-tolerant hits to support presence of a spacer " />
+        </section>
+    </inputs>
+    <outputs><!-- each output is collected from a fixed file name in the job working directory (from_work_dir) -->
+        <data name="spotyping_results" label="spoligotyping results" format="txt" from_work_dir="SpoTyping"/>
+        <data name="spotyping_log" label="spoligotyping log" format="txt" from_work_dir="SpoTyping.log"/>
+        <data name="sitvit_database_query" label="query" format="excel.xls" from_work_dir="spotyping.xls"/>
+    </outputs>
+    <help><![CDATA[
+This is a modified version of the IUC wrapper for SpoTyping that does not concatenate or rename the input files. It also runs properly when supplied with paired-end reads.
+
+    SpoTyping_ is a software tool for predicting the spoligotype_ from sequencing reads, complete genomic sequences and assembled contigs.
+
+    **Input:**
+
+    - Fastq file(s) - single-end reads, or paired-end reads supplied as a paired collection (both files are passed to SpoTyping directly, so they do not need to be concatenated first)
+    - Fasta file of a complete genomic sequence or assembled contigs of an isolate (with --seq option)
+
+    *Note on input size*: In swift mode the sampling threshold is reached in approximately 30x coverage when using
+    paired end sequencing of a *M. tuberculosis* genome.
+
+    **Output:**
+
+    Count of hits from BLAST result for each spacer sequence and predicted spoligotype in the format of binary code and octal code.
+
+    **Options:**
+
+
+    \--seq
+    Set this if input is a fasta file that contains only complete genomic sequence or assembled contigs from an isolate. [Default is off]
+
+    \-s SWIFT, --swift=SWIFT
+    Swift mode, either "on" or "off" [Default: on] - swift mode samples 250 million bases to use for spoligotyping
+
+    \-m MIN, --min=MIN
+    minimum number of error-free hits to support presence of a spacer [Default: 5]
+    
+    \-r MIN_RELAX, --rmin=MIN_RELAX
+    minimum number of 1-error-tolerant hits to support presence of a spacer [Default: 6].
+
+
+
+    **Got weird spoligotype prediction?**
+
+    If the sequencing throughput is very low (<40Mbp, for example), SpoTyping may not be able to give an accurate prediction because of the relatively low read depth.
+
+    **Interpreting the spoligotype**
+
+    The binary or octal spoligotype can be used to look up lineage information using a service
+    like `TB Lineage`_.
+
+    **SITVIT reports**
+
+    A report on the detected spoligotype retrieved from the SITVIT_ database is returned as the "query" output
+    (Excel format).
+
+  .. _SpoTyping: https://github.com/xiaeryu/SpoTyping
+  .. _spoligotype: https://www.ncbi.nlm.nih.gov/pubmed/19521871
+  .. _TB Lineage: http://tbinsight.cs.rpi.edu/run_tb_lineage.html
+  .. _SITVIT: http://www.pasteur-guadeloupe.fr:8081/SITVIT_ONLINE/ 
+    ]]></help>
+    <citations>
+        <citation type="bibtex">
+@misc{githubSpoTyping,
+  author = {Xia, Eryu},
+  year = {2016},
+  title = {SpoTyping},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  url = {https://github.com/xiaeryu/SpoTyping},
+}</citation>
+        <citation type="doi">10.1186/s13073-016-0270-7</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spotyping_postprocess.py	Tue Nov 13 10:46:18 2018 -0500
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+
+import sys
+import argparse as ap
+import re
+parser = ap.ArgumentParser(prog='spotyping_postprocess', conflict_handler='resolve',
+                           description="Postprocess galaxy spotyping output")
+
+input = parser.add_argument_group('Input', '')
+input.add_argument('-s', '--sample', nargs='+', required=True, help="Sample names")
+input.add_argument('-f', '--file', nargs='+', required=True, help="File of spotyping in galaxy convention")
+if len(sys.argv) == 0:
+    parser.print_usage()
+    sys.exit(1)
+
+args = parser.parse_args()
+
+
+sample_dict={}
+
+
+with open('spotyping_postprocess_output.txt', 'w') as output:
+	index = 0
+	for path in args.file:
+		with open(path) as f:
+			for line in f:
+				#sample_dict[re.sub(".*/","",line.rstrip())] = args.sample[path_index]
+				sample = re.sub('(_1.fastq(.gz)*|_2.fastq(.gz)*|.fastq(.gz)*)', '', args.sample[index].rstrip().lstrip())
+				output.write(re.sub(".*.dat", sample, line))
+		index += 1
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spotyping_postprocess.xml	Tue Nov 13 10:46:18 2018 -0500
@@ -0,0 +1,17 @@
+<tool id="spotyping_postprocess" name="Spotyping Postprocess" version="1.0.0">
+    <description>Postprocess spotyping galaxy output</description>
+    <!-- Run the script from the tool directory (interpreter= is deprecated); quote each identifier and path so it stays one shell argument. -->
+    <command><![CDATA[
+        python '$__tool_directory__/spotyping_postprocess.py' -s #for $path in $paths# '$path.element_identifier' #end for# -f #for $path in $paths# '$path' #end for#
+    ]]></command>
+    <inputs>
+        <param name="paths" type="data" format="txt" label="Collection of files" help="" optional="False" multiple="True"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="txt" from_work_dir="spotyping_postprocess_output.txt"/>
+    </outputs>
+    <help>
+This tool processes the spotyping galaxy output such that it is in line with the command line
+    </help>
+    <citations/>
+</tool>