diff discosnp_pp.xml @ 0:dc70286127fe draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/colibread commit a213833526146246d277ec7851165971449b501e
author iuc
date Fri, 20 Oct 2017 03:15:43 -0400
parents
children c2d90cc4a063
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/discosnp_pp.xml	Fri Oct 20 03:15:43 2017 -0400
@@ -0,0 +1,199 @@
+<?xml version='1.0' encoding='utf-8'?>
+<tool profile="16.04" id="discosnp_pp" name="DiscoSnp++" version="2.2.10">
+    <description>is an efficient tool for detecting SNPs without a reference genome.</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <requirement type="package" version="2.2.10">discosnp</requirement>
+    </requirements>
+    <command><![CDATA[
+        ## simple option
+        #if str( $input_type_options.input_type) == "single"
+            @discosnp_single_reads@
+        ## paired option
+        #else if str( $input_type_options.input_type) == "paired"
+            @discosnp_paired_reads@
+        ## mix option
+        #else
+            @discosnp_single_reads@
+            @discosnp_paired_reads@
+        #end if
+
+        #if str($VCF_option.mapping) == 'reference'
+            #set $reference_file = str($VCF_option.G) + "." + $VCF_option.G.ext
+            ln -sf '${VCF_option.G}' '${reference_file}' &&
+        #end if
+
+        run_discoSnp++.sh
+        -r input.lst
+        -b ${b}
+        -D ${D}
+        -P ${P}
+        ${low_complexity}
+        -k ${k}
+        ${t}
+        ${T}
+
+        #if str($coverage_options_type.coverage_options) == 'auto'
+            -c auto
+        #else
+            -c '${coverage_options_type.c}'
+        #end if
+        -C ${C}
+        -d ${d}
+
+        #if str($VCF_option.mapping) == 'reference'
+            -G '${reference_file}'
+            -M ${VCF_option.M}
+            ${VCF_option.R}
+        #end if
+
+  ]]></command>
+
+  <inputs>
+
+      <conditional name="input_type_options">
+          <param name="input_type" type="select" label="Input options">
+              <option value="single">Single end reads</option>
+              <option value="paired">Paired end reads</option>
+              <option value="mix">Both single and paired reads</option>
+          </param>
+          <when value="single">
+              <param name='list_reads' argument="-r" format="fasta,fastq" type="data" multiple="true" label="Single read files" />
+          </when>
+          <when value="paired">
+              <param name='list_paired_reads' argument="-r" format="fasta,fastq" type="data_collection" collection_type="list:paired" multiple='true' label="List of paired read files" />
+          </when>
+          <when value="mix">
+              <param name='list_reads' argument="-r" format="fasta,fastq" type="data" multiple="true" label="Single read files" />
+              <param name='list_paired_reads' argument="-r" format="fasta,fastq" type="data_collection" collection_type="list:paired" multiple='true' label="List of paired read files"/>
+          </when>
+      </conditional>
+
+      <param argument="-b" type="select" label="Branching strategy">
+          <option value="0">variants for which any of the two paths is branching are discarded</option>
+          <option value="1">forbid SNPs for wich the two paths are branching</option>
+          <option value="2">No limitation on branching</option>
+      </param>
+
+      <param argument="-D" type="integer" label="Deletion size" value="0" help="If different of 0, discoSnp++ will search for deletions of size from 1 to D included"/>
+      <param argument="-P" type="integer" label="Maximum SNPs per bubble" value="1" help="discoSnp++ will search up to P SNPs in a unique bubble"/>
+      <param name="low_complexity" type="boolean" checked="false" truevalue="-l" falsevalue="" label="Remove low complexity bubbles" />
+      <param argument="-k" type="integer" label="Size of kmers" value="31" />
+
+      <param argument="-t" type="boolean" checked="false" truevalue="-t" falsevalue="" label="Extends each polymorphism with left and right unitigs" />
+      <param argument="-T" type="boolean" checked="false" truevalue="-T" falsevalue="" label="Extends each polymorphism with left and right contigs" />
+
+
+      <conditional name="coverage_options_type" >
+          <param name="coverage_options" type="select" label="Coverage option">
+              <option value="auto"></option>
+              <option value="custom"></option>
+          </param>
+          <when value="auto" />
+          <when value="custom">
+              <param argument="-c" type="text" label="Minimal coverage per read set" value="4" help="e.g. 4 / 4,5,17 / 4,auto,auto"/>
+          </when>
+      </conditional>
+
+      <param argument="-C" type="integer" label="Maximal coverage per read set" value="2147483647" help="default value = 2^31-1" />
+      <param argument="-d" type="integer" label="Max number of errors per read" value="1" help="Max number of errors per read" />
+
+      <conditional name="VCF_option" >
+          <param name="mapping" type="select" label="VCF option">
+              <option value="default">Do not use reference genome</option>
+              <option value="reference">Mapping with a reference genome</option>
+          </param>
+          <when value="default"></when>
+          <when value="reference">
+              <param argument="-G" type="data" format="fasta,fastq" label="Reference genome file" />
+              <param argument="-M" type="integer" value="4" label="Maximal number of mapping errors" help="during BWA mapping phase" />
+              <param argument="-R" type="boolean" truevalue="-R" falsevalue="" checked="false" label="Use the reference genome also in the variant calling, not only for mapping results" />
+          </when>
+      </conditional>
+
+  </inputs>
+
+  <outputs>
+      <data name="vcf" from_work_dir="*_coherent.vcf" format="vcf" label="VCF with ${tool.name} on $on_string"/>
+      <data name="fasta" from_work_dir="*_coherent.fa" format="fasta" label="Multifasta with ${tool.name} on $on_string"/>
+  </outputs>
+
+  <tests>
+      <test>
+          <conditional name="input_type_options">
+              <param name="input_type" value="single"/>
+              <param name="list_reads" value="discosnp/reads1,discosnp/reads2" ftype="fasta" />
+          </conditional>
+          <param name="k" value="25"/>
+          <output name="fasta" file="discosnp/multifasta.fa"/>
+          <output name="vcf" file="discosnp/vcf_file.vcf" compare="diff" lines_diff="2"/>
+      </test>
+      <test>
+          <conditional name="input_type_options">
+              <param name="input_type" value="paired"/>
+              <param name="list_paired_reads">
+                  <collection type="list:paired">
+                      <element name="Pair1">
+                          <collection type="paired">
+                              <element name="forward" value="discosnp/reads1" ftype="fasta"/>
+                              <element name="reverse" value="discosnp/reads2" ftype="fasta"/>
+                          </collection>
+                      </element>
+                  </collection>
+              </param>
+          </conditional>
+          <conditional name="VCF_option">
+              <param name="mapping" value="reference"/>
+              <param name="G" value="discosnp/reads1" ftype="fasta"/>
+          </conditional>
+          <output name="fasta" file="discosnp/multifasta_paired.fa"/>
+          <output name="vcf" file="discosnp/vcf_file_paired.vcf" compare="diff" lines_diff="2"/>
+      </test>
+  </tests>
+
+  <help><![CDATA[
+
+**Description**
+
+Software discoSnp is designed for discovering Single Nucleotide Polymorphism (SNP) from raw set(s) of reads obtained with Next Generation Sequencers (NGS).
+Note that number of input read sets is not constrained, it can be one, two, or more. Note also that no other data as reference genome or annotations are needed.
+The software is composed by two modules. First module, kissnp2, detects SNPs from read sets. A second module, kissreads, enhance the kissnp2 results by computing per read set  and for each found SNP i/ its mean read coverage and ii/ the (phred) quality of reads generating the polymorphism.
+
+Note that from release of DiscoSnp++-2.0.6, the tool also detects close SNPs and indels.
+
+-------
+
+.. class:: warningmark
+
+**Input parameters**
+
+-Sequences files in fasta or fastq each allele will be counted in each file individually
+
+-Use collections: data list and/or data list paired
+
+-Fasta sequence of a genome if case of you are willing to map the sequence extension on a reference in order to get a compliant VCF
+
+-------
+
+.. class:: warningmark
+
+**Ouput parameters**
+
+-VCF file with coordinates on the higher branch sequences or on a reference genome if provided
+
+-Fasta file with sequence extensions around the SNP.
+
+
+-------
+
+**Web site**
+
+https://colibread.inria.fr/software/discosnp/
+
+  ]]></help>
+    <expand macro="citations">
+        <citation type="doi">10.1093/nar/gku1187</citation>
+    </expand>
+</tool>