diff valet.xml @ 0:56236acaad45 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/valet commit 30ecbd2ebd336d7002ca11abd69d600a24986156
author iuc
date Thu, 16 Nov 2017 08:54:46 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/valet.xml	Thu Nov 16 08:54:46 2017 -0500
@@ -0,0 +1,318 @@
+<tool id="valet" name="VALET" version="@WRAPPER_VERSION@.0">
+    <description>to detect mis-assemblies in metagenomic assemblies</description>
+    <macros>
+        <token name="@WRAPPER_VERSION@">1.0</token>
+        <token name="@INPUT_@">
+            1.0
+        </token>
+        <xml name="insert_size">
+            <param argument="--minins" type="integer" min="0" value="0" label="Min insert sizes for mate pairs" />
+            <param argument="--maxins" type="integer" min="0" value="500" label="Max insert sizes for mate pairs" />
+        </xml>
+        <xml name="orientation">
+            <param argument="--orientation" type="select" label="Orientation of the mates" >
+                <option value="fr">fr: mate 1 appears upstream of the reverse complement of mate 2 or mate 2 appears upstream of the reverse
+                complement of mate 1</option>
+                <option value="rf">rf: reverse-complemented mate 1 is upstream and forward-oriented mate 2 is downstream</option>
+                <option value="ff">ff: both upstream mate 1 and downstream mate 2 are forward-oriented</option>
+            </param>
+        </xml>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@WRAPPER_VERSION@">valet</requirement>
+    </requirements>
+    <version_command>echo @WRAPPER_VERSION@</version_command>
+    <command detect_errors="exit_code">
+<![CDATA[
+valet.py
+    #set assembly_fasta = []
+    #set assembly_names = []
+    #for $repeat in $assembly
+        $assembly_fasta.append(str($repeat.assembly_fasta))
+        $assembly_names.append(str($repeat.assembly_names))
+    #end for
+    --assembly-fasta ${','.join($assembly_fasta)}
+    --assembly-names ${','.join($assembly_names)}
+    #if $input_reads.type == 'single'
+        $input_reads.single_input_reads.type
+        --reads '$input_reads.single_input_reads.reads'
+    #else if $input_reads.type == 'paired'
+        #set mate_1 = []
+        #set mate_2 = []
+        #set minins = []
+        #set maxins = []
+        #for $repeat in $input_reads.paired_input_reads.paired_reads
+            $mate_1.append(str($repeat.mate_1))
+            $mate_2.append(str($repeat.mate_2))
+            $minins.append(str($repeat.minins))
+            $maxins.append(str($repeat.maxins))
+        #end for
+        --1 ${','.join($mate_1)}
+        --2 ${','.join($mate_2)}
+        --minins ${','.join($minins)}
+        --maxins ${','.join($maxins)}
+        $input_reads.paired_input_reads.type
+        --orientation '$input_reads.orientation'
+    #else if $input_reads.type == 'paired_collection'
+        #set mate_1 = []
+        #set mate_2 = []
+        #set minins = []
+        #set maxins = []
+        #for $repeat in $input_reads.paired_coll_input_reads.paired_collection_reads
+            $mate_1.append(str($repeat.input.forward))
+            $mate_2.append(str($repeat.input.reverse))
+            $minins.append(str($repeat.minins))
+            $maxins.append(str($repeat.maxins))
+        #end for
+        --1 ${','.join($mate_1)}
+        --2 ${','.join($mate_2)}
+        --minins ${','.join($minins)}
+        --maxins ${','.join($maxins)}
+        $input_reads.paired_coll_input_reads.type
+        --orientation '$input_reads.orientation'
+    #end if
+    --output-dir output
+    --window-size '$window_size'
+    --threads \${GALAXY_SLOTS:-4}
+    --max-alignments '$max_alignments'
+    --min-coverage '$min_coverage'
+    --coverage-multiplier '$coverage_multiplier'
+    --min-suspicious '$min_suspicious'
+    --suspicious-flank-size '$suspicious_flank_size'
+    --min-contig-length '$min_contig_length'
+    --ignore-ends '$ignore_ends'
+    --breakpoint-bin '$breakpoint_bin'
+    #if $orf_file
+        --orf-file '$orf_file'
+    #end if
+    #if $coverage_file
+        --coverage-file '$coverage_file'
+    #end if
+    --kmer '$kmer'
+    --skip-reapr
+#for $repeat in $assembly
+    && mv output/${repeat.assembly_names}/summary.bed output/${repeat.assembly_names}_summary.bed
+    && mv output/${repeat.assembly_names}/summary.tsv output/${repeat.assembly_names}_summary.tsv
+    && mv output/${repeat.assembly_names}/suspicious.bed output/${repeat.assembly_names}_suspicious.bed
+#end for
+    ]]></command>
+    <inputs>
+        <repeat name="assembly" title="Candidate assemblies">
+            <param name="assembly_fasta" argument="--assembly-fasta" type="data" format="fasta" label="Candidate assembly file" />
+            <param name="assembly_names" argument="--assembly-names" type="text" value="" label="Name of the assembly">
+                <validator type="empty_field" message="A name is required"/>
+            </param>
+        </repeat>
+        <conditional name="input_reads">
+            <param name="type" type="select" label="Type of input reads used for the assembly">
+                <option value="single">Single</option>
+                <option value="paired">Paired</option>
+                <option value="paired_collection">Paired-collection</option>
+            </param>
+            <when value="single">
+              <conditional name="single_input_reads">
+                    <param name="type" type="select" label="Input format">
+                        <option value="--fasta">Fasta</option>
+                        <option value="--fastq">FastQ</option>
+                    </param>
+                    <when value="--fasta">
+                        <param argument="--reads" type="data" format="fasta" label="Assembly input reads" />
+                    </when>
+                    <when value="--fastq">
+                        <param argument="--reads" type="data" format="fastq" label="Assembly input reads" />
+                    </when>
+                </conditional>
+            </when>
+            <when value="paired">
+                <conditional name="paired_input_reads">
+                    <param name="type" type="select" label="Input format">
+                        <option value="--fasta">Fasta</option>
+                        <option value="--fastq">FastQ</option>
+                    </param>
+                    <when value="--fasta">
+                        <repeat name="paired_reads" title="Mate pair reads">
+                            <param name="mate_1" argument="--1" type="data" format="fasta" label="Assembly input first mate reads" />
+                            <param name="mate_2" argument="--2" type="data" format="fasta" label="Assembly input second mate reads" />
+                            <expand macro="insert_size"/>
+                        </repeat>
+                    </when>
+                    <when value="--fastq">
+                        <repeat name="paired_reads" title="Mate pair reads">
+                            <param name="mate_1" argument="--1" type="data" format="fastq" label="Assembly input first mate reads" />
+                            <param name="mate_2" argument="--2" type="data" format="fastq" label="Assembly input second mate reads" />
+                            <expand macro="insert_size"/>
+                        </repeat>
+                    </when>
+                </conditional>
+                <expand macro="orientation"/>
+            </when>
+            <when value="paired_collection">
+                <conditional name="paired_coll_input_reads">
+                    <param name="type" type="select" label="Input format">
+                        <option value="--fasta">Fasta</option>
+                        <option value="--fastq">FastQ</option>
+                    </param>
+                    <when value="--fasta">
+                        <repeat name="paired_collection_reads" title="Mate paired read collections">
+                            <param name="input" format="fasta" type="data_collection" collection_type="paired" label="Assembly input reads" />
+                            <expand macro="insert_size"/>
+                        </repeat>
+                    </when>
+                    <when value="--fastq">
+                        <repeat name="paired_collection_reads" title="Mate paired read collections">
+                            <param name="input" format="fastq" type="data_collection" collection_type="paired" label="Assembly input reads" />
+                            <expand macro="insert_size"/>
+                        </repeat>
+                    </when>
+                </conditional>
+                <expand macro="orientation"/>
+            </when>
+        </conditional>
+        <param name="window_size" argument="--window-size" type="integer" min="0" value="501" label="Sliding window size when determining misassemblies" />
+        <param name="max_alignments" argument="--max-alignments" type="integer" min="0" value="10000" label="Bowtie2 parameter to set the max number of alignments" />
+        <param name="min_coverage" argument="--min-coverage" type="integer" min="0" value="0" label="Minimum average coverage to run misassembly detection" />
+        <param name="coverage_multiplier" argument="--coverage-multiplier" type="float" min="0" value="0" label="When binning by coverage, the new high = high + high * multiplier" />
+        <param name="min_suspicious" argument="--min-suspicious" type="integer" min="0" value="2" label="Minimum number of overlapping flagged miassemblies to mark region as suspicious" />
+        <param name="suspicious_flank_size" argument="--suspicious-flank-size" type="integer" min="0" value="2000" label="Mark region as suspicious if multiple signatures occur within this window size" />
+        <param name="min_contig_length" argument="--min-contig-length" type="integer" min="0" value="1000" label="Ignore contigs smaller than this length" />
+        <param name="ignore_ends" argument="--ignore-ends" type="integer" min="0" value="0" label="Ignore flagged regions within b bps from the ends of the contigs" />
+        <param name="breakpoint_bin" argument="--breakpoint-bin" type="integer" min="0" value="50" label="Bin sized used to find breakpoints" />
+        <param name="kmer" argument="--kmer" type="integer" min="0" value="15" label="Kmer length used for abundance estimation" />
+        <param name="coverage_file" argument="--coverage-file" type="data" format="tabular,txt" optional="true" label="Assembly created per-contig coverage file" />
+        <param name="orf_file" argument="--orf-file" type="data" format="gff,gtf" optional="true" label="File containing ORFs" />
+    </inputs>
+    <outputs>
+        <collection name="flagged" type="list" label="${tool.name} on ${on_string}: Flagged regions">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)_summary.bed" format="bed" directory="output"/>
+        </collection>
+        <collection name="suspicious" type="list" label="${tool.name} on ${on_string}: Suspicious regions">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)_suspicious.bed" format="bed" directory="output"/>
+        </collection>
+        <collection name="summary" type="list" label="${tool.name} on ${on_string}: Summary">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)_summary.tsv" format="tabular" directory="output"/>
+        </collection>
+        <data name="comparison_plot" format="pdf" from_work_dir="output/comparison_plots.pdf" label="${tool.name} on ${on_string}: Comparison plot" />
+    </outputs>
+    <tests>
+        <test>
+          <repeat name="assembly">
+              <param name="assembly_fasta" value="c_rudii_reference.fna"/>
+              <param name="assembly_names" value="reference"/>
+          </repeat>
+          <repeat name="assembly">
+              <param name="assembly_fasta" value="c_rudii_dup.fna"/>
+              <param name="assembly_names" value="duplication"/>
+          </repeat>
+          <repeat name="assembly">
+              <param name="assembly_fasta" value="c_rudii_relocation.fna"/>
+              <param name="assembly_names" value="relocation"/>
+          </repeat>
+          <repeat name="assembly">
+              <param name="assembly_fasta" value="c_rudii_reloc_dup.fna"/>
+              <param name="assembly_names" value="reloc-dup"/>
+          </repeat>
+          <conditional name="input_reads">
+              <param name="type" value="paired"/>
+              <conditional name="paired_input_reads">
+                  <param name="type" value="--fastq"/>
+                  <repeat name="paired_reads">
+                      <param name="mate_1" value="lib1.1.fastq" />
+                      <param name="mate_2" value="lib1.2.fastq" />
+                      <param name="minins" value="0"/>
+                      <param name="maxins" value="500" />
+                  </repeat>
+              </conditional>
+              <param name="orientation" value="fr" />
+          </conditional>
+          <param name="window_size" value="501"/>
+          <param name="max_alignments" value="10000"/>
+          <param name="min_coverage" value="0" />
+          <param name="coverage_multiplier" value="0"/>
+          <param name="min_suspicious" value="2" />
+          <param name="suspicious_flank_size" value="2000" />
+          <param name="min_contig_length" value="1000"/>
+          <param name="ignore_ends" value="0"/>
+          <param name="breakpoint_bin" value="50" />
+          <param name="kmer" value="15" />
+          <param name="coverage_file" value="carsonella_asm.cvg" />
+          <output_collection name="flagged" type="list">
+              <element name="reference" ftype="bed" file="flagged_reference.bed"/>
+              <element name="duplication" ftype="bed" file="flagged_duplication.bed"/>
+              <element name="relocation" ftype="bed" file="flagged_relocation.bed"/>
+              <element name="reloc-dup" ftype="bed" file="flagged_reloc-dup.bed"/>
+          </output_collection>
+          <output_collection name="suspicious" type="list">
+              <element name="reference" ftype="bed" file="suspicious_reference.bed"/>
+              <element name="duplication" ftype="bed" file="suspicious_duplication.bed"/>
+              <element name="relocation" ftype="bed" file="suspicious_relocation.bed"/>
+              <element name="reloc-dup" ftype="bed" file="suspicious_reloc-dup.bed"/>
+          </output_collection>
+          <output_collection name="summary" type="list">
+              <element name="reference" ftype="tabular" file="summary_reference.tabular"/>
+              <element name="duplication" ftype="tabular" file="summary_duplication.tabular"/>
+              <element name="relocation" ftype="tabular" file="summary_relocation.tabular"/>
+              <element name="reloc-dup" ftype="tabular" file="summary_reloc-dup.tabular"/>
+          </output_collection>
+          <output name="comparison_plot" file="test1_comparison_plot.pdf" compare="sim_size"/>
+        </test>
+        <test>
+          <repeat name="assembly">
+              <param name="assembly_fasta" value="c_rudii_dup.fna"/>
+              <param name="assembly_names" value="duplication"/>
+          </repeat>
+          <conditional name="input_reads">
+              <param name="type" value="paired_collection"/>
+              <conditional name="paired_coll_input_reads">
+                  <param name="type" value="--fastq"/>
+                  <repeat name="paired_collection_reads">
+                      <param name="input">
+                          <collection type="paired">
+                              <element name="forward" value="lib1.1.fastq" ftype="fastq" />
+                              <element name="reverse" value="lib1.2.fastq" ftype="fastq" />
+                          </collection>
+                      </param>
+                      <param name="minins" value="0"/>
+                      <param name="maxins" value="500" />
+                  </repeat>
+              </conditional>
+              <param name="orientation" value="fr" />
+          </conditional>
+          <param name="window_size" value="501"/>
+          <param name="max_alignments" value="10000"/>
+          <param name="min_coverage" value="0" />
+          <param name="coverage_multiplier" value="0"/>
+          <param name="min_suspicious" value="2" />
+          <param name="suspicious_flank_size" value="2000" />
+          <param name="min_contig_length" value="1000"/>
+          <param name="ignore_ends" value="0"/>
+          <param name="breakpoint_bin" value="50" />
+          <param name="kmer" value="15" />
+          <output_collection name="flagged" type="list">
+              <element name="duplication" ftype="bed" file="flagged_duplication.bed"/>
+          </output_collection>
+          <output_collection name="suspicious" type="list">
+            <element name="duplication" ftype="bed" file="suspicious_duplication.bed"/>
+          </output_collection>
+          <output_collection name="summary" type="list">
+            <element name="duplication" ftype="tabular" file="summary_duplication.tabular"/>
+          </output_collection>
+          <output name="comparison_plot" file="test2_comparison_plot.pdf" compare="sim_size"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+VALET is a de novo pipeline for detecting all types of mis-assemblies in metagenomic data sets.
+
+Its primarily adapts the approaches developed in the context of isolate genomes. To avoid false positives and false
+negatives because of uneven depth of coverage, VALET bins contig by coverage before applying these methods.
+
+Possible break points in the assembly are found by examining regions, where a large number of parts of the reads are
+unable to align. To identify break points, VALET uses the first and last third of each unaligned read, called sister
+reads. The sister reads are aligned independently to the reference genome, and then regions where the sister reads
+align to nonadjacent segments of the genome are flagged as mis-assemblies.
+
+For more details about the tool, please check: https://github.com/marbl/VALET
+    ]]></help>
+    <citations/>
+</tool>