diff unicycler.xml @ 9:6e26c9afd301 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/unicycler commit d95fed0458171a099928d51e363865783f89717f
author iuc
date Sat, 24 Sep 2022 21:14:02 +0000
parents 9e3e80cc4ad4
children d10bdad2fd17
line wrap: on
line diff
--- a/unicycler.xml	Wed Nov 18 20:26:04 2020 +0000
+++ b/unicycler.xml	Sat Sep 24 21:14:02 2022 +0000
@@ -1,7 +1,11 @@
-<tool id="unicycler" name="Create assemblies with Unicycler" version="@VERSION@.0" profile="20.09">
+<tool id="unicycler" name="Create assemblies with Unicycler" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.09">
     <macros>
-        <token name="@VERSION@">0.4.8</token>
+        <token name="@TOOL_VERSION@">0.5.0</token>
+        <token name="@VERSION_SUFFIX@">0</token>
     </macros>
+    <xrefs>
+        <xref type="bio.tools">unicycler</xref>
+    </xrefs>
     <edam_topics>
         <edam_topic>topic_0196</edam_topic>
     </edam_topics>
@@ -9,7 +13,8 @@
         <edam_operation>operation_0525</edam_operation>
     </edam_operations>
     <requirements>
-        <requirement type="package" version="@VERSION@">unicycler</requirement>
+        <requirement type="package" version="@TOOL_VERSION@">unicycler</requirement>
+        <requirement type="package" version="1.15.1">samtools</requirement>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
 ## Preparing files
@@ -57,13 +62,10 @@
     #end if
     ln -s '${long}' '$lr' &&
 #end if
-## Get location for pilon installation
-pilon=`pilon --jar_dir` &&
 ## Running Unicycler
 unicycler -t "\${GALAXY_SLOTS:-4}"
 -o ./
 --verbosity 3
---pilon_path \$pilon
 #if str( $paired_unpaired.fastq_input_selector ) == "paired"
     -1 '$fq1'
     -2 '$fq2'
@@ -86,7 +88,6 @@
 #end if
 ## Spades Options section
 ## ----------------------------------------------------------
-$spades.no_correct
 --min_kmer_frac '$spades.min_kmer_frac'
 --max_kmer_frac '$spades.max_kmer_frac'
 #if str($spades.kmers) != ''
@@ -105,12 +106,6 @@
 #end if
 --start_gene_id '$rotation.start_gene_id'
 --start_gene_cov '$rotation.start_gene_cov'
-## Pilon Options section
-## ----------------------------------------------------------
-$pilon.no_pilon
-#if str($pilon.min_polish_size) != ''
-    --min_polish_size '$pilon.min_polish_size'
-#end if
 ## Graph cleaning Options sdection
 ## ----------------------------------------------------------
 --min_component_size '$graph_clean.min_component_size'
@@ -124,6 +119,15 @@
 #if str($lr_align.low_score) != ''
     --low_score '$lr_align.low_score'
 #end if
+$lr_align.no_simple_bridges
+--keep '$keep'
+#if str($keep) != '0'
+    && mkdir 'spades_graphs'
+    && mv 00*gfa './spades_graphs/'
+#end if
+#if str($keep) == '2' and $long
+    && samtools sort -@ "\${GALAXY_SLOTS:-4}" -o 'read_alignment/long_read_alignments.bam' 'read_alignment/long_read_alignments.sam'
+#end if
     ]]></command>
     <inputs>
         <conditional name="paired_unpaired">
@@ -160,8 +164,6 @@
         <param argument="--min_anchor_seg_len" type="integer" min="0" optional="true" label="Unicycler will not use segments shorter than this as scaffolding anchors"/>
         <section name="spades" expanded="False" title="SPAdes options"
             help="Unicycler uses SPAdes to construct assembly graphs. You can modify some of the SPAdes settings here. Use this ONLY if you know what you are doing!">
-            <param argument="--no_correct" type="boolean" checked="false" truevalue="--no_correct" falsevalue=""
-                label="Skip SPAdes error correction step" help="This option turns off SPAdes error correction. Generally it is highly recommended to use correction."/>
             <param argument="--min_kmer_frac" type="float" min="0" max="1" value="0.2"
                 label="Lowest k-mer size for SPAdes assembly, expressed as a fraction of the read length"/>
             <param argument="--max_kmer_frac" type="float" min="0" max="1" value="0.95"
@@ -183,11 +185,6 @@
             <param argument="--start_gene_id" type="float" min="0" max="100" value="90" label="The minimum required BLAST percent identity for a start gene search"/>
             <param argument="--start_gene_cov" type="float" min="0" max="100" value="95" label="The minimum required BLAST percent coverage for a start gene search"/>
         </section>
-        <section name="pilon" title="Pilon options" expanded="false">
-            <param argument="--no_pilon" type="boolean" checked="false" truevalue="--no_pilon" falsevalue=""
-                label="Do not use Pilon to polish the final assembly." help="Unicycler uses Pilon tool for polishing final assembly."/>
-            <param argument="--min_polish_size" type="integer" min="0" value="1000" label="Contigs shorter than this value (bp) will not be polished using Pilon"/>
-        </section>
         <section name="graph_clean" expanded="false" title="Graph cleaning options"
             help="These options control the removal of small leftover sequences after bridging is complete.">
             <param argument="--min_component_size" type="integer" min="0" value="1000"
@@ -201,14 +198,27 @@
             <param argument="--scores" type="text" value="3,-6,-5,-2" label="Comma-delimited string of alignment scores: match, mismatch, gap open, gap extend"/>
             <param argument="--low_score" optional="true" type="integer" value=""
                 label="Score threshold - alignments below this are considered poor" help="default = set automatically"/>
+            <param argument="--no_simple_bridges" type="boolean" truevalue="--no_simple_bridges" falsevalue="" checked="false" label="Skip simple long-read bridging" help="By default Unicycler uses simple long-read bridging; set this to Yes to pass --no_simple_bridges and skip that step. Default: No" />
         </section>
+        <param argument="--keep" type="select" label="Outputs to keep" help="Level of file retention. Default: 1">
+            <option value="0">0: only keep final files</option>
+            <option value="1" selected="true">1: save graphs at main checkpoints</option>
+            <option value="2">2: also keep SAM</option>
+        </param>
     </inputs>
     <outputs>
         <data name="assembly_graph" format="gfa1" from_work_dir="assembly.gfa" label="${tool.name} on ${on_string}: Final Assembly Graph" />
         <data name="assembly" format="fasta" from_work_dir="assembly.fasta" label="${tool.name} on ${on_string}: Final Assembly"/>
+        <collection name="spades_collection" type="list" label="${tool.name} on ${on_string}: SPAdes graphs">
+            <discover_datasets pattern="__designation_and_ext__" format="gfa1" directory="spades_graphs"/>
+            <filter>keep != "0"</filter>
+        </collection>
+        <data name="bam_file" format="bam" from_work_dir="read_alignment/long_read_alignments.bam" label="${tool.name} on ${on_string}: Long read alignments BAM">
+            <filter>keep == "2" and long</filter>
+        </data>
     </outputs>
     <tests>
-        <test>
+        <test expect_num_outputs="2">
             <conditional name="paired_unpaired">
                 <param name="fastq_input_selector" value="paired" />
                 <param name="fastq_input1" value="phix_f.fq.gz" ftype="fastqsanger" />
@@ -218,7 +228,6 @@
             <param name="min_fasta_length" value="100"/>
             <param name="linear_seqs" value="0"/>
             <section name="spades">
-                <param name="no_correct" value="true"/>
                 <param name="min_kmer_frac" value="0.2"/>
                 <param name="max_kmer_frac" value="0.95"/>
                 <param name="kmer_count" value="10"/>
@@ -229,10 +238,6 @@
                 <param name="start_gene_id" value="90"/>
                 <param name="start_gene_cov" value="95"/>
             </section>
-            <section name="pilon">
-                <param name="no_pilon" value=""/>
-                <param name="min_polish_size" value="1000"/>
-            </section>
             <section name="graph_clean">
                 <param name="min_component_size" value="1000"/>
                 <param name="min_dead_end_size" value="1000"/>
@@ -240,6 +245,7 @@
             <section name="lr_align">
                 <param name="scores" value="3,-6,-5,-2"/>
             </section>
+            <param name="keep" value="0"/>
             <output name="assembly_graph" ftype="gfa1">
                 <assert_contents>
                     <has_line_matching expression="S\t1\t[ATCG]{5386,5386}\tLN:i:5386\tdp:f:1.0"/>
@@ -262,7 +268,7 @@
             during the minimap step (which seems to be compiled C code). A gist of the log can be found
             at: https://gist.github.com/jmchilton/b411b695170c1daea6589f5d76e326cb.
         -->
-        <test>
+        <test expect_num_outputs="2">
             <conditional name="paired_unpaired">
                 <param name="fastq_input_selector" value="paired" />
                 <param name="fastq_input1" value="phix_f.fq.gz" ftype="fastqsanger.gz" />
@@ -273,7 +279,6 @@
             <param name="min_fasta_length" value="100"/>
             <param name="linear_seqs" value="0"/>
             <section name="spades">
-                <param name="no_correct" value="true"/>
                 <param name="min_kmer_frac" value="0.2"/>
                 <param name="max_kmer_frac" value="0.95"/>
                 <param name="kmer_count" value="10"/>
@@ -284,10 +289,6 @@
                 <param name="start_gene_id" value="90"/>
                 <param name="start_gene_cov" value="95"/>
             </section>
-            <section name="pilon">
-                <param name="no_pilon" value=""/>
-                <param name="min_polish_size" value="1000"/>
-            </section>
             <section name="graph_clean">
                 <param name="min_component_size" value="1000"/>
                 <param name="min_dead_end_size" value="1000"/>
@@ -295,6 +296,7 @@
             <section name="lr_align">
                 <param name="scores" value="3,-6,-5,-2"/>
             </section>
+            <param name="keep" value="0"/>
             <output name="assembly_graph" ftype="gfa1">
                 <assert_contents>
                     <has_line_matching expression="S\t1\t[ATCG]{5386,5386}\tLN:i:5386\tdp:f:1.0"/>
@@ -306,7 +308,7 @@
                 </assert_contents>
             </output>
         </test>
-        <test>
+        <test expect_num_outputs="2">
             <conditional name="paired_unpaired">
                 <param name="fastq_input_selector" value="paired_collection"/>
                 <param name="fastq_input1">
@@ -320,7 +322,6 @@
             <param name="min_fasta_length" value="100"/>
             <param name="linear_seqs" value="0"/>
             <section name="spades">
-                <param name="no_correct" value="true"/>
                 <param name="min_kmer_frac" value="0.2"/>
                 <param name="max_kmer_frac" value="0.95"/>
                 <param name="kmer_count" value="10"/>
@@ -331,10 +332,6 @@
                 <param name="start_gene_id" value="90"/>
                 <param name="start_gene_cov" value="95"/>
             </section>
-            <section name="pilon">
-                <param name="no_pilon" value="true"/>
-                <param name="min_polish_size" value="1000"/>
-            </section>
             <section name="graph_clean">
                 <param name="min_component_size" value="1000"/>
                 <param name="min_dead_end_size" value="1000"/>
@@ -342,6 +339,112 @@
             <section name="lr_align">
                 <param name="scores" value="3,-6,-5,-2"/>
             </section>
+            <param name="keep" value="0"/>
+            <output name="assembly_graph" ftype="gfa1">
+                <assert_contents>
+                    <has_line_matching expression="S\t1\t[ATCG]{5386,5386}\tLN:i:5386\tdp:f:1.0"/>
+                </assert_contents>
+            </output>
+            <output name="assembly" ftype="fasta">
+                <assert_contents>
+                    <has_text text="length=5386" />
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="2">
+            <conditional name="paired_unpaired">
+                <param name="fastq_input_selector" value="none"/>
+            </conditional>
+            <param name="min_anchor_seg_len" value="10"/>
+            <section name="spades">
+                <param name="kmers" value="21,23"/>
+            </section>
+            <param name="long" value="only_long.fasta" ftype="fasta" />
+            <param name="keep" value="0"/>
+            <output name="assembly_graph" ftype="gfa1">
+                <assert_contents>
+                    <has_text text="S" />
+                </assert_contents>
+            </output>
+            <output name="assembly" ftype="fasta">
+                <assert_contents>
+                    <has_text text=">1" />
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test keep value = 1 -->
+        <test expect_num_outputs="3">
+            <conditional name="paired_unpaired">
+                <param name="fastq_input_selector" value="paired" />
+                <param name="fastq_input1" value="phix_f.fq.gz" ftype="fastqsanger" />
+                <param name="fastq_input2" value="phix_r.fq.gz" ftype="fastqsanger" />
+            </conditional>
+            <param name="mode" value="normal" />
+            <param name="keep" value="1"/>
+            <output name="assembly_graph" ftype="gfa1">
+                <assert_contents>
+                    <has_line_matching expression="S\t1\t[ATCG]{5386,5386}\tLN:i:5386\tdp:f:1.0"/>
+                </assert_contents>
+            </output>
+            <output name="assembly" ftype="fasta">
+                <assert_contents>
+                    <has_text text="length=5386" />
+                </assert_contents>
+            </output>
+            <output_collection name="spades_collection" type="list" count="14">
+                <element name="001_spades_graph_k027">
+                    <assert_contents>
+                        <has_text text="TTGAATGCCACCGGAGGCGGCTTTTTGACCGCCTCCAAAC"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <!-- Test keep value = 2 -->
+        <test expect_num_outputs="4">
+            <conditional name="paired_unpaired">
+                <param name="fastq_input_selector" value="paired" />
+                <param name="fastq_input1" value="phix_f.fq.gz" ftype="fastqsanger" />
+                <param name="fastq_input2" value="phix_r.fq.gz" ftype="fastqsanger" />
+            </conditional>
+            <param name="long" value="onp.fa" ftype="fasta" />
+            <param name="mode" value="normal" />
+            <param name="keep" value="2"/>
+            <output name="assembly_graph" ftype="gfa1">
+                <assert_contents>
+                    <has_line_matching expression="S\t1\t[ATCG]{5386,5386}\tLN:i:5386\tdp:f:1.0"/>
+                </assert_contents>
+            </output>
+            <output name="assembly" ftype="fasta">
+                <assert_contents>
+                    <has_text text="length=5386" />
+                </assert_contents>
+            </output>
+            <output_collection name="spades_collection" type="list" count="14">
+                <element name="001_spades_graph_k027">
+                    <assert_contents>
+                        <has_text text="TTGAATGCCACCGGAGGCGGCTTTTTGACCGCCTCCAAAC"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output name="bam_file" ftype="bam">
+                <assert_contents>
+                    <has_size value="2084" delta="100"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test no simple bridges option -->
+        <test expect_num_outputs="2">
+            <conditional name="paired_unpaired">
+                <param name="fastq_input_selector" value="paired" />
+                <param name="fastq_input1" value="phix_f.fq.gz" ftype="fastqsanger" />
+                <param name="fastq_input2" value="phix_r.fq.gz" ftype="fastqsanger" />
+            </conditional>
+            <param name="long" value="onp.fa" ftype="fasta" />
+            <param name="mode" value="normal" />
+            <param name="keep" value="0"/>
+            <section name="lr_align">
+                <param name="no_simple_bridges" value="true"/>
+            </section>
             <output name="assembly_graph" ftype="gfa1">
                 <assert_contents>
                     <has_line_matching expression="S\t1\t[ATCG]{5386,5386}\tLN:i:5386\tdp:f:1.0"/>
@@ -352,26 +455,9 @@
                     <has_text text="length=5386" />
                 </assert_contents>
             </output>
-        </test>
-        <test>
-            <conditional name="paired_unpaired">
-                <param name="fastq_input_selector" value="none"/>
-            </conditional>
-            <param name="min_anchor_seg_len" value="10"/>
-            <section name="spades">
-                <param name="kmers" value="21,23"/>
-            </section>
-            <param name="long" value="only_long.fasta" ftype="fasta" />
-            <output name="assembly_graph" ftype="gfa1">
-                <assert_contents>
-                    <has_text text="S" />
-                </assert_contents>
-            </output>
-            <output name="assembly" ftype="fasta">
-                <assert_contents>
-                    <has_text text=">1" />
-                </assert_contents>
-            </output>
+            <assert_command>
+                <has_text text="--no_simple_bridges" />
+            </assert_command>
         </test>
     </tests>
     <help><![CDATA[
@@ -436,11 +522,6 @@
 .. _`Hamming graph`: https://en.wikipedia.org/wiki/Hamming_graph
 .. _`Hamming distance`: https://en.wikipedia.org/wiki/Hamming_distance
 
-This following option turns error correction on and off::
-
-    --no_correct
-        Skip SPAdes error correction step
-        (default: conduct SPAdes error correction)
 
 -----