changeset 11:8f9f06995f98 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/unicycler commit c1cd9b717bce328b276f24d910e2e6858585b2cc
author iuc
date Tue, 19 Dec 2023 15:58:30 +0000
parents d10bdad2fd17
children
files test-data/phix__spades_graph.gfa1 unicycler.xml
diffstat 2 files changed, 79 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/phix__spades_graph.gfa1	Tue Dec 19 15:58:30 2023 +0000
@@ -0,0 +1,5 @@
+S	1	CAGCCACTTAAGTGAGGTGATTTATGTTTGGTGCTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAAAGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCTGGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGTCCCTAGTTTTGTTTCTGGTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGATAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTATCTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGGTTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGAGATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGACCAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTATGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCAAACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGACTTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTTCTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGATACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCGTCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTTCTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTATTGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGCATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATGTTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGAATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGGGACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCCCTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATTGCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTGCTATTCAGCGTTTGATGAATGCAATGCGACAGGCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTTATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCGCAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGCCGTCTTCATTTCCATGCGGTGCACTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTCGTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCATCG
CAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAGCCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATATGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACTTCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTGTCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGCAGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACCTGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCAGAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTTGATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAAATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTGTCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTAGATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATCTGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTTTCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTTCGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCTTGCGTTTATGGTACGCTGGACTTTGTAGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCGTCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTACGGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTACGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAGTGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACTAAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGCCCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCATCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGACTCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTACTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAAGGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTTGGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACAACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGCTCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCT
CCGCTTCCTCCTGAGACTGAGCTTTCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGCATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCATGATGTTATTTCTTCATTTGGAGGTAAAACCTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTTGATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGCCGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGACTAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTGTATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGTTTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGAAGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGATTATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTTATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAACGCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGCTTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGTTCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTATATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTGTCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGCCTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTGGATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGCCGGGCAATAATGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGTTTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTGCTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAAAGCCGCCTCCGGTGGCATTCAAGG	LN:i:5513	dp:f:1.0
+S	2	CCCATGCCTACAGTATTGTTATCGGTAGCAAGCACATCCCCTTGAATGCCACCGGAGGCGGCTTTTTGACCGCCTCCAAACAATTTAGACATGGCGCCACCAGCAAGAGCAGAAGCAATACCGCCAGCAATAGCACCAAACATAAATCACCTCACTTAAGTGGCTG	LN:i:166	dp:f:0.0848319778258616
+L	1	+	1	+	127M
+L	2	+	1	-	127M
+i	367.256	37.4267
--- a/unicycler.xml	Fri Oct 21 16:02:46 2022 +0000
+++ b/unicycler.xml	Tue Dec 19 15:58:30 2023 +0000
@@ -4,20 +4,24 @@
         <token name="@TOOL_VERSION@">0.5.0</token>
         <token name="@VERSION_SUFFIX@">1</token>
     </macros>
-    <xrefs>
-        <xref type="bio.tools">unicycler</xref>
-    </xrefs>
     <edam_topics>
         <edam_topic>topic_0196</edam_topic>
     </edam_topics>
     <edam_operations>
         <edam_operation>operation_0525</edam_operation>
     </edam_operations>
+    <xrefs>
+        <xref type="bio.tools">unicycler</xref>
+    </xrefs>
     <requirements>
         <requirement type="package" version="@TOOL_VERSION@">unicycler</requirement>
         <requirement type="package" version="1.15.1">samtools</requirement>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
+#for r in $reuse
+    ln -s $r.reuse_file ${r.reuse_step}.gfa &&
+#end for
+
 ## Preparing files
 #set $uncompressed = ('fastqsanger','fastq')
 #set $compressed = ('fastqsanger.gz','fastq.gz')
@@ -208,12 +212,20 @@
             <option value="1" selected="true">1: save graphs at main checkpoints</option>
             <option value="2">2: also keep SAM</option>
         </param>
+        <repeat name="reuse" title="Reuse checkpoint files from earlier runs" max="1" help="">
+            <param name="reuse_file" type="data" optional="false" format="gfa1" label="Checkpoint file"/>
+            <param name="reuse_step" type="select" label="Checkpoint">
+                <option value="002_depth_filter">002_depth_filter</option>
+                <option value="003_overlaps_removed">003_overlaps_removed</option>
+                <option value="004_bridges_applied">004_bridges_applied</option>
+            </param>
+        </repeat>
     </inputs>
     <outputs>
         <data name="assembly_graph" format="gfa1" from_work_dir="assembly.gfa" label="${tool.name} on ${on_string}: Final Assembly Graph" />
         <data name="assembly" format="fasta" from_work_dir="assembly.fasta" label="${tool.name} on ${on_string}: Final Assembly"/>
         <collection name="spades_collection" type="list" label="${tool.name} on ${on_string}: SPAdes graphs">
-            <discover_datasets pattern="__designation_and_ext__" format="gfa1" directory="spades_graphs"/>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.gfa" format="gfa1" directory="spades_graphs"/>
             <filter>keep != "0"</filter>
         </collection>
         <data name="bam_file" format="bam" from_work_dir="read_alignment/long_read_alignments.bam" label="${tool.name} on ${on_string}: Long read alignments BAM">
@@ -375,6 +387,35 @@
                 </assert_contents>
             </output>
         </test>
+        <!-- Test checkpoint graph reuse.
+             TODO: make the test more precise and check the difference to a call without reuse -->
+        <test expect_num_outputs="2">
+            <conditional name="paired_unpaired">
+                <param name="fastq_input_selector" value="paired_collection"/>
+                <param name="fastq_input1">
+                    <collection type="paired">
+                        <element name="forward" value="phix_f.fq.gz" ftype="fastqsanger" />
+                        <element name="reverse" value="phix_r.fq.gz" ftype="fastqsanger" />
+                    </collection>
+                </param>
+            </conditional>
+            <param name="long" value="only_long.fasta" ftype="fasta" />
+            <repeat name="reuse">
+                <param name="reuse_file" value="phix__spades_graph.gfa1"/>
+                <param name="reuse_step" value="002_depth_filter"/>
+            </repeat>
+            <param name="keep" value="0"/>
+            <output name="assembly_graph" ftype="gfa1">
+                <assert_contents>
+                    <has_text text="S" />
+                </assert_contents>
+            </output>
+            <output name="assembly" ftype="fasta">
+                <assert_contents>
+                    <has_text text=">1" />
+                </assert_contents>
+            </output>
+        </test>
         <!-- Test keep value = 1 -->
         <test expect_num_outputs="3">
             <conditional name="paired_unpaired">
@@ -428,6 +469,35 @@
                         <has_text text="TTGAATGCCACCGGAGGCGGCTTTTTGACCGCCTCCAAAC"/>
                     </assert_contents>
                 </element>
+                <!-- There are gfa files for more k values that are not tested explicitly.
+                     The aim of testing these is to be sure about the names of the graphs,
+                     since they are used for reuse. Hence, if there is a change here,
+                     update reuse accordingly. -->
+                <element name="001_spades_graph_k127">
+                    <assert_contents>
+                        <has_line_matching expression="^S.*"/>
+                    </assert_contents>
+                </element>
+                <element name="002_depth_filter">
+                    <assert_contents>
+                        <has_line_matching expression="^S.*"/>
+                    </assert_contents>
+                </element>
+                <element name="003_overlaps_removed">
+                    <assert_contents>
+                        <has_line_matching expression="^S.*"/>
+                    </assert_contents>
+                </element>
+                <element name="004_bridges_applied">
+                    <assert_contents>
+                        <has_line_matching expression="^S.*"/>
+                    </assert_contents>
+                </element>
+                <element name="005_final_clean">
+                    <assert_contents>
+                        <has_line_matching expression="^S.*"/>
+                    </assert_contents>
+                </element>
             </output_collection>
             <output name="bam_file" ftype="bam">
                 <assert_contents>