diff maxbin2.xml @ 6:2bc40bfbcfb4 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/maxbin2/ commit e0e4e512a0858295ce3d445b9413de2b5bcfcd78
author iuc
date Mon, 16 Oct 2023 07:46:13 +0000
parents 8a0473eb465e
children 28a0b1446d2b
line wrap: on
line diff
--- a/maxbin2.xml	Fri Jun 17 17:27:40 2022 +0000
+++ b/maxbin2.xml	Mon Oct 16 07:46:13 2023 +0000
@@ -1,8 +1,5 @@
-<tool id="maxbin2" name="MaxBin2" version="@MAXBIN_VERSION@+galaxy3">
+<tool id="maxbin2" name="MaxBin2" version="@MAXBIN_VERSION@+galaxy4">
     <description>clusters metagenomic contigs into bins</description>
-    <xrefs>
-        <xref type="bio.tools">masigpro</xref>
-    </xrefs>
     <macros>
         <token name="@MAXBIN_VERSION@">2.2.7</token>
         <xml name="contig">
@@ -19,19 +16,40 @@
             <param argument="--reassembly" type="boolean" truevalue="-reassembly" falsevalue="" checked="false" label="Reassembly" help="Reassembly option is still highly experimental. To use this function, you need to feed MaxBin interleaved paired-end fastq or fasta file." />
         </xml>
     </macros>
+    <xrefs>
+        <xref type="bio.tools">maxbin</xref>
+    </xrefs>
     <requirements>
         <requirement type="package" version="@MAXBIN_VERSION@">maxbin2</requirement>
     </requirements>
     <version_command><![CDATA[run_MaxBin.pl -version | head -n 1]]></version_command>
     <command detect_errors="exit_code"><![CDATA[
 ## generate read or abundance files
+#import re
 #if $assembly.inputs.type == 'reads'
     #if $assembly.type == 'individual'
-echo '$assembly.inputs.reads' >> reads_list &&
+        ## uncompress .gz reads files if necessary
+        #set $e = $assembly.inputs.reads
+        #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($e.element_identifier))
+        #if $e.ext.endswith(".gz")
+gunzip -c '$e' > '$identifier' &&
+echo '$identifier' >> reads_list &&
+        #else 
+ln -s '$e' '$identifier' &&
+echo '$identifier' >> reads_list &&
+        #end if
     #else
-        #for $r in $assembly.inputs.reads
+        #for $i, $r in enumerate($assembly.inputs.reads)
             #if $r
-echo '$r' >> reads_list &&
+                #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($r.element_identifier))
+                #set $newid = $identifier +  '_'  + str($i)
+                #if $r.ext.endswith(".gz")
+gunzip -c '$r' > '$newid' &&                        
+echo '$newid' >> reads_list &&
+                #else
+ln -s '$r' '$newid' &&
+echo '$newid' >> reads_list &&
+                #end if
             #end if
         #end for
     #end if
@@ -94,7 +112,7 @@
                 <conditional name="inputs">
                     <expand macro="input_type"/>
                     <when value="reads">
-                        <param argument="-reads" type="data" format="fasta,fastq" label="Reads file"/>
+                        <param argument="-reads" type="data" format="fasta,fastq,fastq.gz,fasta.gz" label="Reads file"/>
                         <expand macro="reads_extra_params"/>
                     </when>
                     <when value="abund">
@@ -344,6 +362,129 @@
                 </assert_contents>
             </output>
         </test>
+        <!-- Test: individual assembly with contigs and one uncompressed fastqsanger reads file; marker, markers, log and abundance outputs are set to false -->
+        <test expect_num_outputs="4">
+            <param name="contig" value="test4_contigs.fasta" ftype="fasta" />
+            <conditional name="assembly">
+                <param name="type" value="individual"/>
+                <conditional name="inputs">
+                    <param name="type" value="reads"/>
+                    <param name="reads" value="test4_reads.fastqsanger" ftype="fastqsanger"/>
+                    <param name="output_abundances" value="false"/>
+                    <param name="reassembly" value=""/>
+                </conditional>
+            </conditional>
+            <section name="adv">
+                <param name="min_contig_length" value="1000"/>
+                <param name="max_iteration" value="50"/>
+                <param name="prob_threshold" value="0.5"/>
+            </section>
+            <section name="output">
+                <param name="plotmarker" value=""/>
+                <param name="marker" value="false"/>
+                <param name="markers" value="false" />
+                <param name="log" value="false"/>
+                <param name="markerset" value="107"/>
+            </section>
+            <output_collection name="bins" type="list" count="2">
+                <element name="001" file="4/out.001.fasta" ftype="fasta"/>
+                <element name="002" file="4/out.002.fasta" ftype="fasta"/>
+            </output_collection>
+            <output name="summary" file="4/out.summary" ftype="tabular" />
+            <output name="noclass" file="4/out.noclass" ftype="fasta" />
+            <output name="toshort" file="4/out.tooshort" ftype="fasta" />
+        </test>
+        <!-- Test: individual assembly with contigs and one gzip-compressed reads file (fastqsanger.gz); expected outputs are the same 4/ files as for the uncompressed reads -->
+        <test expect_num_outputs="4">
+            <param name="contig" value="test4_contigs.fasta" ftype="fasta" />
+            <conditional name="assembly">
+                <param name="type" value="individual"/>
+                <conditional name="inputs">
+                    <param name="type" value="reads"/>
+                    <param name="reads" value="test4_reads.fastqsanger.gz" ftype="fastqsanger.gz"/>
+                    <param name="output_abundances" value="false"/>
+                    <param name="reassembly" value=""/>
+                </conditional>
+            </conditional>
+            <section name="adv">
+                <param name="min_contig_length" value="1000"/>
+                <param name="max_iteration" value="50"/>
+                <param name="prob_threshold" value="0.5"/>
+            </section>
+            <section name="output">
+                <param name="plotmarker" value=""/>
+                <param name="marker" value="false"/>
+                <param name="markers" value="false" />
+                <param name="log" value="false"/>
+                <param name="markerset" value="107"/>
+            </section>
+            <output_collection name="bins" type="list" count="2">
+                <element name="001" file="4/out.001.fasta" ftype="fasta"/>
+                <element name="002" file="4/out.002.fasta" ftype="fasta"/>
+            </output_collection>
+            <output name="summary" file="4/out.summary" ftype="tabular" />
+            <output name="noclass" file="4/out.noclass" ftype="fasta" />
+            <output name="toshort" file="4/out.tooshort" ftype="fasta" />
+        </test>
+        <!-- Test: co-assembled contigs with the same .gz reads file supplied twice, plus optional outputs (abundances, marker, plot, markers, log). NOTE(review): reads are declared ftype="fasta" although the files are fastqsanger.gz; confirm this is intended -->
+        <test expect_num_outputs="9">
+            <param name="contig" value="test4_contigs.fasta" ftype="fasta" />
+            <conditional name="assembly">
+                <param name="type" value="coassembly"/>
+                <conditional name="inputs">
+                    <param name="type" value="reads"/>
+                    <param name="reads" value="test4_reads.fastqsanger.gz,test4_reads.fastqsanger.gz" ftype="fasta"/>
+                    <param name="output_abundances" value="true"/>
+                    <param name="reassembly" value=""/>
+                </conditional>
+            </conditional>
+            <section name="adv">
+                <param name="min_contig_length" value="1000"/>
+                <param name="max_iteration" value="50"/>
+                <param name="prob_threshold" value="0.5"/>
+            </section>
+            <section name="output">
+                <param name="plotmarker" value="true"/>
+                <param name="marker" value="true"/>
+                <param name="markers" value="true" />
+                <param name="log" value="true"/>
+                <param name="markerset" value="107"/>
+            </section>
+            <output_collection name="bins" type="list" count="2">
+                <element name="001" file="4/out.001.fasta" ftype="fasta"/>
+                <element name="002" file="4/out.002.fasta" ftype="fasta"/>
+            </output_collection>
+            <output name="summary" ftype="tabular">
+                <assert_contents>
+                    <has_text text="Completeness"/>
+                    <has_text text="out.001.fasta"/>
+                </assert_contents>
+            </output>
+            <output name="noclass" file="4/out.noclass" ftype="fasta" />
+            <output name="toshort" file="4/out.tooshort" ftype="fasta" />
+            <output name="log" ftype="txt" >
+                <assert_contents>
+                    <has_text text="Input contig"/>
+                    <has_text text="Elapsed time"/>
+                    <has_text text="Yielded 2 bins for contig (scaffold) file"/>
+                </assert_contents>
+            </output>
+            <output name="abundout" file="4/out.abund1" ftype="tabular" />
+            <output name="marker" file="4/out.marker" ftype="tabular" />
+            <output name="plot" file="4/out.marker.pdf" ftype="pdf" compare="sim_size" />
+            <output_collection name="markers" type="list" count="2">
+                <element name="001" ftype="fasta">
+                    <assert_contents>
+                        <has_text text=">out.001.Methyltransf_5"/>
+                    </assert_contents>
+                </element>
+                <element name="002" ftype="fasta">
+                    <assert_contents>
+                        <has_text text=">out.002.Methyltransf_5"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
     </tests>
     <help><![CDATA[
 MaxBin is a software that clusters metagenomic contigs into different bins,