Mercurial > repos > iuc > mothur_pre_cluster

diff pre.cluster.xml @ 2:39cfb67a03c0 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mothur commit 3418f23b9768f5aafb86488f5ec1cb97530d4fb3
author: iuc
date: Tue, 20 Mar 2018 22:05:18 -0400
parents: cad74a5a9a6f
children: fbbe2b081299
--- a/pre.cluster.xml	Tue Sep 05 17:08:10 2017 -0400
+++ b/pre.cluster.xml	Tue Mar 20 22:05:18 2018 -0400
@@ -7,47 +7,51 @@
     <expand macro="stdio"/>
     <expand macro="version_command"/>
     <command><![CDATA[
-        @SHELL_OPTIONS@
+@SHELL_OPTIONS@
 
-        ## create symlinks to input datasets
-        ln -s "$fasta" fasta.dat &&
-        ln -s "$name" name.dat &&
-        ln -s "$group" group.dat &&
+## create symlinks to input datasets
+ln -s '$fasta' fasta.dat &&
+ln -s '$name' name.dat &&
+ln -s '$group' group.dat &&
 
-        echo 'pre.cluster(
-            fasta=fasta.dat,
-            #if $name.is_of_type("mothur.names"):
-                name=name.dat,
-            #elif $name.is_of_type("mothur.count_table"):
-                count=name.dat,
-            #end if
-            #if $group:
-                group=group.dat,
-            #end if
-            diffs=$diffs,
-            match=$match,
-            mismatch=$mismatch,
-            gapopen=$gapopen,
-            gapextend=$gapextend,
-            topdown=$topdown,
-            processors='\${GALAXY_SLOTS:-8}'
-        )'
-        | sed 's/ //g'  ## mothur trips over whitespace
-        | mothur
-        | tee mothur.out.log &&
+echo 'pre.cluster(
+    fasta=fasta.dat,
+    #if $name.is_of_type("mothur.names"):
+        name=name.dat,
+    #elif $name.is_of_type("mothur.count_table"):
+        count=name.dat,
+    #end if
+    #if $group:
+        group=group.dat,
+    #end if
+    diffs=$diffs,
+    match=$match,
+    mismatch=$mismatch,
+    gapopen=$gapopen,
+    gapextend=$gapextend,
+    topdown=$topdown,
+    processors='\${GALAXY_SLOTS:-8}'
+)'
+| sed 's/ //g'  ## mothur trips over whitespace
+| mothur
+| tee mothur.out.log &&
 
-        if [ -f fasta.precluster.map ]; then mv fasta.precluster.map fasta.precluster.fasta.map; fi
+cat fasta* &&
+
+if [ -f fasta.precluster.map ]; then mv fasta.precluster.map fasta.precluster.fasta.map; fi
     ]]></command>
     <inputs>
-        <param name="fasta" type="data" format="fasta" label="fasta - Sequence Fasta"/>
-        <param name="name" type="data" format="mothur.names,mothur.count_table" optional="true" label="name file or count table - Sequences Name reference"/>
-        <param name="group" type="data" format="mothur.groups" optional="true" label="group - Sequences Name reference"/>
-        <param name="diffs" type="integer" value="1" min="0" label="diffs - Number of mismatched bases to allow between sequences in a group (default 1)"/>
-        <param name="match" type="integer" value="1" label="match - reward for a match (default 1)"/>
-        <param name="mismatch" type="integer" value="-1" label="mismatch - penalty for a mismatch (default -1)"/>
-        <param name="gapopen" type="integer" value="-2" label="gapopen - penalty for opening a gap (default -2)"/>
-        <param name="gapextend" type="integer" value="-1" label="gapextend - penalty for extending a gap (default -1)"/>
-        <param name="topdown" type="boolean" truevalue="true" falsevalue="false" checked="true" label="topdown" help="cluster from largest abundance to smallest or vice versa. Default=Yes, meaning largest to smallest"/>
+        <param argument="fasta" type="data" format="fasta" label="fasta - Sequence Fasta"/>
+        <param argument="name" type="data" format="mothur.names,mothur.count_table" optional="true" label="name file or count table - Sequences Name reference"/>
+        <param argument="group" type="data" format="mothur.groups" optional="true" label="group - Sequences Name reference"/>
+        <param argument="diffs" type="integer" value="1" min="0" label="diffs - Number of mismatched bases to allow between sequences in a group (default 1)"/>
+        <param argument="match" type="integer" value="1" label="match - reward for a match (default 1)"/>
+        <param argument="mismatch" type="integer" value="-1" label="mismatch - penalty for a mismatch (default -1)"/>
+        <param argument="gapopen" type="integer" value="-2" label="gapopen - penalty for opening a gap (default -2)"/>
+        <param argument="gapextend" type="integer" value="-1" label="gapextend - penalty for extending a gap (default -1)"/>
+        <param argument="topdown" type="boolean" truevalue="true" falsevalue="false" checked="true" label="topdown"
+            help="cluster from largest abundance to smallest or vice versa. Default=Yes, meaning largest to smallest"/>
+        <expand macro="param-savelog"/>
     </inputs>
     <outputs>
         <expand macro="logfile-output"/>
@@ -66,37 +70,76 @@
         <test><!-- test with fasta and names file -->
             <param name="fasta" value="amazon.fasta_head" ftype="fasta"/>
             <param name="name" value="amazon.head.names" ftype="mothur.names"/>
-            <output name="fasta_out" md5="d5d37ace0f2a5c020edd0aa9eb7aca9b" ftype="fasta"/>
-            <output name="names_out" md5="51090fe5e111920ec1003c2073532d96" ftype="mothur.names"/>
+            <output name="fasta_out" ftype="fasta">
+                <assert_contents>
+                    <expand macro="test-fasta-format"/>
+                    <has_text text="U68589"/>
+                </assert_contents>
+            </output>
+            <output name="names_out" ftype="mothur.names">
+                <assert_contents>
+                    <expand macro="test-names-format"/>
+                    <has_line_matching expression="^U68589\tU68589$"/>
+                </assert_contents>
+            </output>
             <output_collection name="map_out" count="1">
-                <element name="fasta" md5="c2ca7b0d45aa3881c7bd5cb8a44c853b" ftype="tabular"/>
+                <element name="fasta" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="ideal_seq_1"/>
+                        <has_text text="U68589"/>
+                    </assert_contents>
+                </element>
             </output_collection>
+            <param name="savelog" value="true"/>
             <expand macro="logfile-test"/>
         </test>
         <test><!-- test with fasta and count table -->
             <param name="fasta" value="amazon.fasta_head" ftype="fasta"/>
             <param name="name" value="amazon.count_table" ftype="mothur.count_table"/>
-            <output name="fasta_out" md5="d5d37ace0f2a5c020edd0aa9eb7aca9b" ftype="fasta"/>
-            <output name="count_out" md5="9831e271b9a96f91ad700f84aeed03a5" ftype="mothur.count_table"/>
+            <output name="fasta_out" ftype="fasta">
+                <assert_contents>
+                    <expand macro="test-fasta-format"/>
+                    <has_text text="U68589"/>
+                </assert_contents>
+            </output>
+            <output name="count_out" ftype="mothur.count_table">
+                <assert_contents>
+                    <expand macro="test-count-format"/>
+                    <has_text text="U68589"/>
+                </assert_contents>
+            </output>
             <output_collection name="map_out" count="1">
-                <element name="fasta" md5="c2ca7b0d45aa3881c7bd5cb8a44c853b" ftype="tabular"/>
+                <element name="fasta" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="ideal_seq_1"/>
+                        <has_text text="U68589"/>
+                    </assert_contents>
+                </element>
             </output_collection>
+            <param name="savelog" value="true"/>
             <expand macro="logfile-test"/>
         </test>
     </tests>
-    <help>
-<![CDATA[
+    <help><![CDATA[
 
 @MOTHUR_OVERVIEW@
 
 **Command Documentation**
 
-The pre.cluster_ command implements a pseudo-single linkage algorithm with the goal of removing sequences that are likely due to pyrosequencing errors. The basic idea is that abundant sequences are more likely to generate erroneous sequences than rare sequences. With that in mind, the algorithm proceeds by ranking sequences in order of their abundance. Then we walk through the list of sequences looking for rarer sequences that are within some threshold of the original sequence. Those that are within the threshold are merged with the larger sequence. The original Huse method performs this task on a distance matrix, whereas we do it based on the original sequences. The advantage of our approach is that the algorithm works on aligned sequences instead of a distance matrix. This is advantageous because by pre-clustering you remove a large number of sequences making the distance calculation much faster.
+The pre.cluster_ command implements a pseudo-single linkage algorithm with the goal of removing sequences that
+are likely due to pyrosequencing errors. The basic idea is that abundant sequences are more likely to generate
+erroneous sequences than rare sequences. With that in mind, the algorithm proceeds by ranking sequences in order
+of their abundance. Then we walk through the list of sequences looking for rarer sequences that are within some
+threshold of the original sequence. Those that are within the threshold are merged with the larger sequence.
+The original Huse method performs this task on a distance matrix, whereas we do it based on the original sequences.
+The advantage of our approach is that the algorithm works on aligned sequences instead of a distance matrix.
+This is advantageous because pre-clustering removes a large number of sequences, making the distance
+calculation much faster.
 
 .. _pre.cluster: https://www.mothur.org/wiki/Pre.cluster
 
 v1.24.0: Updated to mothur 1.33, added count and topdown parameters
-]]>
-    </help>
+
+    ]]></help>
     <expand macro="citations"/>
 </tool>
author	iuc
date	Tue, 20 Mar 2018 22:05:18 -0400
parents	cad74a5a9a6f
children	fbbe2b081299