Mercurial > repos > iuc > vsearch

diff clustering.xml @ 0:fae6527990af draft
Imported from capsule None
author: iuc
date: Thu, 21 May 2015 03:58:09 -0400
children: 8c4e2933a17a
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/clustering.xml	Thu May 21 03:58:09 2015 -0400
@@ -0,0 +1,167 @@
+<tool id="vsearch_clustering" name="VSearch clustering" version="@VERSION@.0">
+    <description></description>
+    <macros>
+        <import>vsearch_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <command>
+<![CDATA[
+    vsearch
+        @GENERAL@
+        --cluster_fast "$infile"
+        ##--cluster_smallmem FILENAME  cluster sequences using a small amount of memory
+        ##--clusters STRING            output each cluster to a separate FASTA file
+
+        #if $maxrejects:
+            --maxrejects $maxrejects
+        #end if
+        #if $maxaccepts:
+            --maxaccepts $maxaccepts
+        #end if
+
+        $cons_truncate
+        --id $id
+        ##--iddef $iddef
+
+        #if '--msaout' in str($outputs):
+            --msaout $msaout
+        #end if
+        #if '--consout' in str($outputs):
+            --consout $consout
+        #end if
+        #if '--centroids' in str($outputs):
+            --centroids $centroids
+        #end if
+        #if '--alnout' in str($outputs):
+            --alnout $alnout
+        #end if
+        #if '--blast6out' in str($outputs):
+            --blast6out $blast6out
+        #end if
+        #if '--notmatched' in str($outputs):
+            --notmatched $notmatched
+        #end if
+        #if '--fastapairs' in str($outputs):
+            --fastapairs $fastapairs
+        #end if
+        #if '--matched' in str($outputs):
+            --matched $matched
+        #end if
+        #if $qmask != 'no':
+            --qmask $qmask
+        #end if
+        #if $sizein:
+            --sizein $sizein
+        #end if
+        #if $sizeout:
+            --sizeout $sizeout
+        #end if
+        --strand $strand
+        --usersort $usersort
+
+]]>
+    </command>
+    <inputs>
+        <param name="infile" type="data" format="fasta" label="Select your FASTA file" help="(--cluster_fast)" />
+        <expand macro="id_and_iddef" />
+        <param name="cons_truncate" type="boolean" truevalue="--cons_truncate" falsevalue="" checked="False" 
+            label="Do not ignore terminal gaps in MSA for consensus" help="(--cons_truncate)"/>
+        <param name="usersort" type="boolean" truevalue="--usersort" falsevalue="" checked="False" 
+            label="Indicate that input sequences are presorted" help="(--usersort)"/>
+        <expand macro="qmask" />
+        <expand macro="sizein" />
+        <expand macro="sizeout" />
+        <expand macro="strand" />
+        <expand macro="maxrejects" />
+        <expand macro="maxaccepts" />
+        <expand macro="general_output">
+            <option value="--msaout">Multiple sequence alignments</option>
+            <option value="--consout">Cluster consensus sequences</option>
+            <option value="--centroids">Centroid sequences</option>
+            <option value="--notmatched">Write non-matching query sequences to separate file</option>
+            <option value="--matched">Write matching query sequences to separate file</option>
+        </expand>
+
+    </inputs>
+    <outputs>
+        <data name="msaout" format="fasta" label="${tool.name} on ${on_string}: Multiple Sequence Alignments">
+            <filter>'--msaout' in outputs</filter>
+        </data>
+        <data name="consout" format="fasta" label="${tool.name} on ${on_string}: Consensus Sequences">
+            <filter>'--consout' in outputs</filter>
+        </data>
+        <data name="centroids" format="fasta" label="${tool.name} on ${on_string}: Cluster centroids">
+            <filter>'--centroids' in outputs</filter>
+        </data>
+        <data name="alnout" format="fasta" label="${tool.name} on ${on_string}: Alignment">
+            <filter>'--alnout' in outputs</filter>
+        </data>
+        <data name="notmatched" format="fasta" label="${tool.name} on ${on_string}: Non-matched queries">
+            <filter>'--notmatched' in outputs</filter>
+        </data>
+        <data name="matched" format="fasta" label="${tool.name} on ${on_string}: Matching query sequences">
+            <filter>'--matched' in outputs</filter>
+        </data>
+        <data name="blast6out" format="tabular" label="${tool.name} on ${on_string}: BLAST like tabular">
+            <filter>'--blast6out' in outputs</filter>
+        </data>
+        <data name="fastapairs" format="fasta" label="${tool.name} on ${on_string}: Query/Target sequences">
+            <filter>'--fastapairs' in outputs</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" value="BioMarKs5k.fsa.bz2" ftype="fasta" />
+            <param name="id" value="0.99"/>
+            <param name="maxaccepts" value="1"/>
+            <param name="maxrejects" value="2"/>
+            <param name="sizeout" value="--sizeout"/>
+            <param name="outputs" value="--centroids,--alnout,--blast6out,--notmatched" />
+            <output name="centroids" file="clustering_centroids_result1.fasta" ftype="fasta" />
+            <output name="blast6out" file="clustering_blast6out_result1.tab" ftype="tabular" />
+            <output name="notmatched" file="clustering_notmatched_result1.fasta" ftype="fasta" />
+            <!-- The result following result files would be too big -->
+            <!--output name="matched" file="clustering_matched_result1.fasta" ftype="fasta" /-->
+            <!--output name="alnout" file="clustering_alnout_result1.fasta" lines_diff="2" ftype="fasta" /-->
+            <!--output name="fastapairs" file="clustering_fastapairs_result1.fasta" ftype="fasta" /-->
+            <!--output name="msaout" file="clustering_msaout_result1.fasta" ftype="fasta" /-->
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+**What it does**
+
+vsearch implements a single-pass, greedy star-clustering algorithm, similar to the algorithms
+implemented in usearch, DNAclust and sumaclust for example.
+
+
+Clustering options (most searching options also apply)
+  --centroids FILENAME         output centroid sequences to FASTA file
+  --cluster_fast FILENAME      cluster sequences fast
+  --cluster_smallmem FILENAME  cluster sequences using a small amount of memory
+  --clusters STRING            output each cluster to a separate FASTA file
+  --consout FILENAME           output cluster consensus sequences to FASTA file
+  --cons_truncate              do not ignore terminal gaps in MSA for consensus
+  --id REAL                    reject if identity lower
+  --iddef INT                  id definition, 0-4=CD-HIT,all,int,MBL,BLAST (2)
+  --msaout FILENAME            output multiple seq. alignments to FASTA file 
+  --qmask                      mask seqs with dust, soft or no method (dust)
+  --sizein                     read abundance annotation from input
+  --sizeout                    write cluster abundances to centroid file
+  --strand                     cluster using "plus" or "both" strands (plus)
+  --usersort                   indicate that input sequences are presorted
+
+
+@EXTERNAL_DOCUMENTATION@
+
+-------
+
+@REFERENCES@
+
+
+]]>
+    </help>
+    <expand macro="citations" />
+</tool>
author	iuc
date	Thu, 21 May 2015 03:58:09 -0400
parents
children	8c4e2933a17a