Mercurial > repos > iuc > vsearch
diff clustering.xml @ 0:fae6527990af draft
Imported from capsule None
author | iuc |
---|---|
date | Thu, 21 May 2015 03:58:09 -0400 |
parents | |
children | 8c4e2933a17a |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clustering.xml Thu May 21 03:58:09 2015 -0400 @@ -0,0 +1,167 @@ +<tool id="vsearch_clustering" name="VSearch clustering" version="@VERSION@.0"> + <description></description> + <macros> + <import>vsearch_macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version_command" /> + <command> +<![CDATA[ + vsearch + @GENERAL@ + --cluster_fast "$infile" + ##--cluster_smallmem FILENAME cluster sequences using a small amount of memory + ##--clusters STRING output each cluster to a separate FASTA file + + #if $maxrejects: + --maxrejects $maxrejects + #end if + #if $maxaccepts: + --maxaccepts $maxaccepts + #end if + + $cons_truncate + --id $id + ##--iddef $iddef + + #if '--msaout' in str($outputs): + --msaout $msaout + #end if + #if '--consout' in str($outputs): + --consout $consout + #end if + #if '--centroids' in str($outputs): + --centroids $centroids + #end if + #if '--alnout' in str($outputs): + --alnout $alnout + #end if + #if '--blast6out' in str($outputs): + --blast6out $blast6out + #end if + #if '--notmatched' in str($outputs): + --notmatched $notmatched + #end if + #if '--fastapairs' in str($outputs): + --fastapairs $fastapairs + #end if + #if '--matched' in str($outputs): + --matched $matched + #end if + #if $qmask != 'no': + --qmask $qmask + #end if + #if $sizein: + --sizein $sizein + #end if + #if $sizeout: + --sizeout $sizeout + #end if + --strand $strand + --usersort $usersort + +]]> + </command> + <inputs> + <param name="infile" type="data" format="fasta" label="Select your FASTA file" help="(--cluster_fast)" /> + <expand macro="id_and_iddef" /> + <param name="cons_truncate" type="boolean" truevalue="--cons_truncate" falsevalue="" checked="False" + label="Do not ignore terminal gaps in MSA for consensus" help="(--cons_truncate)"/> + <param name="usersort" type="boolean" truevalue="--usersort" falsevalue="" checked="False" + label="Indicate that input sequences are presorted" help="(--usersort)"/> + <expand macro="qmask" /> + <expand macro="sizein" /> + <expand macro="sizeout" /> + <expand macro="strand" /> + <expand macro="maxrejects" /> + <expand macro="maxaccepts" /> + <expand macro="general_output"> + <option value="--msaout">Multiple sequence alignments</option> + <option value="--consout">Cluster consensus sequences</option> + <option value="--centroids">Centroid sequences</option> + <option value="--notmatched">Write non-matching query sequences to separate file</option> + <option value="--matched">Write matching query sequences to separate file</option> + </expand> + + </inputs> + <outputs> + <data name="msaout" format="fasta" label="${tool.name} on ${on_string}: Multiple Sequence Alignments"> + <filter>'--msaout' in outputs</filter> + </data> + <data name="consout" format="fasta" label="${tool.name} on ${on_string}: Consensus Sequences"> + <filter>'--consout' in outputs</filter> + </data> + <data name="centroids" format="fasta" label="${tool.name} on ${on_string}: Cluster centroids"> + <filter>'--centroids' in outputs</filter> + </data> + <data name="alnout" format="fasta" label="${tool.name} on ${on_string}: Alignment"> + <filter>'--alnout' in outputs</filter> + </data> + <data name="notmatched" format="fasta" label="${tool.name} on ${on_string}: Non-matched queries"> + <filter>'--notmatched' in outputs</filter> + </data> + <data name="matched" format="fasta" label="${tool.name} on ${on_string}: Matching query sequences"> + <filter>'--matched' in outputs</filter> + </data> + <data name="blast6out" format="tabular" label="${tool.name} on ${on_string}: BLAST like tabular"> + <filter>'--blast6out' in outputs</filter> + </data> + <data name="fastapairs" format="fasta" label="${tool.name} on ${on_string}: Query/Target sequences"> + <filter>'--fastapairs' in outputs</filter> + </data> + </outputs> + <tests> + <test> + <param name="infile" value="BioMarKs5k.fsa.bz2" ftype="fasta" /> + <param name="id" value="0.99"/> + <param name="maxaccepts" value="1"/> + <param name="maxrejects" value="2"/> + <param name="sizeout" value="--sizeout"/> + <param name="outputs" value="--centroids,--alnout,--blast6out,--notmatched" /> + <output name="centroids" file="clustering_centroids_result1.fasta" ftype="fasta" /> + <output name="blast6out" file="clustering_blast6out_result1.tab" ftype="tabular" /> + <output name="notmatched" file="clustering_notmatched_result1.fasta" ftype="fasta" /> + <!-- The result following result files would be too big --> + <!--output name="matched" file="clustering_matched_result1.fasta" ftype="fasta" /--> + <!--output name="alnout" file="clustering_alnout_result1.fasta" lines_diff="2" ftype="fasta" /--> + <!--output name="fastapairs" file="clustering_fastapairs_result1.fasta" ftype="fasta" /--> + <!--output name="msaout" file="clustering_msaout_result1.fasta" ftype="fasta" /--> + </test> + </tests> + <help> +<![CDATA[ +**What it does** + +vsearch implements a single-pass, greedy star-clustering algorithm, similar to the algorithms +implemented in usearch, DNAclust and sumaclust for example. + + +Clustering options (most searching options also apply) + --centroids FILENAME output centroid sequences to FASTA file + --cluster_fast FILENAME cluster sequences fast + --cluster_smallmem FILENAME cluster sequences using a small amount of memory + --clusters STRING output each cluster to a separate FASTA file + --consout FILENAME output cluster consensus sequences to FASTA file + --cons_truncate do not ignore terminal gaps in MSA for consensus + --id REAL reject if identity lower + --iddef INT id definition, 0-4=CD-HIT,all,int,MBL,BLAST (2) + --msaout FILENAME output multiple seq. alignments to FASTA file + --qmask mask seqs with dust, soft or no method (dust) + --sizein read abundance annotation from input + --sizeout write cluster abundances to centroid file + --strand cluster using "plus" or "both" strands (plus) + --usersort indicate that input sequences are presorted + + +@EXTERNAL_DOCUMENTATION@ + +------- + +@REFERENCES@ + + +]]> + </help> + <expand macro="citations" /> +</tool>