Mercurial > repos > iuc > mothur_filter_seqs
view filter.seqs.xml @ 7:daccf69bc281 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mothur commit ff56a4c9218ddea84ff5cd72fd98c62db212465d"
author | iuc |
---|---|
date | Fri, 06 Nov 2020 00:02:41 +0000 |
parents | e71c69acde3f |
children |
line wrap: on
line source
<tool profile="16.07" id="mothur_filter_seqs" name="Filter.seqs" version="@WRAPPER_VERSION@.0">
    <!--
        Galaxy wrapper for the mothur filter.seqs command.

        Inputs : one mandatory alignment plus any number of additional alignments
                 (repeat "inputs"), and an optional hard column filter file.
        Outputs: the computed column filter, and either a single filtered fasta
                 (no additional alignments) or a list collection of filtered
                 fastas (one or more additional alignments).

        The requirements, stdio, version_command, param-savelog, logfile-output,
        logfile-test and citations macros, as well as the @...@ tokens, are
        expected to come from macros.xml, which is not part of this file.
    -->
    <description>removes columns from alignments</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <expand macro="version_command"/>
    <command><![CDATA[
        @SHELL_OPTIONS@

        #import re

        ## create symlinks to input datasets
        ## (fixed names keep the mothur command line free of arbitrary dataset paths)
        ln -s '$fasta' fasta.dat &&
        #for $i in $inputs:
            ln -s '$i.fasta' fasta${inputs.index($i)}.dat &&
        #end for
        ## the hard filter is optional: only link it when a dataset was supplied,
        ## mirroring the guard around the hard= argument below
        #if $hard:
            ln -s '$hard' hard.dat &&
        #end if

        ## The single quote before the GALAXY_SLOTS expression closes the echo string
        ## so that the shell expands the variable; the quote after it reopens the string.
        echo 'filter.seqs(
            fasta=fasta.dat#for $i in $inputs#-fasta${inputs.index($i)}.dat#end for#,
            vertical=$vertical,
            #if $trump:
                trump=$trump,
            #end if
            soft=$soft,
            #if $hard:
                hard=hard.dat,
            #end if
            processors='\${GALAXY_SLOTS:-8}'
        )'
        ## mothur trips over whitespace
        | sed 's/ //g'
        | mothur
        | tee mothur.out.log

        &&

        ## rename collection files for more transparent element naming
        ## every character that is not a word character, dash, whitespace or dot
        ## is replaced by an underscore; the same rule is applied to all inputs
        #set $identifier=re.sub('[^\w\-\s\.]', '_', str($fasta.element_identifier))
        mv fasta.filter.fasta '${identifier}.filter.fasta'
        #for $i in $inputs:
            #set $identifier=re.sub('[^\w\-\s\.]', '_', str($i.fasta.element_identifier))
            && mv fasta${inputs.index($i)}.filter.fasta '${identifier}.filter.fasta'
        #end for
    ]]></command>
    <inputs>
        <param name="fasta" type="data" format="mothur.align" label="fasta - Alignment Fasta"/>
        <repeat name="inputs" title="Additional Alignment File">
            <param name="fasta" type="data" format="mothur.align" label="fasta - Alignment Fasta"/>
        </repeat>
        <param name="vertical" type="boolean" checked="true" truevalue="true" falsevalue="false" label="vertical - Vertical column" help="Ignore any column that only contains gap characters (i.e. '-' or '.')"/>
        <!-- the empty "Off" value makes the command template skip the trump argument -->
        <param name="trump" type="select" label="trump - Trump character" help="Remove a column if the trump character is found at that position in any sequence of the alignment">
            <option value="">Off</option>
            <option value=".">.</option>
            <option value="-">-</option>
            <option value="N">N</option>
        </param>
        <param name="soft" type="integer" value="0" min="0" max="100" label="soft - percentage required to retain column. (0-100)" help="Removes any column where the dominant base (i.e. A, T, G, C, or U) does not occur in at least a designated percentage of sequences"/>
        <param name="hard" type="data" format="mothur.filter" optional="true" label="hard - Hard Column Filter" help="A file should only contain one line consisting of 0's and 1's"/>
        <expand macro="param-savelog"/>
    </inputs>
    <outputs>
        <expand macro="logfile-output"/>
        <data name="out_filter" format="mothur.filter" from_work_dir="fasta*.filter" label="${tool.name} on ${on_string}: filter"/>
        <collection name="filteredfastas" type="list" label="${tool.name} on ${on_string}: filtered fastas">
            <!-- the angle brackets of the named group must be escaped inside an attribute value -->
            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.filter\.fasta" format="fasta"/>
            <filter>inputs</filter> <!-- only output collection if multiple outputs-->
        </collection>
        <data name="filteredfasta" format="fasta" from_work_dir="*.filter.fasta" label="${tool.name} on ${on_string}: filtered fasta">
            <filter>not inputs</filter>
        </data>
    </outputs>
    <tests>
        <test><!-- test with multiple inputs and collection output -->
            <param name="fasta" value="HMP_MOCK.v35.align" ftype="mothur.align"/>
            <repeat name="inputs">
                <param name="fasta" value="Mock_S280_L001_R1_001_small.trim.contigs.good.align_head" ftype="mothur.align"/>
            </repeat>
            <output name="out_filter" md5="3e6c2cfef46baf35d2a8b5cafe53e3a4"/>
            <output_collection name="filteredfastas" count="2">
                <element name="HMP_MOCK.v35.align" md5="ef4c6c2d9a882f7a22e5fa3c814af7cc" ftype="fasta"/>
            </output_collection>
            <param name="savelog" value="true"/>
            <expand macro="logfile-test"/>
        </test>
        <test><!-- test with single input and non-collection output -->
            <param name="fasta" value="HMP_MOCK.v35.align" ftype="mothur.align"/>
            <output name="out_filter" md5="3e6c2cfef46baf35d2a8b5cafe53e3a4"/>
            <output name="filteredfasta" md5="ef4c6c2d9a882f7a22e5fa3c814af7cc"/>
            <param name="savelog" value="true"/>
            <expand macro="logfile-test"/>
        </test>
    </tests>
    <help><![CDATA[

@MOTHUR_OVERVIEW@

**Command Documentation**

The filter.seqs_ command removes columns from alignments based on a criteria defined by the user. For example, alignments generated against reference alignments (e.g. from RDP, SILVA, or greengenes) often have columns where every character is either a '.' or a '-'. These columns are not included in calculating distances because they have no information in them. By removing these columns, the calculation of a large number of distances is accelerated. Also, people also like to mask their sequences to remove variable regions using a soft or hard mask (e.g. Lane's mask). This type of masking is only encouraged for deep-level phylogenetic analysis, not fine level analysis such as that needed with calculating OTUs.

.. _filter.seqs: https://www.mothur.org/wiki/Filter.seqs

v.1.20.0: Updated to Mothur 1.33

    ]]></help>
    <expand macro="citations"/>
</tool>