view trim.seqs.xml @ 0:143f949a5def draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mothur commit a9d1e0debcd357d8080a1c6c5f1d206dd45a7a4d
author iuc
date Fri, 19 May 2017 05:37:34 -0400
parents
children baa3df51e2b7
line wrap: on
line source

<tool profile="16.07" id="mothur_trim_seqs" name="Trim.seqs" version="@WRAPPER_VERSION@.0">
    <description>Trim sequences - primers, barcodes, quality</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <expand macro="version_command"/>
    <command><![CDATA[
        @SHELL_OPTIONS@

        ## Create symlinks with fixed names so the mothur call below can refer to
        ## predictable file names. The optional name and count inputs are linked
        ## only when a dataset was supplied, using the same guards that decide
        ## whether they are passed to trim.seqs further down.
        ln -s "$fasta" fasta.dat &&
        #if $names:
            ln -s "$names" names.dat &&
        #end if
        #if $count:
            ln -s "$count" count.dat &&
        #end if
        #if $oligo.add == "yes":
            ln -s "$oligo.oligos" oligo.oligos.dat &&
        #end if
        #if $qual.add2 == "yes":
            ln -s "$qual.qfile" qual.qfile.dat &&
        #end if

        ## Build the trim.seqs call as text, strip the spaces from it and feed it
        ## to mothur on stdin; mothur output is also copied to mothur.out.log.
        echo 'trim.seqs(
            fasta=fasta.dat,
            minlength=$minlength,
            maxlength=$maxlength,
            maxambig=$maxambig,
            maxhomop=$maxhomop,
            keepfirst=$keepfirst,
            removelast=$removelast,
            #if $oligo.add == "yes":
                oligos=oligo.oligos.dat,
                bdiffs=$oligo.bdiffs,
                pdiffs=$oligo.pdiffs,
                tdiffs=$oligo.tdiffs,
                ldiffs=$oligo.ldiffs,
                sdiffs=$oligo.sdiffs,
                keepforward=$oligo.keepforward,
                allfiles=$oligo.allfiles,
            #end if
            #if $qual.add2 == "yes":
                qfile=qual.qfile.dat,
                qaverage=$qual.qaverage,
                qthreshold=$qual.qthreshold,
                qwindowaverage=$qual.qwindowaverage,
                qwindowsize=$qual.qwindowsize,
                rollaverage=$qual.rollaverage,
                qstepsize=$qual.qstepsize,
                qtrim=$qual.qtrim,
            #end if
            flip=$flip,
            #if $names:
                name=names.dat,
            #end if
            logtransform=$logtransform,
            checkorient=$checkorient,
            #if $count:
                count=count.dat,
            #end if
            processors='\${GALAXY_SLOTS:-8}'
        )'
        | sed 's/ //g'  ## mothur trips over whitespace
        | mothur
        | tee mothur.out.log
        ## prevent these two files from being gathered into collection
        && mv fasta.trim.fasta fasta.trim
        && mv fasta.scrap.fasta fasta.scrap
    ]]></command>
    <inputs>
        <!-- Sequence inputs and length/ambiguity filters. Each value is handed to the
             trim.seqs call under the parameter name shown at the start of its label. -->
        <param name="fasta" type="data" format="fasta" label="fasta - Sequences"/>
        <param name="names" type="data" format="mothur.names" optional="true" label="name - Sequence representative name list"/>
        <param name="minlength" type="integer" value="0" min="0" label="minlength - Minimum Sequence Length (default 0, ignored if &#060; 1 )"/>
        <param name="maxlength" type="integer" value="0" min="0" label="maxlength - Maximum Sequence Length (default 0, ignored if &#060; 1)"/>
        <param name="maxambig" type="integer" value="-1" min="-1" label="maxambig - Maximum ambiguous bases (default -1, ignored if &#060; 0)"/>
        <param name="maxhomop" type="integer" value="0" min="0" label="maxhomop - Maximum homopolymers (default 0, ignored if &#060; 1)"/>
        <param name="keepfirst" type="integer" value="0" min="0" label="keepfirst - Number of leading bases to keep (default 0, ignored if &#060; 1)" help="trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements"/>
        <param name="removelast" type="integer" value="0" min="0" label="removelast - Number of trailing bases to remove (default 0, ignored if &#060; 1)" help="removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements"/>
        <!-- Oligos-based trimming: the nested parameters are only offered, and only
             passed to trim.seqs, when "yes" is selected. -->
        <conditional name="oligo">
            <param name="add" type="select" label="Trim with an oligos file?" help="">
                <option value="no">no</option>
                <option value="yes">yes</option>
            </param>
            <when value="no"/>
            <when value="yes">
                <param name="oligos" type="data" format="mothur.oligos" label="oligos - barcodes and primers"/>
                <param name="bdiffs" type="integer" value="0" min="0" label="bdiffs - number of differences to allow in the barcode (default 0)"/>
                <param name="pdiffs" type="integer" value="0" min="0" label="pdiffs - number of differences to allow in the primer (default 0)"/>
                <param name="tdiffs" type="integer" value="0" min="0" label="tdiffs - total number of differences to allow in primer and barcode (default 0)"/>
                <param name="ldiffs" type="integer" value="0" min="0" optional="true" label="ldiffs - total number of differences to allow in linker sequence (default 0)"/>
                <param name="sdiffs" type="integer" value="0" min="0" optional="true" label="sdiffs - total number of differences to allow in spacer sequence (default 0)"/>
                <param name="keepforward" type="boolean" truevalue="true" falsevalue="false" checked="false" label="keepforward - keep the primer"/>
                <param name="allfiles" type="boolean" truevalue="true" falsevalue="false" checked="false" label="allfiles - separate into file per barcode"/>
            </when>
        </conditional>
        <!-- Quality-based trimming: the nested parameters are only offered, and only
             passed to trim.seqs, when "yes" is selected. -->
        <conditional name="qual">
            <param name="add2" type="select" label="Trim based on a quality file?" help="">
                <option value="no">no</option>
                <option value="yes">yes</option>
            </param>
            <when value="no"/>
            <when value="yes">
                <param name="qfile" type="data" format="qual454" label="qfile - 454 quality file"/>
                <param name="qaverage" type="integer" value="0" min="0" label="qaverage - remove sequences that have an average base quality below this value (ignored if &#060; 1)"/>
                <param name="qthreshold" type="integer" value="0" min="0" label="qthreshold - remove sequences that have any base quality below this value (ignored if &#060; 1)"/>
                <param name="qwindowaverage" type="integer" value="0" min="0" label="qwindowaverage - remove sequences that have an average base quality below this value over a window (ignored if &#060; 1)"/>
                <param name="qwindowsize" type="integer" value="50" min="0" label="qwindowsize - number of bases in a window. Default=50."/>
                <param name="rollaverage" type="integer" value="0" min="0" label="rollaverage - remove sequences that have an average base quality below this value in a rolling window (ignored if &#060; 1)"/>
                <param name="qstepsize" type="integer" value="1" min="0" label="qstepsize - number of bases to move the window over. Default=1."/>
                <param name="qtrim" type="boolean" truevalue="true" falsevalue="false" checked="false" label="qtrim - trim sequences below qthreshold and put in trim output, else put in scrap "/>
            </when>
        </conditional>
        <!-- Remaining options that apply regardless of the two conditionals above. -->
        <param name="flip" type="boolean" truevalue="true" falsevalue="false" checked="false" label="flip - reverse complement the trimmed sequences"/>
        <param name="count" type="data" format="mothur.count_table" optional="true" label="count - a count_table" help="The count file is similar to the name file in that it is used to represent the number of duplicate sequences for a given representative sequence. If you run trim.seqs with an oligos file that contains group labels, trim.seqs will create a new *.trim.count_table with the group information included. "/>
        <param name="logtransform" type="boolean" truevalue="true" falsevalue="false" checked="false" label="logtransform" help="allows you to indicate you want the averages for the qwindowaverage, rollaverage and qaverage to be calculated using a logtransform."/>
        <param name="checkorient" type="boolean" truevalue="true" falsevalue="false" checked="false" label="checkorient - search the reverse complement?" help="If you are running the trim.seqs command with paired barcodes or primers, you can use the checkorient parameter. When checkorient=t and mothur can't find the barcodes and primers, it will search the reverse complement. "/>
    </inputs>
    <outputs>
        <expand macro="logfile-output"/>
        <!-- Always produced: the trimmed and scrapped sequences, picked up under the
             names fasta.trim / fasta.scrap from the job working directory. -->
        <data name="trim_fasta" format_source="fasta" from_work_dir="fasta.trim" label="${tool.name} on ${on_string}: trim.fasta"/>
        <data name="scrap_fasta" format_source="fasta" from_work_dir="fasta.scrap" label="${tool.name} on ${on_string}: scrap.fasta"/>
        <!-- Only created when a names input was supplied. -->
        <data name="trim_names" format="mothur.names" from_work_dir="names*.trim.names" label="${tool.name} on ${on_string}: trim.names">
            <filter>names</filter>
        </data>
        <data name="scrap_names" format="mothur.names" from_work_dir="names*.scrap.names" label="${tool.name} on ${on_string}: scrap.names">
            <filter>names</filter>
        </data>
        <!-- Only created when a count table input was supplied. -->
        <data name="trim_count" format="mothur.count_table" from_work_dir="count*.trim.count_table" label="${tool.name} on ${on_string}: trim.count">
            <filter>count</filter>
        </data>
        <data name="scrap_count" format="mothur.count_table" from_work_dir="count*.scrap.count_table" label="${tool.name} on ${on_string}: scrap.count">
            <filter>count</filter>
        </data>
        <!-- Only created when trimming with a quality file is enabled. -->
        <data name="trim_qual" format_source="qfile" from_work_dir="fasta*.trim.qual" label="${tool.name} on ${on_string}: trim.qual">
            <filter>qual['add2'] == 'yes'</filter>
        </data>
        <data name="scrap_qual" format_source="qfile" from_work_dir="fasta*.scrap.qual" label="${tool.name} on ${on_string}: scrap.qual">
            <filter>qual['add2'] == 'yes'</filter>
        </data>
        <!-- Only created when trimming with an oligos file is enabled. -->
        <data name="groups_file" format="mothur.groups" from_work_dir="fasta.groups" label="${tool.name} on ${on_string}: groups">
            <filter>oligo['add'] == 'yes'</filter>
        </data>
        <!-- Collections discovered by file name when an oligos file is used and
             allfiles is switched on; the captured "designation" names each element. -->
        <collection name="fasta_allfiles" type="list" label="${tool.name} on ${on_string}: allfiles fasta">
            <filter>oligo['add'] == 'yes' and oligo['allfiles']</filter>
            <discover_datasets pattern="fasta.*?\.(?P&lt;designation&gt;.*)\.fasta" format="fasta"/>
        </collection>
        <collection name="groups_allfiles" type="list" label="${tool.name} on ${on_string}: allfiles groups">
            <filter>oligo['add'] == 'yes' and oligo['allfiles']</filter>
            <discover_datasets pattern="fasta.*?\.(?P&lt;designation&gt;.*)\.groups" format="mothur.groups"/>
        </collection>
    </outputs>
    <tests>
        <!-- Case 1: fasta plus a names file; expects trimmed/scrapped fasta and names outputs -->
        <test>
            <param name="fasta" ftype="fasta" value="amazon.fasta"/>
            <param name="names" ftype="mothur.names" value="amazon1.names"/>
            <param name="maxhomop" value="4"/>

            <output name="trim_fasta" ftype="fasta" md5="14dcaa23735a3f545e7014a69b002859"/>
            <output name="scrap_fasta" ftype="fasta" md5="4f791b7684662f1f962970af46429e24"/>
            <output name="trim_names" ftype="mothur.names" md5="1b8c6c47052bb69524ef56ebb764fb8f"/>
            <output name="scrap_names" ftype="mothur.names" md5="80f9252837e4b189f06ec00469b88e85"/>
            <expand macro="logfile-test"/>
        </test>

        <!-- Case 2: fasta plus a count table; expects trimmed/scrapped fasta and count outputs -->
        <test>
            <param name="fasta" ftype="fasta" value="amazon.fasta"/>
            <param name="count" ftype="mothur.count_table" value="amazon1.count_table"/>
            <param name="maxhomop" value="4"/>

            <output name="trim_fasta" ftype="fasta" md5="14dcaa23735a3f545e7014a69b002859"/>
            <output name="scrap_fasta" ftype="fasta" md5="4f791b7684662f1f962970af46429e24"/>
            <output name="trim_count" ftype="mothur.count_table" md5="836b4d72a8cda3741ef435741783b384"/>
            <output name="scrap_count" ftype="mothur.count_table" md5="04ae9f50c1b6f0d8d7e1ac28f845dd4c"/>
            <expand macro="logfile-test"/>
        </test>

        <!-- Case 3: trimming with an oligos file; additionally expects the groups file -->
        <test>
            <param name="fasta" ftype="fasta" value="amazon.fasta"/>
            <param name="add" value="yes"/>
            <param name="oligos" ftype="mothur.oligos" value="GQY1XT001.oligos"/>
            <param name="bdiffs" value="100"/>
            <param name="pdiffs" value="100"/>

            <output name="trim_fasta" ftype="fasta" md5="75a8a3ae2d1fe1ff2b860480b84e9bd6"/>
            <output name="scrap_fasta" ftype="fasta" md5="c4fd14e70ab7d1c21d238e87624829d7"/>
            <output name="groups_file" ftype="mothur.groups" md5="198957282c234e825414e175d926046a"/>
            <expand macro="logfile-test"/>
        </test>

        <!-- Case 4: oligos file with allfiles enabled; checks both discovered collections -->
        <test>
            <param name="fasta" ftype="fasta" value="amazon.fasta"/>
            <param name="add" value="yes"/>
            <param name="oligos" ftype="mothur.oligos" value="GQY1XT001.oligos"/>
            <param name="bdiffs" value="100"/>
            <param name="pdiffs" value="100"/>
            <param name="allfiles" value="true"/>

            <output name="trim_fasta" ftype="fasta" md5="75a8a3ae2d1fe1ff2b860480b84e9bd6"/>
            <output name="scrap_fasta" ftype="fasta" md5="c4fd14e70ab7d1c21d238e87624829d7"/>
            <output_collection name="groups_allfiles" count="9">
                <element name="F003D144" ftype="mothur.groups" md5="445124b06d0c9146ae353631794c8093"/>
            </output_collection>
            <output_collection name="fasta_allfiles" count="9">
                <element name="F003D144" ftype="fasta" md5="025ff271ac24ecb898863d7fcbfabf10"/>
            </output_collection>
            <expand macro="logfile-test"/>
        </test>

        <!-- Case 5: trimming with a quality file; expects trimmed/scrapped fasta and qual outputs -->
        <test>
            <param name="fasta" ftype="fasta" value="Fasting_Example1.fasta"/>
            <param name="add2" value="yes"/>
            <param name="qfile" ftype="qual454" value="Fasting_Example1.qual"/>
            <param name="maxhomop" value="4"/>

            <output name="trim_fasta" ftype="fasta" md5="d02f74acd6d9fb52b04a93869bb79302"/>
            <output name="scrap_fasta" ftype="fasta" md5="a4d3ef3d91b4c0146ec84bb7aad3987c"/>
            <output name="trim_qual" ftype="qual454" md5="3d4e2d3c7dd43b90660ab9c923d9eab1"/>
            <output name="scrap_qual" ftype="qual454" md5="22931236d082c2b77811bbf912c1f4b1"/>
            <expand macro="logfile-test"/>
        </test>
    </tests>
    <help>
<![CDATA[

@MOTHUR_OVERVIEW@

**Command Documentation**

The trim.seqs_ command provides the preprocessing features needed to screen and sort pyrosequences.  The command will enable you to trim off primer sequences and barcodes, use the barcode information to generate a group file and split a fasta file into sub-files, screen sequences based on the qual file that comes from 454 sequencers, cull sequences based on sequence length and the presence of ambiguous bases and get the reverse complement of your sequences. While this analysis is clearly geared towards pyrosequencing collections, it can also be used with traditional Sanger sequences.

.. _trim.seqs: https://www.mothur.org/wiki/Trim.seqs

]]>
    </help>
    <expand macro="citations"/>
</tool>