Mercurial > repos > iuc > dada2_seqcounts

<?xml version="1.0"?>
<macros>
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@DADA2_VERSION@">bioconductor-dada2</requirement>
            <yield/>
        </requirements>
    </xml>
    <xml name="bio_tools">
        <xrefs>
            <xref type="bio.tools">dada2</xref>
            <xref type="bioconductor">dada2</xref>
        </xrefs>
    </xml>
    <token name="@DADA2_VERSION@">1.28</token>
    <token name="@WRAPPER_VERSION@">1</token>

    <xml name="version_command">
        <version_command><![CDATA[
echo $(R --version | grep version | grep -v GNU)", dada2 version" $(R --vanilla --slave -e "library(dada2); cat(sessionInfo()\$otherPkgs\$dada2\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
        ]]></version_command>
    </xml>

    <xml name="stdio">
        <stdio>
            <regex match="Error: cannot allocate" source="stderr" level="fatal_oom" description="Out of memory error occurred" />
            <regex match="'Calloc' could not allocate memory" source="stderr" level="fatal_oom" description="Out of memory error occurred" />
        </stdio>
    </xml>

    <xml name="citations">
        <citations>
            <citation type="doi">10.1038/nmeth.3869</citation>
            <yield/>
        </citations>
    </xml>

    <token name="@DADA_UNIQUES@">dada2_dada,dada2_mergepairs</token>

    <!-- function to read dada2 data types
         - dada, and mergepairs are simply read as RDS
         - sequence_table is a named integer matrix (rows=samples, columns=ASVs)
         - uniques is a named integer vector (columns=ASVs, only one rows)-->
    <token name="@READ_FOO@"><![CDATA[
    read.uniques <- function ( fname ) {
         x <- read.table(fname, header=F, sep="\t")
         n <-x[,2]
         names(n)<-x[,1]
         return(n)
    }
    #def read_data($dataset)
        #if $dataset.is_of_type('dada2_sequencetable')
            t(read.table('$dataset', header=T, sep="\t", row.names=1, check.names=FALSE))
        #else if $dataset.is_of_type('dada2_uniques')
            read.uniques('$dataset')
        #else if $dataset.is_of_type('tabular')
            read.table('$dataset', header=T, sep="\t", row.names=1, check.names=FALSE)
        #else
            readRDS('$dataset')
        #end if
    #end def
    ]]></token>
    <!-- function to write dada2 data types (the content or the R variable 'out' is written)
         - dada, and mergepairs are written as RDS
         - sequence_table is a named integer matrix (rows=samples, columns=ASVs)
         - uniques is a named integer vector (columns=ASVs, only one rows)-->
    <token name="@WRITE_FOO@"><![CDATA[
write.data <- function( data, fname, type ){
    if( type == 'dada2_uniques'){
        write.table(data, file = fname, quote = F, sep = "\t", row.names = T, col.names = F)
    }else if( type== 'dada2_sequencetable'){
        write.table(t(data), file=fname, quote=F, sep="\t", row.names = T, col.names = NA)
    }else{
        saveRDS(data, file=fname)
    }
}
    ]]></token>

    <xml name="fastq_input" token_multiple="" token_collection_type="" token_argument_fwd="" token_argument_rev="">
        <conditional name="paired_cond">
            <param name="paired_select" type="select" label="Paired reads">
                <option value="paired">paired - in a data set pair</option>
                <option value="separate">paired - in two separate data sets</option>
                <option value="single">single</option>
            </param>
            <when value="paired">
                <param name="reads" argument="@ARGUMENT_FWD@/@ARGUMENT_REV@" type="data_collection" collection_type="@COLLECTION_TYPE@" format="fastq,fastq.gz" label="Paired short read data"/>
            </when>
            <when value="separate">
                <param name="reads" argument="@ARGUMENT_FWD@" type="data" format="fastq,fastq.gz" multiple="@MULTIPLE@" label="Forward read data"/>
                <param name="sdaer" argument="@ARGUMENT_REV@" type="data" format="fastq,fastq.gz" multiple="@MULTIPLE@" label="Reverse read data"/>
            </when>
            <when value="single">
                <param name="reads" argument="@ARGUMENT_FWD@" type="data" format="fastq,fastq.gz" multiple="@MULTIPLE@" label="Short read data"/>
            </when>
        </conditional>
    </xml>

    <!-- for filterAndTrim -->
    <xml name="trimmers">
        <section name="trim" title="Trimming parameters" expanded="true">
            <param argument="truncQ" type="integer" value="2" min="0" label="Truncate reads at quality threshold" help="Truncate reads at the first instance of a quality score less than or equal to this threshold"/>
            <param argument="trimLeft" type="integer" value="0" min="0" label="Trim start of each read" help="The number of nucleotides to remove from the start of each read."/>
            <param argument="trimRight" type="integer" value="0" min="0" label="Trim end of each read" help="The number of nucleotides to remove from the end of each read"/>
            <param argument="truncLen" type="integer" value="0" min="0" label="Truncate read length" help="Truncate reads after this amount of bases. Reads shorter than this are discarded. (default 0: no truncation)"/>
        </section>
    </xml>
    <xml name="filters">
        <section name="filter" title="Filtering parameters" expanded="true">
            <param argument="maxLen" type="integer" value="" optional="true" min="0" label="Remove long reads" help="Remove reads with length greater than this value. Default: no length threshold"/>
            <param argument="minLen" type="integer" value="20" min="0" label="Remove short reads" help="Remove reads with length less than this value. Default: 20"/>
            <param argument="maxN" type="integer" value="0" min="0" label="Remove reads with more Ns" help="Note that some of the subsequent dada pipeline steps do not allow Ns"/>
            <param argument="minQ" type="integer" value="0" min="0" label="Remove low quality reads" help="Reads contain a quality score less than this value will be discarded"/>
            <param argument="maxEE" type="integer" value="" optional="true" min="0" label="Remove reads by number expected errors" help="Reads with a higher number of expected errors (EE) will be discarded, where EE = sum(10^(-Q_i/10)), with Q are the nominal quality scores at the read positions"/>
        </section>
    </xml>

    <xml name="errorEstimationFunction">
        <param name="errfoo" argument="errorEstimationFunction" type="select" label="Error function">
            <option value="loessErrfun">loess: Use a loess fit to estimate error rates from transition counts</option>
            <option value="noqualErrfun">noqual: Estimate error rates for each type of transition while ignoring quality scores.</option>
            <option value="PacBioErrfun">PacBio: Estimate error rates from transition counts in PacBio CCS data.</option>
        </param>
    </xml>
    <token name="@HELP_OVERVIEW@"><![CDATA[
Overview
........

The intended use of the dada2 tools for paired sequencing data is shown in the following image.

.. image:: pairpipe.png

Note: In particular for the analysis of paired collections the collections should be sorted lexicographical
before the analysis.

For single end data you the steps "Unzip collection" and "mergePairs" are not necessary.

More information may be found on the dada2 homepage:: https://benjjneb.github.io/dada2/index.html (in particular tutorials) or the documentation of dada2's R package https://bioconductor.org/packages/release/bioc/html/dada2.html (in particular the pdf which contains the full documentation of all parameters)
    ]]></token>
</macros>
author	iuc
date	Thu, 23 May 2024 08:51:14 +0000
parents	19299bb1c6af
children	a7817705348e