<!-- Mercurial > repos > iuc > dada2_seqcounts -->

<tool id="dada2_seqCounts" name="dada2: sequence counts" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description></description>
    <!-- The @...@ tokens and the macros expanded below are not defined in this file;
         presumably they all come from the imported macros.xml (verify there). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools"/>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <expand macro="version_command"/>
    <!-- Runs the R script rendered from the "dada2_script" configfile below;
         job failure is detected from the exit code of Rscript. -->
    <command detect_errors="exit_code"><![CDATA[
    Rscript '$dada2_script'
    ]]></command>
    <configfiles>
        <configfile name="dada2_script"><![CDATA[
@READ_FOO@
library(dada2, quietly=TRUE)

## Total number of reads represented by x: the sum of the values returned by
## dada2's getUniques.
getN <- function(x){ sum(getUniques(x)) }

## Result table: a "samples" column plus one column (or, for tabular input, one
## group of columns) per repeat instance. Rows are matched by sample name.
df <- NULL
#for $i, $rep in enumerate($inrep)
    samples <- list()
    #for $s in $rep.input:
        ## for collection input assume identifiers are sample names
        #if $s.ext in ["tabular", "dada2_dada", "dada2_mergepairs"]
            ## The identifier is user controlled: escape backslashes and single
            ## quotes so that it always forms exactly one valid R string literal.
            #set $sample_id = str($s.element_identifier).replace("\\", "\\\\").replace("'", "\\'")
            sample_name <- '$sample_id'
            samples[[sample_name]] <- $read_data( $s )
        #else
            ## Any other format is read as a single object holding all samples
            ## (the sample names are taken from its names or row names below).
            ## NOTE(review): if several such datasets are selected in one repeat
            ## instance, each one overwrites the previous one.
            samples <- $read_data( $s )
        #end if
        ## The flag reflects the last dataset of the repeat instance only.
        #if $s.ext == "tabular"
            tabular <- TRUE
        #else
            tabular <- FALSE
        #end if
    #end for

    ## Column name for this repeat instance: the user supplied name, or the
    ## 0-based index of the repeat instance if no name was given.
    #if str($rep.name) == ""
        dname <- '$str(i)'
    #else
        dname <- '$rep.name'
    #end if

    if( tabular ){
        ## Stack the per-sample tables. unname() keeps the sample names from
        ## being passed to rbind as argument names.
        tdf <- do.call(rbind, unname(samples))
        names(tdf) <- paste( dname, names(tdf) )
        tdf <- cbind( data.frame(samples=names( samples )), tdf)
    }else{
        if(is.null(names(samples))){
            tdf <- data.frame( samples = row.names(samples) )
        }else{
            tdf <- data.frame( samples = names(samples) )
        }
        ## Count per element via getUniques; if that fails fall back to the row
        ## sums of the object.
        n_reads <- tryCatch({
            sapply(samples, getN)
        },
        error=function(cond) {
            rowSums(samples)
        })
        tdf[[ dname ]] <- n_reads
    }
    if(is.null(df)){
        df <- tdf
    }else{
        ## Full outer join: samples missing from one input get NA there.
        df <- merge( df, tdf, by="samples", all=TRUE, no.dups=TRUE)
    }
#end for
write.table(df, "$counts", quote=FALSE, sep="\t", row.names = FALSE, col.names = TRUE)
    ]]></configfile>
    </configfiles>
    <inputs>
        <!-- One repeat instance per pipeline stage. Each instance contributes one column
             (or, for tabular input, one group of columns) to the output table. -->
        <repeat name="inrep" title="data sets" min="1">
            <param name="input" type="data" multiple="true" format="tabular,@DADA_UNIQUES@,dada2_sequencetable,dada2_uniques" label="Dataset(s)" help="Result(s) of one step of the dada2 pipeline. For tabular, dada and mergePairs datasets the dataset (or collection element) identifiers are used as sample names."/>
            <param name="name" type="text" value="" optional="true" label="name" help="Column header used for this input in the output table (for tabular input it is prepended to the column names of the table). If left empty, the 0-based position of this data set in the list is used."/>
        </repeat>
    </inputs>
    <outputs>
        <!-- Tab-separated table with a header line and no row names, written by the
             write.table call at the end of the dada2_script configfile. -->
        <data name="counts" format="tabular" label="${tool.name} on ${on_string}"/>
    </outputs>
    <tests>
        <!-- Each kind of input is tested on its own instead of in a single test with
             several repeat instances: the sample names would not match between the
             inputs anyway, because Galaxy does not allow specifying the names of the
             elements of the input collection for <param ... multiple="true">. -->
        <!-- tabular input, column name "filter" -->
        <test>
            <repeat name="inrep">
                <param name="input" value="F3D0_S188_L001_R1_001.tab" ftype="tabular"/>
                <param name="name" value="filter"/>
            </repeat>
            <output name="counts" value="seqCounts_filter.tab" ftype="tabular" />
        </test>
        <!-- two dada2_dada datasets, column name "dadaF" -->
        <test>
            <repeat name="inrep">
                <param name="input" value="dada_F3D0_S188_L001_R1.Rdata,dada_F3D141_S207_L001_R1.Rdata" ftype="dada2_dada"/>
                <param name="name" value="dadaF"/>
            </repeat>
            <output name="counts" value="seqCounts_dadaF.tab" ftype="tabular" />
        </test>
        <!-- two dada2_mergepairs datasets, column name "merge" -->
        <test>
            <repeat name="inrep">
                <param name="input" value="mergePairs_F3D0_S188_L001.Rdata,mergePairs_F3D141_S207_L001.Rdata" ftype="dada2_mergepairs"/>
                <param name="name" value="merge"/>
            </repeat>
            <output name="counts" value="seqCounts_merge.tab" ftype="tabular" />
        </test>
        <!-- a single dada2_sequencetable dataset (makeSequenceTable.tab), column name "seqtab" -->
        <test>
            <repeat name="inrep">
                <param name="input" value="makeSequenceTable.tab" ftype="dada2_sequencetable"/>
                <param name="name" value="seqtab"/>
            </repeat>
            <output name="counts" value="seqCounts_seqtab.tab" ftype="tabular" />
        </test>
        <!-- a single dada2_sequencetable dataset (removeBimeraDenovo.tab), column name "nochim" -->
        <test>
            <repeat name="inrep">
                <param name="input" value="removeBimeraDenovo.tab" ftype="dada2_sequencetable"/>
                <param name="name" value="nochim"/>
            </repeat>
            <output name="counts" value="seqCounts_nochim.tab" ftype="tabular" />
        </test>
    </tests>
    <help><![CDATA[
Description
...........

Get the counts of sequences per sample for the different stages of the dada pipeline.

Usage
.....

**Inputs:**

Any number of results of dada2 steps in the following form:


- a collection of results from dada, mergePairs, or the collection of statistics from filterAndTrim (the identifiers of the collection elements are used as sample names)
- the results of dada in non-batch mode
- the result of makeSequenceTable or removeBimeraDenovo

**Output:**

A table containing the number of sequences per sample (rows) for each input (columns)

Details
.......

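How the counts are obtained depends on the step that produced the input:

- dada and mergePairs: the sum of the result of dada2's getUniques function is used
- makeSequenceTable and removeBimeraDenovo: R's rowSums function is used
- filterAndTrim: the columns of the statistics table are used as they are, with the name of the input prepended to the column names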

@HELP_OVERVIEW@
    ]]></help>
    <expand macro="citations"/>
</tool>
<!--
author	iuc
date	Tue, 11 Feb 2025 23:29:44 +0000
parents	9058d8943303
children
-->