view edger-repenrich2.xml @ 2:cfb06f8e8f52 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 4ac07201d6267f5efd5c2af20db7f53fce5af8af
author artbio
date Sat, 20 Apr 2024 15:17:04 +0000
parents 6d59fbca2db4
children
line wrap: on
line source

<tool id="edger-repenrich2" name="edgeR-repenrich2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Determines differentially expressed features from RepEnrich2 counts</description>
    <!-- Imports shared macro definitions from macros.xml. -->
    <!-- NOTE(review): the @TOOL_VERSION@, @VERSION_SUFFIX@ and @PROFILE@ tokens used on the tool element are presumably defined there; confirm. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Expands the "edgeR_requirements" macro; presumably it declares the tool's package requirements (verify in macros.xml). -->
    <expand macro="edgeR_requirements"/>
    <!-- Error detection: each phrase below, seen on either stdout or stderr, is treated as a fatal job error. -->
    <stdio>
        <regex level="fatal" source="both" match="Execution halted"
               description="Execution halted." />
        <regex level="fatal" source="both" match="Error in"
               description="An undefined error occurred, please check your input carefully and contact your administrator." />
        <regex level="fatal" source="both" match="Fatal error"
               description="An undefined error occurred, please check your input carefully and contact your administrator." />
    </stdio>
    <!--
        Prints the R version line followed by the installed edgeR package version.
        The two R statements are separated with a semicolon: library() returns a
        character vector, so chaining it to cat() with a logical AND raises an R
        error (hidden by the stderr redirect) and the edgeR version is never printed.
    -->
    <version_command>
    <![CDATA[
        echo $(R --version | grep version | grep -v GNU)", edgeR version" $(R --vanilla --slave -e "library(edgeR); cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
    ]]>
    </version_command>
    <!--
        Builds the Rscript call for edgeR_repenrich2.R.
        For each factor level, the selected count datasets are collected into a
        list, the list is reversed, and it is handed to the script as a JSON array.
        The result table path is passed exactly once through the -o option; an
        earlier duplicate -o carrying the literal string 'edger_out' was removed.
    -->
    <command>
    <![CDATA[
        #import json
        Rscript '${__tool_directory__}/edgeR_repenrich2.R'
            --factorName '$factorName'

            --levelNameA '$factorLevel_A'
            ## Collect the dataset paths of level A; "silent" discards the None returned by append/reverse.
            #set $factorlevelsA = list()
            #for $file in $countsFiles_A:
                #silent $factorlevelsA.append(str($file))
            #end for
            #silent $factorlevelsA.reverse()
            --levelAfiles '#echo json.dumps(factorlevelsA)#'

            --levelNameB '$factorLevel_B'
            ## Same collection step for level B.
            #set $factorlevelsB = list()
            #for $file in $countsFiles_B:
                #silent $factorlevelsB.append(str($file))
            #end for
            #silent $factorlevelsB.reverse()
            --levelBfiles '#echo json.dumps(factorlevelsB)#'

            -p '$plots'
            ## The normalized counts table is only requested when the option is ticked.
            #if $normCounts:
                -n '$counts_out'
            #end if
            -o '$edger_out'
    ]]>
    </command>
    <!-- One factor name, two factor levels, and the count tables that belong to each level. -->
    <inputs>
        <!-- Text fields are sanitized down to letters, digits and underscores. -->
        <param name="factorName" type="text" value="FactorName"
               label="Specify a factor name, e.g. genotype or age or drug_x"
               help="Only letters, numbers and underscores will be retained in this field">
            <sanitizer>
                <valid initial="string.letters,string.digits">
                    <add value="_" />
                </valid>
            </sanitizer>
        </param>

        <!-- First factor level and its count tables. -->
        <param name="factorLevel_A" type="text" value="FactorLevel1"
               label="Specify a factor level, typical values could be 'mutant' or 'Drug_X'"
               help="Only letters, numbers and underscores will be retained in this field">
            <sanitizer>
                <valid initial="string.letters,string.digits">
                    <add value="_" />
                </valid>
            </sanitizer>
        </param>
        <param name="countsFiles_A" type="data" format="tabular" multiple="true"
               label="Counts file(s)"
               help="Count files must have been generated by repenrich" />

        <!-- Second factor level and its count tables. -->
        <param name="factorLevel_B" type="text" value="FactorLevel2"
               label="Specify a factor level, typical values could be 'wildtype' or 'control'"
               help="Only letters, numbers and underscores will be retained in this field">
            <sanitizer>
                <valid initial="string.letters,string.digits">
                    <add value="_" />
                </valid>
            </sanitizer>
        </param>
        <param name="countsFiles_B" type="data" format="tabular" multiple="true"
               label="Counts file(s)"
               help="Count files must have been generated by repenrich tool" />

        <!-- Toggles the optional normalized counts output. -->
        <param name="normCounts" type="boolean" truevalue="1" falsevalue="0" checked="false"
               label="Output normalized counts table" />
    </inputs>
    <!-- Result table, plots, and an optional normalized counts table. -->
    <outputs>
        <!-- Result table; its column names are attached as dataset metadata. -->
        <data name="edger_out" format="tabular" label="edgeR: ${factorLevel_A} compared to ${factorLevel_B}">
            <actions>
                <action type="metadata" name="column_names" default="Tag,log2(FC),FDR,Class,Type" />
            </actions>
        </data>
        <data name="plots" format="pdf" label="edgeR plots" />
        <!-- Only produced when the normCounts option is enabled. -->
        <data name="counts_out" format="tabular" label="Normalized counts file">
            <filter>normCounts == True</filter>
        </data>
    </outputs>
    <!-- Single functional test: two count tables per factor level, with the normalized counts output enabled. -->
    <tests>
        <test expect_num_outputs="3">
            <param name="factorName" value="Genotype" />
            <param name="factorLevel_A" value="Mutant" />
            <param name="countsFiles_A" value="355_fraction_counts.tab,356_fraction_counts.tab" />
            <param name="factorLevel_B" value="Wildtype" />
            <param name="countsFiles_B" value="353_fraction_counts.tab,354_fraction_counts.tab" />
            <param name="normCounts" value="True" />
            <output name="counts_out" file="Normalized_counts_file.tab" />
            <output name="plots" file="edgeR_plots.pdf" />
            <output name="edger_out" file="edgeR_result_file.tab" />
        </test>
    </tests>
    <help>
<![CDATA[
.. class:: infomark

**What it does**

Estimates distances between samples (MDS) and the Biological Coefficient of Variation (BCV)
in count data from high-throughput sequencing assays, and tests for differential expression
using edgeR_.

**Inputs**

edger-repenrich2 takes count tables generated by RepEnrich2 as inputs. A RepEnrich2 count
table looks like:

============== ========== ========== ==========
LSU-rRNA_Dme    rRNA       rRNA       3659329
-------------- ---------- ---------- ----------
FW3_DM          LINE       Jockey     831
-------------- ---------- ---------- ----------
DMTOM1_LTR      LTR        Gypsy      1004
-------------- ---------- ---------- ----------
R1_DM           LINE       R1         7343
-------------- ---------- ---------- ----------
TAHRE           LINE       Jockey     4560
-------------- ---------- ---------- ----------
G4_DM           LINE       Jockey     3668
-------------- ---------- ---------- ----------
BS              LINE       Jockey     7296
-------------- ---------- ---------- ----------
Stalker2_I-int  LTR        Gypsy      12252
-------------- ---------- ---------- ----------
Stalker3_LTR    LTR        Gypsy      593
-------------- ---------- ---------- ----------
TABOR_I-int     LTR        Gypsy      3947
-------------- ---------- ---------- ----------
G7_DM           LINE       Jockey     162
-------------- ---------- ---------- ----------
BEL_I-int       LTR        Pao        23757
-------------- ---------- ---------- ----------
Gypsy6_I-int    LTR        Gypsy      7489
============== ========== ========== ==========

Count tables must be generated for each sample individually. Here, edgeR_ handles a
single factor (genotype, age, treatment, etc.) that affects your experiment. This factor has
two levels/states (for instance, "wild-type" and "mutant"). You need to select the appropriate
count tables from your history for each factor level.

The following table gives some examples of factors and their levels:

========= ============== ===============
Factor    Factorlevel1   Factorlevel2
--------- -------------- ---------------
Treatment Treated        Untreated
--------- -------------- ---------------
Genotype  Knockdown      Wildtype
--------- -------------- ---------------
TimePoint Day4           Day1
--------- -------------- ---------------
Gender    Female         Male
========= ============== ===============

*Note*: Output log2 fold changes are computed as factor level 1 vs. factor level 2.
Here the order of factor levels is important. For example, for the factor 'Treatment' given
in above table, edgeR computes fold changes of 'Treated' samples against 'Untreated',
i.e. the values correspond to up or down regulations of genes in Treated samples.

**Output**

edgeR_ generates a tabular file containing the columns described below, together with a
PDF file that visualizes the results:

====== =============================================================================
Column Description
------ -----------------------------------------------------------------------------
     1 Tag (transposon element ID)
     2 the base-2 logarithm of the fold change (see the note in the Inputs section)
     3 p value adjusted for multiple testing with the Benjamini-Hochberg procedure
       which controls false discovery rate (FDR)
     4 Class the transposon belongs to
     5 Type the transposon belongs to
====== =============================================================================

.. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html

**Note**: This edgeR_ wrapper was adapted from code available at
https://github.com/nskvir/RepEnrich
]]>
    </help>
    <!-- NOTE(review): this DOI presumably refers to the edgeR publication; confirm before editing. -->
    <citations>
        <citation type="doi">10.1093/bioinformatics/btp616</citation>
    </citations>
</tool>