Mercurial > repos > iuc > deg_annotate

<tool id="deg_annotate" name="Annotate DESeq2/DEXSeq output tables" version="1.1.0">
    <description>Append annotation from GTF to differential expression tool outputs</description>
    <!-- Packages Galaxy must resolve before the command runs.
         NOTE(review): the command invokes `python`, but no python requirement is
         declared here; presumably the interpreter is supplied by the environment
         resolved for these packages (TODO confirm). -->
    <requirements>
        <requirement type="package" version="2.27.0">bedtools</requirement>
        <requirement type="package" version="0.6.4">bcbiogff</requirement>
    </requirements>
    <!-- Runs the bundled deg_annotate.py script on the selected table and annotation.
         Every substituted value is wrapped in single quotes so it reaches the script
         as a single shell argument.
         detect_errors="exit_code": only a non-zero exit status marks the job as
         failed. Without it (and with no profile on the tool) Galaxy falls back to its
         legacy rule of failing the job whenever anything is written to stderr. -->
    <command detect_errors="exit_code">
    <![CDATA[
        python '$__tool_directory__/deg_annotate.py' -in '$input_table'
        -m '$mode'
        -g '$annotation'
        -t '$advanced_parameters.gff_feature_type'
        -i '$advanced_parameters.gff_feature_attribute'
        -x '$advanced_parameters.gff_transcript_attribute'
        -a '$advanced_parameters.gff_attributes'
        -o '$output'
    ]]>
    </command>
    <!-- User-facing parameters. Each `argument` names the command-line flag that
         the command block passes the value to. -->
    <inputs>
        <!-- Differential expression result table to be annotated. -->
        <param name="input_table"
               type="data"
               format="tabular"
               argument="-in"
               label="Tabular output of DESeq2/edgeR/limma/DEXSeq"/>

        <!-- Tells the script which table layout to expect. No option is marked as
             selected, so Galaxy offers the first one by default. -->
        <param name="mode" type="select" argument="-m" label="Input file type">
            <option value="degseq">DESeq2/edgeR/limma</option>
            <option value="dexseq">DEXSeq</option>
        </param>

        <!-- Annotation the extra columns are taken from. -->
        <param name="annotation"
               type="data"
               format="gff,gtf,gff3"
               argument="-g"
               label="Reference annotation in GFF/GTF format" />

        <section name="advanced_parameters" title="Advanced options">
            <!-- Must not be blank: only rows with a matching feature type are used. -->
            <param name="gff_feature_type"
                type="text"
                value="exon"
                argument="-t"
                label="GFF feature type"
                help="This is the 3rd column of the GFF file. Only rows which have the matched feature type in the GTF annotation file will be included. 'exon' by default.">
                <validator type="empty_field" message="A GFF feature type is required."/>
            </param>

            <!-- Must not be blank: this attribute links annotation rows to table rows. -->
            <param name="gff_feature_attribute"
                type="text"
                value="gene_id"
                argument="-i"
                label="GFF feature identifier"
                help="GFF attribute to be used as feature identifier. The value of this attribute should match the first column of DESeq2 output (default: gene_id)">
                <validator type="empty_field" message="A GFF feature identifier attribute is required."/>
            </param>

            <!-- Only relevant in DEXSeq mode, so no emptiness check is enforced here. -->
            <param name="gff_transcript_attribute"
                type="text"
                value="transcript_id"
                argument="-x"
                label="GFF transcript identifier"
                help="GFF attribute to be used as transcript identifier. This option is only used for DEXSeq output annotation. Exon numbers are counted for each transcript separately (default: transcript_id)" />

            <!-- Comma separated attribute names that become the appended columns. -->
            <param name="gff_attributes"
                type="text"
                value="gene_biotype, gene_name"
                argument="-a"
                label="GFF attributes to include"
                help="Comma separated list of attributes from GFF file to include in output. These attributes should associate with your chosen GFF feature type." />
        </section>
    </inputs>
    <!-- Single tabular dataset; the command block passes its path to the script via -o. -->
    <outputs>
        <data name="output" format="tabular" label="${tool.name} on ${on_string}"/>
    </outputs>
    <!-- Functional tests. Each one checks that a known annotated row appears in the
         output. The expressions are regular expressions, so literal '.' and '+'
         characters are backslash-escaped; an unescaped '.' would match any
         character and let a wrong value slip through. -->
    <tests>
        <!-- DESeq2 table; `mode` is omitted on purpose so the default option is exercised. -->
        <test expect_num_outputs="1">
            <param name="input_table" value="deseq2_output.tabular"/>
            <param name="annotation" value="annotation.gtf"/>
            <output name="output">
                <assert_contents>
                    <has_text_matching expression="FBgn0025111\t2192\.32236942864\t2\.69993841720991\t0\.0979447231457099\t27\.565940568266\t2\.8504782974107e-167\t6\.1121380892229e-164\tchrX\t10778953\t10786907\t-\tprotein_coding\tAnt2"/>
                </assert_contents>
            </output>
        </test>
        <!-- DEXSeq table; the expected row also carries the transcript and exon number columns. -->
        <test expect_num_outputs="1">
            <param name="input_table" value="dexseq_output.tabular"/>
            <param name="annotation" value="annotation.gtf"/>
            <param name="mode" value="dexseq"/>
            <output name="output">
                <assert_contents>
                    <has_text_matching expression="FBgn0025111\+FBgn0003360:E005\tFBgn0025111\+FBgn0003360\tE005\t0\.273966640920426\t6\.62572321505791\t0\.774068626605711\t0\.378961325638675\tNA\t0\.41523701984849\t1\.17020080867011\t2\.99101950917789\tchrX\t10780596\t10780661\t66\t-\t10\t0\t0\t0\t0\t0\t2\tFBtr0073425, FBtr0333963\tprotein_coding\+protein_coding\tAnt2\+sesB\tFBtr0073425:1,FBtr0333963:1"/>
                </assert_contents>
            </output>
        </test>
        <!-- edgeR table, run with the DESeq2/edgeR/limma mode selected explicitly. -->
        <test expect_num_outputs="1">
            <param name="input_table" value="edger_output.tabular"/>
            <param name="annotation" value="annotation.gtf"/>
            <param name="mode" value="degseq"/>
            <output name="output">
                <assert_contents>
                    <has_text_matching expression="FBgn0039155\t-4\.40480020002641\t5\.8344799947229\t573\.433304439283\t1\.62187751744916e-36\t2\.54342832286378e-32\tchr3R\t24141394\t24147490\t\+\tprotein_coding\tKal1"/>
                </assert_contents>
            </output>
        </test>
    </tests>

    <help>
       <![CDATA[

**What it does**

    This tool extends the output table of DESeq2/edgeR/limma/DEXSeq with gene symbols, biotypes, positions etc. The information
    you want to add is configurable. This information should be present in the input GTF/GFF file as attributes of the feature
    type you choose.
    The DEXSeq-Count tool is used to prepare the DEXSeq-compatible annotation (flattened GTF file) from the input GTF/GFF. In
    this process, exons that appear multiple times, once for each transcript, are collapsed into so-called
    *exon counting bins*. Counting bins for parts of exons arise when an exonic region appears with different
    boundaries in different transcripts. The resulting flattened GTF file contains pseudo exon ids per gene instead
    of per transcript. This tool maps the DEXSeq counting bins back to the original exon ids. This mapping is only
    possible if the input GTF/GFF file contains a transcript identifier attribute for the chosen feature type.

**Inputs**

**Differential gene expression tables**
    At the moment, this tool supports the tabular outputs of DESeq2, edgeR, limma and DEXSeq.

**Annotation**
    Annotation file in GTF or GFF3 format that was used for counting.

**Outputs**
    Input tabular file with the chosen attributes appended as additional columns.

        ]]>
    </help>
</tool>
author	iuc
date	Wed, 23 Jan 2019 07:47:22 -0500
parents	b42373cddb77
children