diff deg_annotate.xml @ 0:b42373cddb77 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deg_annotate commit bc766aeec78fe74a1d70d608d2f73ba3f2a3e047
author iuc
date Fri, 23 Nov 2018 01:59:47 -0500
parents
children e98d4ab5b5bc
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deg_annotate.xml	Fri Nov 23 01:59:47 2018 -0500
@@ -0,0 +1,125 @@
+<tool id="deg_annotate" name="Annotate DESeq2/DEXSeq output tables" version="1.0">
+    <description>Append useful information from annotation files to DESeq2/DEXSeq outputs</description>
+    <requirements>
+        <requirement type="package" version="2.27.0">bedtools</requirement>
+        <requirement type="package" version="0.6.4">bcbiogff</requirement>
+    </requirements>
+    <command>
+    <![CDATA[
+        python '$__tool_directory__/deg_annotate.py' -in '$input_table'
+        -m '$mode'
+        -g '$annotation'
+        -t '$advanced_parameters.gff_feature_type'
+        -i '$advanced_parameters.gff_feature_attribute'
+        -x '$advanced_parameters.gff_transcript_attribute'
+        -a '$advanced_parameters.gff_attributes'
+        -o '$output'
+    ]]>
+    </command>
+    <inputs>
+        <param name="input_table"
+               type="data"
+               format="tabular"
+               argument="-in"
+               label="Tabular output of DESeq2 or DEXSeq"/>
+
+        <param name="mode" type="select" argument="-m" label="Input file type">
+                <option value="deseq2">DESeq2</option>
+                <option value="dexseq">DEXseq</option>
+        </param>
+
+        <param name="annotation"
+               type="data"
+               format="gff,gtf,gff3"
+               argument="-g"
+               label="Reference annotation in GFF/GTF format" />
+
+        <section name="advanced_parameters" title="Advanced options">
+            <param name="gff_feature_type"
+                type="text"
+                value="exon"
+                argument="-t"
+                label="GFF feature type"
+                help="This is the 3rd column in GFF file. Only rows which have the matched feature type in the GTF annotation file will be included. `exon' by default." />
+
+            <param name="gff_feature_attribute"
+                type="text"
+                value="gene_id"
+                argument="-i"
+                label="GFF feature identifier"
+                help="GFF attribute to be used as feature identifier. The value of this attribute should match the first column of DESeq2 output (default: gene_id)" />
+
+            <param name="gff_transcript_attribute"
+                type="text"
+                value="transcript_id"
+                argument="-x"
+                label="GFF transcript identifier"
+                help="GFF attribute to be used as transcript identifier. This options is only used for DEXSeq output annotation. Exon numbers are counted for each transcript separately (default: transcript_id)" />
+
+            <param name="gff_attributes"
+                type="text"
+                value="gene_biotype, gene_name"
+                argument="-a"
+                label="GFF attributes to include"
+                help="Comma separated list of attributes from GFF file to include in output. These attributes should associate with your chosen GFF feature type." />
+        </section>
+    </inputs>
+    <outputs>
+        <data name="output" format="tabular" label="${tool.name} on ${on_string}"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1">
+            <param name="input_table"
+                value="deseq2_output.tabular"/>
+            <param name="annotation"
+               value="annotation.gtf"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text_matching expression="FBgn0025111\t2192.32236942864\t2.69993841720991\t0.0979447231457099\t27.565940568266\t2.8504782974107e-167\t6.1121380892229e-164\tchrX\t10778953\t10786907\t-\tprotein_coding\tAnt2"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="input_table"
+                value="dexseq_output.tabular"/>
+            <param name="annotation"
+               value="annotation.gtf"/>
+            <param name="mode"
+               value="dexseq"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text_matching expression="FBgn0025111\+FBgn0003360:E005\tFBgn0025111\+FBgn0003360\tE005\t0.273966640920426\t6.62572321505791\t0.774068626605711\t0.378961325638675\tNA\t0.41523701984849\t1.17020080867011\t2.99101950917789\tchrX\t10780596\t10780661\t66\t-\t10\t0\t0\t0\t0\t0\t2\tFBtr0073425, FBtr0333963\tprotein_coding\+protein_coding\tAnt2\+sesB\tFBtr0073425:1,FBtr0333963:1"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+
+    <help>
+       <![CDATA[
+
+**What it does**
+
+    This tool appends the output table of DESeq2 or DEXSeq with gene symbols, biotypes, positions etc. The information
+    you want to add is configurable. This information should present in the input GTF/GFF file as attributes of feature
+    you choose.
+    DEXSeq-Count tool is used to prepare the DEXSeq compatible annotation (flattened GTF file) from input GTF/GFF. In
+    this  process, the exons that appear multiple times, once for each transcript are collapsed to so called
+    *exon counting bins*. Counting bins for parts of exons arise when an exonic region appears with different
+    boundaries in different transcripts. The resulting flattened GTF file contains pseudo exon ids per gene instead
+    of per transcript. This tool maps the DEXSeq couting bins back to the original exon ids. This mapping is only
+    possible if the input GTF/GFF file contains transcript identifier attribute for the chosen features type.
+
+**Inputs**
+
+**Differential gene expression tables**
+    At the moment, this tool supports DESeq2 and DEXSeq tool outputs.
+
+**Annotation**
+    Annotation file ne GTF or GFF3 format that was used for counting.
+
+**Outputs**
+    Input tabular file and with chosen attributes appended as additional columns.
+
+        ]]>
+    </help>
+</tool>