Mercurial > repos > iuc > deg_annotate
view deg_annotate.xml @ 1:e98d4ab5b5bc draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deg_annotate commit 44d3dae188cabf4a64dee7c1ebe41c855d95d1b0
author | iuc |
---|---|
date | Wed, 23 Jan 2019 07:47:22 -0500 |
parents | b42373cddb77 |
children |
line wrap: on
line source
<tool id="deg_annotate" name="Annotate DESeq2/DEXSeq output tables" version="1.1.0">
    <description>Append annotation from GTF to differential expression tool outputs</description>

    <!-- Packages the wrapped script needs at run time. -->
    <requirements>
        <requirement type="package" version="2.27.0">bedtools</requirement>
        <requirement type="package" version="0.6.4">bcbiogff</requirement>
    </requirements>

    <!--
        Invokes the deg_annotate.py script shipped in the tool directory.
        Each flag below is fed by the parameter that declares the same flag in
        its "argument" attribute; every substituted value is single-quoted.
    -->
    <command><![CDATA[
python '$__tool_directory__/deg_annotate.py'
    -in '$input_table'
    -m '$mode'
    -g '$annotation'
    -t '$advanced_parameters.gff_feature_type'
    -i '$advanced_parameters.gff_feature_attribute'
    -x '$advanced_parameters.gff_transcript_attribute'
    -a '$advanced_parameters.gff_attributes'
    -o '$output'
    ]]></command>

    <inputs>
        <param name="input_table" type="data" format="tabular" argument="-in"
               label="Tabular output of DESeq2/edgeR/limma/DEXSeq"/>

        <!-- "degseq" covers the gene-level tools; "dexseq" selects the exon-level input layout. -->
        <param name="mode" type="select" argument="-m" label="Input file type">
            <option value="degseq">DESeq2/edgeR/limma</option>
            <option value="dexseq">DEXSeq</option>
        </param>

        <param name="annotation" type="data" format="gff,gtf,gff3" argument="-g"
               label="Reference annotation in GFF/GTF format"/>

        <section name="advanced_parameters" title="Advanced options">
            <param name="gff_feature_type" type="text" value="exon" argument="-t"
                   label="GFF feature type"
                   help="This is the 3rd column in the GFF file. Only rows which have the matched feature type in the GTF annotation file will be included (default: exon)"/>
            <param name="gff_feature_attribute" type="text" value="gene_id" argument="-i"
                   label="GFF feature identifier"
                   help="GFF attribute to be used as feature identifier. The value of this attribute should match the first column of DESeq2 output (default: gene_id)"/>
            <param name="gff_transcript_attribute" type="text" value="transcript_id" argument="-x"
                   label="GFF transcript identifier"
                   help="GFF attribute to be used as transcript identifier. This option is only used for DEXSeq output annotation. Exon numbers are counted for each transcript separately (default: transcript_id)"/>
            <param name="gff_attributes" type="text" value="gene_biotype, gene_name" argument="-a"
                   label="GFF attributes to include"
                   help="Comma separated list of attributes from GFF file to include in output. These attributes should associate with your chosen GFF feature type."/>
        </section>
    </inputs>

    <outputs>
        <data name="output" format="tabular" label="${tool.name} on ${on_string}"/>
    </outputs>

    <!--
        Each test checks for one fully annotated row in the output.
        Decimal points are escaped (\.) so the regular expression matches the
        literal number instead of treating "." as a wildcard.
    -->
    <tests>
        <!-- DESeq2 table; "mode" is not set here, so the select's default applies (presumably the first option, degseq; confirm). -->
        <test expect_num_outputs="1">
            <param name="input_table" value="deseq2_output.tabular"/>
            <param name="annotation" value="annotation.gtf"/>
            <output name="output">
                <assert_contents>
                    <has_text_matching expression="FBgn0025111\t2192\.32236942864\t2\.69993841720991\t0\.0979447231457099\t27\.565940568266\t2\.8504782974107e-167\t6\.1121380892229e-164\tchrX\t10778953\t10786907\t-\tprotein_coding\tAnt2"/>
                </assert_contents>
            </output>
        </test>
        <!-- DEXSeq table: the expected row also carries transcript ids and per-transcript exon numbers. -->
        <test expect_num_outputs="1">
            <param name="input_table" value="dexseq_output.tabular"/>
            <param name="annotation" value="annotation.gtf"/>
            <param name="mode" value="dexseq"/>
            <output name="output">
                <assert_contents>
                    <has_text_matching expression="FBgn0025111\+FBgn0003360:E005\tFBgn0025111\+FBgn0003360\tE005\t0\.273966640920426\t6\.62572321505791\t0\.774068626605711\t0\.378961325638675\tNA\t0\.41523701984849\t1\.17020080867011\t2\.99101950917789\tchrX\t10780596\t10780661\t66\t-\t10\t0\t0\t0\t0\t0\t2\tFBtr0073425, FBtr0333963\tprotein_coding\+protein_coding\tAnt2\+sesB\tFBtr0073425:1,FBtr0333963:1"/>
                </assert_contents>
            </output>
        </test>
        <!-- edgeR table, annotated with the explicit degseq mode. -->
        <test expect_num_outputs="1">
            <param name="input_table" value="edger_output.tabular"/>
            <param name="annotation" value="annotation.gtf"/>
            <param name="mode" value="degseq"/>
            <output name="output">
                <assert_contents>
                    <has_text_matching expression="FBgn0039155\t-4\.40480020002641\t5\.8344799947229\t573\.433304439283\t1\.62187751744916e-36\t2\.54342832286378e-32\tchr3R\t24141394\t24147490\t\+\tprotein_coding\tKal1"/>
                </assert_contents>
            </output>
        </test>
    </tests>

    <help><![CDATA[
**What it does**

This tool appends the output table of DESeq2/edgeR/limma/DEXSeq with gene symbols, biotypes, positions etc. The information you want to add is configurable. This information should be present in the input GTF/GFF file as attributes of the feature you choose.

DEXSeq-Count tool is used to prepare the DEXSeq compatible annotation (flattened GTF file) from input GTF/GFF. In this process, the exons that appear multiple times, once for each transcript, are collapsed to so-called *exon counting bins*. Counting bins for parts of exons arise when an exonic region appears with different boundaries in different transcripts. The resulting flattened GTF file contains pseudo exon ids per gene instead of per transcript. This tool maps the DEXSeq counting bins back to the original exon ids. This mapping is only possible if the input GTF/GFF file contains a transcript identifier attribute for the chosen feature type.

**Inputs**

**Differential gene expression tables**

At the moment, this tool supports DESeq2, edgeR, limma and DEXSeq tool outputs.

**Annotation**

Annotation file in GTF or GFF3 format that was used for counting.

**Outputs**

Input tabular file with the chosen attributes appended as additional columns.
    ]]></help>
</tool>