Mercurial > repos > iuc > deg_annotate
changeset 1:e98d4ab5b5bc draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deg_annotate commit 44d3dae188cabf4a64dee7c1ebe41c855d95d1b0
author | iuc |
---|---|
date | Wed, 23 Jan 2019 07:47:22 -0500 |
parents | b42373cddb77 |
children | |
files | deg_annotate.py deg_annotate.xml test-data/edger_output.tabular |
diffstat | 3 files changed, 40 insertions(+), 18 deletions(-) [+] |
line wrap: on
line diff
--- a/deg_annotate.py Fri Nov 23 01:59:47 2018 -0500 +++ b/deg_annotate.py Wed Jan 23 07:47:22 2019 -0500 @@ -64,7 +64,7 @@ pass bed_entries = [] - # create BED lines only for deseq output + # create BED lines only for dexeq output if input_type == "dexseq": for txid in exon_pos.keys(): starts = sorted(exon_pos[txid]) @@ -87,8 +87,8 @@ parser.add_argument('-in', '--input', required=True, help='DESeq2/DEXSeq output. It is allowed to have extra information, ' 'but make sure that the original output columns are not altered') - parser.add_argument('-m', '--mode', required=True, choices=["deseq2", "dexseq"], default='deseq2', - help='Input file type') + parser.add_argument('-m', '--mode', required=True, choices=["degseq", "dexseq"], + default='degseq', help='Input file type') parser.add_argument('-g', '--gff', required=True, help='The same annotation GFF/GTF file used for couting') parser.add_argument('-t', '--type', default='exon', required=False, help='feature type (3rd column in GFF file) to be used (default: exon)') @@ -146,19 +146,10 @@ with open(args.input) as fh_input, open(args.output, 'w') as fh_output: for line in fh_input: annot = [] - # Append the extra information from GFF to DESeq2 output - if args.mode == "deseq2": - geneid = line.split('\t')[0] - annot = [str(annotation[geneid]['chr']), - str(annotation[geneid]['start']), - str(annotation[geneid]['end']), - str(annotation[geneid]['strand'])] - for a in attr: - annot.append(annotation[geneid][a]) # DEXSeq exonic bins might originate from aggrigating multiple genes. They are are separated by '+' # Append the attributes from the GFF but keep the order of the aggregated genes and use '+' # Aappend the transcript id and exon number from the annotation that correspond to the DEXseq counting bins - elif args.mode == "dexseq": + if args.mode == "dexseq": geneids = line.split('\t')[1].split('+') for a in attr: tmp = [] @@ -171,6 +162,15 @@ annot.append(','.join(sorted(set(d_binexon[binid])))) except KeyError: annot.append('NA') + # Append the extra information from GFF to DESeq2/edgeR/limma output + else: + geneid = line.split('\t')[0] + annot = [str(annotation[geneid]['chr']), + str(annotation[geneid]['start']), + str(annotation[geneid]['end']), + str(annotation[geneid]['strand'])] + for a in attr: + annot.append(annotation[geneid][a]) fh_output.write(line.rstrip('\n') + '\t' + '\t'.join(annot) + '\n')
--- a/deg_annotate.xml Fri Nov 23 01:59:47 2018 -0500 +++ b/deg_annotate.xml Wed Jan 23 07:47:22 2019 -0500 @@ -1,5 +1,5 @@ -<tool id="deg_annotate" name="Annotate DESeq2/DEXSeq output tables" version="1.0"> - <description>Append useful information from annotation files to DESeq2/DEXSeq outputs</description> +<tool id="deg_annotate" name="Annotate DESeq2/DEXSeq output tables" version="1.1.0"> + <description>Append annotation from GTF to differential expression tool outputs</description> <requirements> <requirement type="package" version="2.27.0">bedtools</requirement> <requirement type="package" version="0.6.4">bcbiogff</requirement> @@ -21,10 +21,10 @@ type="data" format="tabular" argument="-in" - label="Tabular output of DESeq2 or DEXSeq"/> + label="Tabular output of DESeq2/edgeR/limma/DEXSeq"/> <param name="mode" type="select" argument="-m" label="Input file type"> - <option value="deseq2">DESeq2</option> + <option value="degseq">DESeq2/edgeR/limma</option> <option value="dexseq">DEXseq</option> </param> @@ -92,6 +92,19 @@ </assert_contents> </output> </test> + <test expect_num_outputs="1"> + <param name="input_table" + value="edger_output.tabular"/> + <param name="annotation" + value="annotation.gtf"/> + <param name="mode" + value="degseq"/> + <output name="output"> + <assert_contents> + <has_text_matching expression="FBgn0039155\t-4.40480020002641\t5.8344799947229\t573.433304439283\t1.62187751744916e-36\t2.54342832286378e-32\tchr3R\t24141394\t24147490\t\+\tprotein_coding\tKal1"/> + </assert_contents> + </output> + </test> </tests> <help> @@ -99,7 +112,7 @@ **What it does** - This tool appends the output table of DESeq2 or DEXSeq with gene symbols, biotypes, positions etc. The information + This tool appends the output table of DESeq2/edgeR/limma/DEXSeq with gene symbols, biotypes, positions etc. The information you want to add is configurable. This information should present in the input GTF/GFF file as attributes of feature you choose. DEXSeq-Count tool is used to prepare the DEXSeq compatible annotation (flattened GTF file) from input GTF/GFF. In
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/edger_output.tabular Wed Jan 23 07:47:22 2019 -0500 @@ -0,0 +1,9 @@ +GeneID logFC logCPM F PValue FDR +FBgn0039155 -4.40480020002641 5.8344799947229 573.433304439283 1.62187751744916e-36 2.54342832286378e-32 +FBgn0003360 -3.1582167817878 8.40195244059253 458.432046181347 2.23362781352968e-33 1.75138756858862e-29 +FBgn0025111 2.91430384030448 6.87729756446126 452.782656958761 4.06857731787388e-32 2.12678098329661e-28 +FBgn0029167 -2.27266685614162 8.1768807997853 280.441288717184 7.70643803794637e-27 3.02130903277688e-23 +FBgn0035085 -2.53273511974769 5.55909941151563 251.043462255658 1.90325815010934e-25 5.96937886200293e-22 +FBgn0264475 -2.4548323117841 5.63483344861135 228.705124005753 4.42554793229512e-24 1.15669071123753e-20 +FBgn0039827 -3.99649258355212 4.15961375651663 233.059587232752 9.19923669871639e-24 2.06089185584672e-20 +FBgn0000071 2.57846147731144 4.8005671380096 204.62749320464 5.74900194240093e-23 1.03034290930057e-19