Mercurial > repos > iuc > deg_annotate

--- a/deg_annotate.py	Fri Nov 23 01:59:47 2018 -0500
+++ b/deg_annotate.py	Wed Jan 23 07:47:22 2019 -0500
@@ -64,7 +64,7 @@
                     pass

     bed_entries = []
-    # create BED lines only for deseq output
+    # create BED lines only for DEXSeq output
     if input_type == "dexseq":
         for txid in exon_pos.keys():
             starts = sorted(exon_pos[txid])
@@ -87,8 +87,8 @@
     parser.add_argument('-in', '--input', required=True,
                         help='DESeq2/DEXSeq output. It is allowed to have extra information, '
                              'but make sure that the original output columns are not altered')
-    parser.add_argument('-m', '--mode', required=True, choices=["deseq2", "dexseq"], default='deseq2',
-                        help='Input file type')
+    parser.add_argument('-m', '--mode', required=True, choices=["degseq", "dexseq"],
+                        default='degseq', help='Input file type')
     parser.add_argument('-g', '--gff', required=True, help='The same annotation GFF/GTF file used for couting')
     parser.add_argument('-t', '--type', default='exon', required=False,
                         help='feature type (3rd column in GFF file) to be used (default: exon)')
@@ -146,19 +146,10 @@
     with open(args.input) as fh_input, open(args.output, 'w') as fh_output:
         for line in fh_input:
             annot = []
-            # Append the extra information from GFF to DESeq2 output
-            if args.mode == "deseq2":
-                geneid = line.split('\t')[0]
-                annot = [str(annotation[geneid]['chr']),
-                         str(annotation[geneid]['start']),
-                         str(annotation[geneid]['end']),
-                         str(annotation[geneid]['strand'])]
-                for a in attr:
-                    annot.append(annotation[geneid][a])
             # DEXSeq exonic bins might originate from aggregating multiple genes. They are separated by '+'
             # Append the attributes from the GFF but keep the order of the aggregated genes and use '+' as the separator
             # Append the transcript id and exon number from the annotation that correspond to the DEXSeq counting bins
-            elif args.mode == "dexseq":
+            if args.mode == "dexseq":
                 geneids = line.split('\t')[1].split('+')
                 for a in attr:
                     tmp = []
@@ -171,6 +162,15 @@
                         annot.append(','.join(sorted(set(d_binexon[binid]))))
                     except KeyError:
                         annot.append('NA')
+            # Append the extra information from GFF to DESeq2/edgeR/limma output
+            else:
+                geneid = line.split('\t')[0]
+                annot = [str(annotation[geneid]['chr']),
+                         str(annotation[geneid]['start']),
+                         str(annotation[geneid]['end']),
+                         str(annotation[geneid]['strand'])]
+                for a in attr:
+                    annot.append(annotation[geneid][a])
             fh_output.write(line.rstrip('\n') + '\t' + '\t'.join(annot) + '\n')
--- a/deg_annotate.xml	Fri Nov 23 01:59:47 2018 -0500
+++ b/deg_annotate.xml	Wed Jan 23 07:47:22 2019 -0500
@@ -1,5 +1,5 @@
-<tool id="deg_annotate" name="Annotate DESeq2/DEXSeq output tables" version="1.0">
-    <description>Append useful information from annotation files to DESeq2/DEXSeq outputs</description>
+<tool id="deg_annotate" name="Annotate DESeq2/DEXSeq output tables" version="1.1.0">
+    <description>Append annotation from GTF to differential expression tool outputs</description>
     <requirements>
         <requirement type="package" version="2.27.0">bedtools</requirement>
         <requirement type="package" version="0.6.4">bcbiogff</requirement>
@@ -21,10 +21,10 @@
                type="data"
                format="tabular"
                argument="-in"
-               label="Tabular output of DESeq2 or DEXSeq"/>
+               label="Tabular output of DESeq2/edgeR/limma/DEXSeq"/>

         <param name="mode" type="select" argument="-m" label="Input file type">
-                <option value="deseq2">DESeq2</option>
+                <option value="degseq">DESeq2/edgeR/limma</option>
                 <option value="dexseq">DEXseq</option>
         </param>

@@ -92,6 +92,19 @@
                 </assert_contents>
             </output>
         </test>
+        <test expect_num_outputs="1">
+            <param name="input_table"
+                value="edger_output.tabular"/>
+            <param name="annotation"
+               value="annotation.gtf"/>
+            <param name="mode"
+               value="degseq"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text_matching expression="FBgn0039155\t-4.40480020002641\t5.8344799947229\t573.433304439283\t1.62187751744916e-36\t2.54342832286378e-32\tchr3R\t24141394\t24147490\t\+\tprotein_coding\tKal1"/>
+                </assert_contents>
+            </output>
+        </test>
     </tests>

     <help>
@@ -99,7 +112,7 @@

 **What it does**

-    This tool appends the output table of DESeq2 or DEXSeq with gene symbols, biotypes, positions etc. The information
+    This tool appends the output table of DESeq2/edgeR/limma/DEXSeq with gene symbols, biotypes, positions etc. The information
     you want to add is configurable. This information should be present in the input GTF/GFF file as attributes of the feature
     you choose.
     DEXSeq-Count tool is used to prepare the DEXSeq compatible annotation (flattened GTF file) from input GTF/GFF. In
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/edger_output.tabular	Wed Jan 23 07:47:22 2019 -0500
@@ -0,0 +1,9 @@
+GeneID	logFC	logCPM	F	PValue	FDR
+FBgn0039155	-4.40480020002641	5.8344799947229	573.433304439283	1.62187751744916e-36	2.54342832286378e-32
+FBgn0003360	-3.1582167817878	8.40195244059253	458.432046181347	2.23362781352968e-33	1.75138756858862e-29
+FBgn0025111	2.91430384030448	6.87729756446126	452.782656958761	4.06857731787388e-32	2.12678098329661e-28
+FBgn0029167	-2.27266685614162	8.1768807997853	280.441288717184	7.70643803794637e-27	3.02130903277688e-23
+FBgn0035085	-2.53273511974769	5.55909941151563	251.043462255658	1.90325815010934e-25	5.96937886200293e-22
+FBgn0264475	-2.4548323117841	5.63483344861135	228.705124005753	4.42554793229512e-24	1.15669071123753e-20
+FBgn0039827	-3.99649258355212	4.15961375651663	233.059587232752	9.19923669871639e-24	2.06089185584672e-20
+FBgn0000071	2.57846147731144	4.8005671380096	204.62749320464	5.74900194240093e-23	1.03034290930057e-19