comparison deg_annotate.py @ 1:e98d4ab5b5bc draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deg_annotate commit 44d3dae188cabf4a64dee7c1ebe41c855d95d1b0
author iuc
date Wed, 23 Jan 2019 07:47:22 -0500
parents b42373cddb77
children
comparison
equal deleted inserted replaced
0:b42373cddb77 1:e98d4ab5b5bc
62 print("No '" + txattr + "' attribute found for the feature at position " + rec.id + ":" + str( 62 print("No '" + txattr + "' attribute found for the feature at position " + rec.id + ":" + str(
63 start) + ":" + str(end) + ". Please check your GTF/GFF file.") 63 start) + ":" + str(end) + ". Please check your GTF/GFF file.")
64 pass 64 pass
65 65
66 bed_entries = [] 66 bed_entries = []
67 # create BED lines only for deseq output 67 # create BED lines only for dexseq output
68 if input_type == "dexseq": 68 if input_type == "dexseq":
69 for txid in exon_pos.keys(): 69 for txid in exon_pos.keys():
70 starts = sorted(exon_pos[txid]) 70 starts = sorted(exon_pos[txid])
71 strand = tx_info[txid]['strand'] 71 strand = tx_info[txid]['strand']
72 if strand == '-': 72 if strand == '-':
85 def main(): 85 def main():
86 parser = argparse.ArgumentParser(description='Annotate DESeq2/DEXSeq tables with information from GFF/GTF files') 86 parser = argparse.ArgumentParser(description='Annotate DESeq2/DEXSeq tables with information from GFF/GTF files')
87 parser.add_argument('-in', '--input', required=True, 87 parser.add_argument('-in', '--input', required=True,
88 help='DESeq2/DEXSeq output. It is allowed to have extra information, ' 88 help='DESeq2/DEXSeq output. It is allowed to have extra information, '
89 'but make sure that the original output columns are not altered') 89 'but make sure that the original output columns are not altered')
90 parser.add_argument('-m', '--mode', required=True, choices=["deseq2", "dexseq"], default='deseq2', 90 parser.add_argument('-m', '--mode', required=True, choices=["degseq", "dexseq"],
91 help='Input file type') 91 default='degseq', help='Input file type')
92 parser.add_argument('-g', '--gff', required=True, help='The same annotation GFF/GTF file used for couting') 92 parser.add_argument('-g', '--gff', required=True, help='The same annotation GFF/GTF file used for couting')
93 parser.add_argument('-t', '--type', default='exon', required=False, 93 parser.add_argument('-t', '--type', default='exon', required=False,
94 help='feature type (3rd column in GFF file) to be used (default: exon)') 94 help='feature type (3rd column in GFF file) to be used (default: exon)')
95 parser.add_argument('-i', '--idattr', default='gene_id', required=False, 95 parser.add_argument('-i', '--idattr', default='gene_id', required=False,
96 help='GFF attribute to be used as feature ID. ' 96 help='GFF attribute to be used as feature ID. '
144 d_binexon.setdefault(binid, []).append(exonid) 144 d_binexon.setdefault(binid, []).append(exonid)
145 145
146 with open(args.input) as fh_input, open(args.output, 'w') as fh_output: 146 with open(args.input) as fh_input, open(args.output, 'w') as fh_output:
147 for line in fh_input: 147 for line in fh_input:
148 annot = [] 148 annot = []
149 # Append the extra information from GFF to DESeq2 output
150 if args.mode == "deseq2":
151 geneid = line.split('\t')[0]
152 annot = [str(annotation[geneid]['chr']),
153 str(annotation[geneid]['start']),
154 str(annotation[geneid]['end']),
155 str(annotation[geneid]['strand'])]
156 for a in attr:
157 annot.append(annotation[geneid][a])
158 # DEXSeq exonic bins might originate from aggregating multiple genes. They are separated by '+' 149 # DEXSeq exonic bins might originate from aggregating multiple genes. They are separated by '+'
159 # Append the attributes from the GFF but keep the order of the aggregated genes and use '+' 150 # Append the attributes from the GFF but keep the order of the aggregated genes and use '+'
160 # Append the transcript id and exon number from the annotation that correspond to the DEXseq counting bins 151 # Append the transcript id and exon number from the annotation that correspond to the DEXseq counting bins
161 elif args.mode == "dexseq": 152 if args.mode == "dexseq":
162 geneids = line.split('\t')[1].split('+') 153 geneids = line.split('\t')[1].split('+')
163 for a in attr: 154 for a in attr:
164 tmp = [] 155 tmp = []
165 for geneid in geneids: 156 for geneid in geneids:
166 tmp.append(str(annotation[geneid][a])) 157 tmp.append(str(annotation[geneid][a]))
169 binid = line.split('\t')[0] 160 binid = line.split('\t')[0]
170 try: 161 try:
171 annot.append(','.join(sorted(set(d_binexon[binid])))) 162 annot.append(','.join(sorted(set(d_binexon[binid]))))
172 except KeyError: 163 except KeyError:
173 annot.append('NA') 164 annot.append('NA')
165 # Append the extra information from GFF to DESeq2/edgeR/limma output
166 else:
167 geneid = line.split('\t')[0]
168 annot = [str(annotation[geneid]['chr']),
169 str(annotation[geneid]['start']),
170 str(annotation[geneid]['end']),
171 str(annotation[geneid]['strand'])]
172 for a in attr:
173 annot.append(annotation[geneid][a])
174 fh_output.write(line.rstrip('\n') + '\t' + '\t'.join(annot) + '\n') 174 fh_output.write(line.rstrip('\n') + '\t' + '\t'.join(annot) + '\n')
175 175
176 176
177 if __name__ == "__main__": 177 if __name__ == "__main__":
178 main() 178 main()