Mercurial > repos > iuc > deg_annotate
comparison deg_annotate.py @ 1:e98d4ab5b5bc draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deg_annotate commit 44d3dae188cabf4a64dee7c1ebe41c855d95d1b0
author | iuc |
---|---|
date | Wed, 23 Jan 2019 07:47:22 -0500 |
parents | b42373cddb77 |
children |
comparison
equal
deleted
inserted
replaced
0:b42373cddb77 | 1:e98d4ab5b5bc |
---|---|
62 print("No '" + txattr + "' attribute found for the feature at position " + rec.id + ":" + str( | 62 print("No '" + txattr + "' attribute found for the feature at position " + rec.id + ":" + str( |
63 start) + ":" + str(end) + ". Please check your GTF/GFF file.") | 63 start) + ":" + str(end) + ". Please check your GTF/GFF file.") |
64 pass | 64 pass |
65 | 65 |
66 bed_entries = [] | 66 bed_entries = [] |
67 # create BED lines only for deseq output | 67 # create BED lines only for dexseq output |
68 if input_type == "dexseq": | 68 if input_type == "dexseq": |
69 for txid in exon_pos.keys(): | 69 for txid in exon_pos.keys(): |
70 starts = sorted(exon_pos[txid]) | 70 starts = sorted(exon_pos[txid]) |
71 strand = tx_info[txid]['strand'] | 71 strand = tx_info[txid]['strand'] |
72 if strand == '-': | 72 if strand == '-': |
85 def main(): | 85 def main(): |
86 parser = argparse.ArgumentParser(description='Annotate DESeq2/DEXSeq tables with information from GFF/GTF files') | 86 parser = argparse.ArgumentParser(description='Annotate DESeq2/DEXSeq tables with information from GFF/GTF files') |
87 parser.add_argument('-in', '--input', required=True, | 87 parser.add_argument('-in', '--input', required=True, |
88 help='DESeq2/DEXSeq output. It is allowed to have extra information, ' | 88 help='DESeq2/DEXSeq output. It is allowed to have extra information, ' |
89 'but make sure that the original output columns are not altered') | 89 'but make sure that the original output columns are not altered') |
90 parser.add_argument('-m', '--mode', required=True, choices=["deseq2", "dexseq"], default='deseq2', | 90 parser.add_argument('-m', '--mode', required=True, choices=["degseq", "dexseq"], |
91 help='Input file type') | 91 default='degseq', help='Input file type') |
92 parser.add_argument('-g', '--gff', required=True, help='The same annotation GFF/GTF file used for couting') | 92 parser.add_argument('-g', '--gff', required=True, help='The same annotation GFF/GTF file used for couting') |
93 parser.add_argument('-t', '--type', default='exon', required=False, | 93 parser.add_argument('-t', '--type', default='exon', required=False, |
94 help='feature type (3rd column in GFF file) to be used (default: exon)') | 94 help='feature type (3rd column in GFF file) to be used (default: exon)') |
95 parser.add_argument('-i', '--idattr', default='gene_id', required=False, | 95 parser.add_argument('-i', '--idattr', default='gene_id', required=False, |
96 help='GFF attribute to be used as feature ID. ' | 96 help='GFF attribute to be used as feature ID. ' |
144 d_binexon.setdefault(binid, []).append(exonid) | 144 d_binexon.setdefault(binid, []).append(exonid) |
145 | 145 |
146 with open(args.input) as fh_input, open(args.output, 'w') as fh_output: | 146 with open(args.input) as fh_input, open(args.output, 'w') as fh_output: |
147 for line in fh_input: | 147 for line in fh_input: |
148 annot = [] | 148 annot = [] |
149 # Append the extra information from GFF to DESeq2 output | |
150 if args.mode == "deseq2": | |
151 geneid = line.split('\t')[0] | |
152 annot = [str(annotation[geneid]['chr']), | |
153 str(annotation[geneid]['start']), | |
154 str(annotation[geneid]['end']), | |
155 str(annotation[geneid]['strand'])] | |
156 for a in attr: | |
157 annot.append(annotation[geneid][a]) | |
158 # DEXSeq exonic bins might originate from aggregating multiple genes. They are separated by '+' | 149 # DEXSeq exonic bins might originate from aggregating multiple genes. They are separated by '+' |
159 # Append the attributes from the GFF but keep the order of the aggregated genes and use '+' | 150 # Append the attributes from the GFF but keep the order of the aggregated genes and use '+' |
160 # Append the transcript id and exon number from the annotation that correspond to the DEXSeq counting bins | 151 # Append the transcript id and exon number from the annotation that correspond to the DEXSeq counting bins |
161 elif args.mode == "dexseq": | 152 if args.mode == "dexseq": |
162 geneids = line.split('\t')[1].split('+') | 153 geneids = line.split('\t')[1].split('+') |
163 for a in attr: | 154 for a in attr: |
164 tmp = [] | 155 tmp = [] |
165 for geneid in geneids: | 156 for geneid in geneids: |
166 tmp.append(str(annotation[geneid][a])) | 157 tmp.append(str(annotation[geneid][a])) |
169 binid = line.split('\t')[0] | 160 binid = line.split('\t')[0] |
170 try: | 161 try: |
171 annot.append(','.join(sorted(set(d_binexon[binid])))) | 162 annot.append(','.join(sorted(set(d_binexon[binid])))) |
172 except KeyError: | 163 except KeyError: |
173 annot.append('NA') | 164 annot.append('NA') |
165 # Append the extra information from GFF to DESeq2/edgeR/limma output | |
166 else: | |
167 geneid = line.split('\t')[0] | |
168 annot = [str(annotation[geneid]['chr']), | |
169 str(annotation[geneid]['start']), | |
170 str(annotation[geneid]['end']), | |
171 str(annotation[geneid]['strand'])] | |
172 for a in attr: | |
173 annot.append(annotation[geneid][a]) | |
174 fh_output.write(line.rstrip('\n') + '\t' + '\t'.join(annot) + '\n') | 174 fh_output.write(line.rstrip('\n') + '\t' + '\t'.join(annot) + '\n') |
175 | 175 |
176 | 176 |
177 if __name__ == "__main__": | 177 if __name__ == "__main__": |
178 main() | 178 main() |