Mercurial > repos > vipints > fml_gff3togtf
comparison gff_to_gtf.py @ 5:6e589f267c14
Uploaded
author | devteam |
---|---|
date | Tue, 04 Nov 2014 12:15:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
4:619e0fcd9126 | 5:6e589f267c14 |
---|---|
1 #!/usr/bin/env python | |
2 """ | |
3 Program to convert data from GFF to GTF | |
4 | |
5 Usage: python gff_to_gtf.py in.gff > out.gtf | |
6 | |
7 Requirement: | |
8 GFFParser.py: https://github.com/vipints/GFFtools-GX/blob/master/GFFParser.py | |
9 | |
10 Copyright (C) | |
11 2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany. | |
12 2012-2014 Memorial Sloan Kettering Cancer Center New York City, USA. | |
13 """ | |
14 | |
15 import re | |
16 import sys | |
17 import GFFParser | |
18 | |
def _gtf_record(ent, feature, start, stop, frame, tid, exon_number):
    """
    build one tab-delimited GTF line for a feature of a transcript

    @args ent: gene entry, read for 'chr', 'source', 'strand', 'name' and gene_info 'Name'
    @type ent: dict like record
    @args feature: value of the feature column (exon, CDS, start_codon, stop_codon)
    @type feature: str
    @args start: feature start coordinate
    @type start: int
    @args stop: feature stop coordinate
    @type stop: int
    @args frame: value of the frame column, None is written as '.'
    @type frame: int or None
    @args tid: transcript identifier
    @type tid: str
    @args exon_number: exon counter written to the attribute column
    @type exon_number: int
    """

    frame_col = '.' if frame is None else '%d' % frame
    attributes = 'gene_id "%s"; transcript_id "%s"; exon_number "%d"; gene_name "%s"; ' % (
        ent['name'], tid, exon_number, ent['gene_info']['Name'])

    return '%s\t%s\t%s\t%d\t%d\t.\t%s\t%s\t%s' % (
        ent['chr'], ent['source'], feature, start, stop, ent['strand'], frame_col, attributes)


def printGTF(tinfo):
    """
    writing result file in GTF format to standard output

    For every transcript one exon line is written per exon, one CDS line per
    exon that has a matching row in the CDS array, a start_codon line along
    with the first exon and a stop_codon line after the last exon.

    Transcripts whose strand is neither '+' nor '-' are skipped and reported
    on standard error, so the diagnostic never ends up in a redirected GTF file.

    @args tinfo: parsed object from gff file
    @type tinfo: numpy array
    """

    for ent1 in tinfo:
        for idx, tid in enumerate(ent1['transcripts']):

            exons = ent1['exons'][idx]
            cds_exons = ent1['cds_exons'][idx]
            # evaluated once, the CDS array is not modified below
            has_cds = cds_exons.any()

            stop_codon = start_codon = ()

            if ent1['strand'] == '+':
                if has_cds:
                    start_codon = (cds_exons[0][0], cds_exons[0][0]+2)
                    stop_codon = (cds_exons[-1][1]-2, cds_exons[-1][1])
            elif ent1['strand'] == '-':
                if has_cds:
                    start_codon = (cds_exons[-1][1]-2, cds_exons[-1][1])
                    stop_codon = (cds_exons[0][0], cds_exons[0][0]+2)
            else:
                # stdout carries the GTF records, so the message goes to stderr;
                # the transcript is really skipped, codon positions are undefined here
                sys.stderr.write('STRAND information missing - %s, skip the transcript - %s\n' % (ent1['strand'], tid[0]))
                continue

            last_cds_cod = 0
            for idz, ex_cod in enumerate(exons):

                # single-argument print() behaves the same on python 2 and 3
                print(_gtf_record(ent1, 'exon', ex_cod[0], ex_cod[1], None, tid[0], idz+1))

                if not has_cds:
                    continue

                # there can be more exons than CDS rows, those exons get no CDS line
                if idz < len(cds_exons):
                    print(_gtf_record(ent1, 'CDS', cds_exons[idz][0], cds_exons[idz][1], cds_exons[idz][2], tid[0], idz+1))
                    last_cds_cod = idz

                if idz == 0:
                    print(_gtf_record(ent1, 'start_codon', start_codon[0], start_codon[1], cds_exons[idz][2], tid[0], idz+1))

            if stop_codon:
                # numbered with the last exon; frame is taken from the last CDS row written
                print(_gtf_record(ent1, 'stop_codon', stop_codon[0], stop_codon[1], cds_exons[last_cds_cod][2], tid[0], len(exons)))
64 | |
65 | |
if __name__ == "__main__":

    # the only expected failure is a missing command line argument
    try:
        gff_fname = sys.argv[1]
    except IndexError:
        # usage goes to stderr so it cannot end up in a redirected GTF file
        sys.stderr.write(__doc__ + '\n')
        sys.exit(-1)

    # parse the GFF file into gene entries
    Transcriptdb = GFFParser.Parse(gff_fname)

    # write the GTF records to standard output
    printGTF(Transcriptdb)