comparison gtf_to_gff.py @ 10:c42c69aa81f8

fixed manually the upload of version 2.1.0 - deleted accidentally added files to the repo
author vipints <vipin@cbio.mskcc.org>
date Thu, 23 Apr 2015 18:01:45 -0400
parents
children
comparison
equal deleted inserted replaced
9:7d67331368f3 10:c42c69aa81f8
1 #!/usr/bin/env python
2 """
3 Convert Gene Transfer Format [GTF] to Generic Feature Format Version 3 [GFF3].
4
5 Usage: python gtf_to_gff.py in.gtf > out.gff3
6
7 Requirement:
8 GFFParser.py: https://github.com/vipints/GFFtools-GX/blob/master/GFFParser.py
9 helper.py: https://github.com/vipints/GFFtools-GX/blob/master/helper.py
10
11 Copyright (C)
12 2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany.
13 2012-2015 Memorial Sloan Kettering Cancer Center New York City, USA.
14 """
15
16 import re
17 import sys
18 import helper
19 import GFFParser
20
def GFFWriter(gtf_content, out=None):
    """
    Write the parsed gene models as GFF3 records.

    Emits the '##gff-version 3' header and then, for every gene, a gene
    line followed by one line per transcript and that transcript's
    five_prime_UTR, CDS, three_prime_UTR and exon children, in that order.

    @args gtf_content: Parsed object from gtf file
    @type gtf_content: numpy array
    @args out: writable text stream that receives the records (default: sys.stdout)
    @type out: file-like object
    """

    if out is None:
        # Resolved at call time so that a redirected sys.stdout is honoured.
        out = sys.stdout
    write = out.write

    write('##gff-version 3\n')
    for gene in gtf_content:
        chr_name = gene['chr']
        strand = gene['strand']
        start = gene['start']
        stop = gene['stop']
        source = gene['source']
        gene_id = gene['name']
        # Fall back to the gene identifier when no display name was parsed.
        gene_name = gene['gene_info']['Name'] or gene_id

        write('%s\t%s\tgene\t%d\t%d\t.\t%s\t.\tID=%s;Name=%s\n' % (
            chr_name, source, start, stop, strand, gene_id, gene_name))

        for idx, tid in enumerate(gene['transcripts']):
            trans_id = tid[0]
            t_type = gene['transcript_type'][idx]
            exons = gene['exons'][idx]
            cds_exons = gene['cds_exons'][idx]

            # Transcript span: first exon start to last exon end. When no
            # exon rows are available, indexing would raise IndexError, so
            # fall back to the CDS span and finally to the gene span.
            if len(exons):
                t_start, t_stop = exons[0][0], exons[-1][-1]
            elif len(cds_exons):
                t_start, t_stop = cds_exons[0][0], cds_exons[-1][1]
            else:
                t_start, t_stop = start, stop

            # UTRs can only be derived when both exon and CDS coordinates exist.
            utr5_exons, utr3_exons = [], []
            if exons.any() and cds_exons.any():
                utr5_exons, utr3_exons = helper.buildUTR(cds_exons, exons, strand)

            write('%s\t%s\t%s\t%d\t%d\t.\t%s\t.\tID=%s;Parent=%s\n' % (
                chr_name, source, t_type, t_start, t_stop, strand, trans_id, gene_id))

            for ex_cod in utr5_exons:
                write('%s\t%s\tfive_prime_UTR\t%d\t%d\t.\t%s\t.\tParent=%s\n' % (
                    chr_name, source, ex_cod[0], ex_cod[1], strand, trans_id))

            # The third CDS column is written as the GFF3 phase field.
            for ex_cod in cds_exons:
                write('%s\t%s\tCDS\t%d\t%d\t.\t%s\t%d\tParent=%s\n' % (
                    chr_name, source, ex_cod[0], ex_cod[1], strand, ex_cod[2], trans_id))

            for ex_cod in utr3_exons:
                write('%s\t%s\tthree_prime_UTR\t%d\t%d\t.\t%s\t.\tParent=%s\n' % (
                    chr_name, source, ex_cod[0], ex_cod[1], strand, trans_id))

            for ex_cod in exons:
                write('%s\t%s\texon\t%d\t%d\t.\t%s\t.\tParent=%s\n' % (
                    chr_name, source, ex_cod[0], ex_cod[1], strand, trans_id))
63
64
def __main__():
    """
    Command line entry point.

    Reads the GTF file named by the first command line argument, parses it
    with GFFParser.Parse and writes the GFF3 records via GFFWriter.
    Prints the module docstring as usage text and exits with status -1
    when no input file is given.
    """

    try:
        gtf_fname = sys.argv[1]
    except IndexError:
        # Only a missing argument is expected here; a bare except would also
        # swallow unrelated errors such as KeyboardInterrupt.
        # The parenthesised form prints the same text on Python 2 and 3.
        print(__doc__)
        sys.exit(-1)

    gtf_file_content = GFFParser.Parse(gtf_fname)

    GFFWriter(gtf_file_content)
76
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    __main__()