comparison GFFtools-GX/gff_to_bed.py @ 3:ff2c2e6f4ab3

Uploaded version 2.0.0 of gfftools ready to import to local instance
author vipints
date Wed, 11 Jun 2014 16:29:25 -0400
parents
children
comparison
equal deleted inserted replaced
2:db3c67b03d55 3:ff2c2e6f4ab3
1 #!/usr/bin/env python
2 """
3 Convert genome annotation data in GFF/GTF to a 12 column BED format.
4 BED format typically represents the transcript models.
5
6 Usage: python gff_to_bed.py in.gff > out.bed
7
8 Requirement:
9 GFFParser.py: https://github.com/vipints/GFFtools-GX/blob/master/GFFParser.py
10
11 Copyright (C)
12 2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany.
13 2012-2014 Memorial Sloan Kettering Cancer Center New York City, USA.
14 """
15
16 import re
17 import sys
18 import GFFParser
19
def writeBED(tinfo):
    """
    Write transcript models to stdout as 12 column BED lines.

    One line is printed per transcript that has at least one exon.
    GFF/GTF coordinates are 1-based with an inclusive end, whereas BED
    uses a 0-based start with an exclusive end, so the start written
    here is the first exon start minus one. That keeps
    chromStart + last blockStart + last blockSize equal to chromEnd.

    @args tinfo: list of genes; each gene is indexed by 'chr', 'strand',
        'score', 'transcripts' and 'exons', where exons[i] holds the
        (start, stop) pairs belonging to transcripts[i]
    @type tinfo: numpy object
    """
    for gene in tinfo:
        for idx, tid in enumerate(gene['transcripts']):
            exons = gene['exons'][idx]
            # explicit length test: truth-testing is ambiguous if this is
            # a numpy array rather than a list
            if len(exons) == 0:
                continue

            # NOTE(review): assumes exons are ordered by ascending start,
            # so the first exon opens and the last exon closes the
            # transcript -- confirm against GFFParser output
            first_start = int(exons[0][0])
            chrom_start = first_start - 1  # 1-based GFF -> 0-based BED
            chrom_end = int(exons[-1][1])  # inclusive stop == exclusive end

            # comma-terminated lists, one entry per exon
            block_sizes = ''.join('%d,' % (ex[1] - ex[0] + 1) for ex in exons)
            block_starts = ''.join('%d,' % (ex[0] - first_start) for ex in exons)

            # first recorded score if any, else the BED default '0';
            # str() so a numeric score cannot break the join below
            score = str(gene['score'][0]) if gene['score'] else '0'

            fields = [
                gene['chr'],
                str(chrom_start),
                str(chrom_end),
                tid[0],
                score,
                gene['strand'],
                str(chrom_start),  # thickStart: whole transcript drawn thick
                str(chrom_end),    # thickEnd
                '0',               # itemRgb
                str(len(exons)),
                block_sizes,
                block_starts,
            ]
            # parenthesised single argument prints the same on Python 2 and 3
            print('\t'.join(fields))
61
def __main__():
    """
    Command line entry point.

    Reads the GFF/GTF file named by the first command line argument with
    GFFParser.Parse and prints its transcript models as BED lines via
    writeBED. When no argument is given, prints the module usage text
    and calls sys.exit(-1).
    """
    try:
        query_file = sys.argv[1]
    except IndexError:
        # a missing argument is the only expected failure here; a bare
        # except would also trap KeyboardInterrupt and SystemExit
        print(__doc__)
        sys.exit(-1)

    transcript_db = GFFParser.Parse(query_file)
    writeBED(transcript_db)

if __name__ == "__main__":
    __main__()