Mercurial > repos > vipints > fml_gff3togtf
annotate GFFtools-GX/gbk_to_gff.py @ 3:ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
author | vipints |
---|---|
date | Wed, 11 Jun 2014 16:29:25 -0400 |
parents | |
children |
rev | line source |
---|---|
3
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
1 #!/usr/bin/env python |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
2 """ |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
3 Convert data from Genbank format to GFF. |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
4 |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
5 Usage: |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
6 python gbk_to_gff.py in.gbk > out.gff |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
7 |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
8 Requirements: |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
9 BioPython:- http://biopython.org/ |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
10 helper.py : https://github.com/vipints/GFFtools-GX/blob/master/helper.py |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
11 |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
12 Copyright (C) |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
13 2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany. |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
14 2012-2014 Memorial Sloan Kettering Cancer Center New York City, USA. |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
15 """ |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
16 |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
17 import os |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
18 import re |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
19 import sys |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
20 import collections |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
21 from Bio import SeqIO |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
22 import helper |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
23 |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
def _segment_span(segments):
    """
    Return the (lowest start, highest end) pair covering a list of
    (start, end) segments, or None when the list is empty.

    @args segments: (start, end) coordinate pairs
    @type segments: list
    """

    if not segments:
        return None
    return (min(seg[0] for seg in segments), max(seg[-1] for seg in segments))


def _print_child_rows(row, exon_segs, cds_segs, parent):
    """
    Print the exon and CDS rows that belong to one transcript.

    @args row: GFF columns of the parent transcript, reused as a template
    @type row: list
    @args exon_segs: (start, end) pairs of the exon features
    @type exon_segs: list
    @args cds_segs: (start, end) pairs of the CDS features
    @type cds_segs: list
    @args parent: ID written into the Parent attribute of every row
    @type parent: str
    """

    if exon_segs:
        exon_line_print(row, exon_segs, parent, 'exon')
    if cds_segs:
        exon_line_print(row, cds_segs, parent, 'CDS')
        if not exon_segs:
            ## no explicit exon features: the CDS segments double as exons
            exon_line_print(row, cds_segs, parent, 'exon')


def feature_table(chr_id, source, orient, genes, transcripts, cds, exons, unk):
    """
    Write the feature information of one record as GFF rows on stdout

    @args chr_id: value of the first (sequence id) column
    @type chr_id: str
    @args source: type column of a transcript row that has to be
                  synthesized because the record defines no transcript
    @type source: str
    @args orient: strand used for the synthesized Unknown_Gene rows
    @type orient: str
    @args genes: gene id -> (start, stop, strand, feature type)
    @type genes: dict
    @args transcripts: gene id -> list of (start, stop, transcript id, feature type)
    @type transcripts: dict
    @args cds: gene id -> list of (start, stop) CDS segments
    @type cds: dict
    @args exons: gene id -> list of (start, stop) exon segments
    @type exons: dict
    @args unk: running number for the next Unknown_Gene_<n> identifier
    @type unk: int

    Returns unk, incremented by one when an Unknown_Gene entry was written.
    """

    seqid = str(chr_id)
    ## a missing source must not break the tab join
    source = '.' if source is None else str(source)

    for gname, ginfo in genes.items():
        gene_id = str(gname)
        gene_start, gene_stop, gene_strand, gene_type = ginfo[0], ginfo[1], ginfo[2], ginfo[3]
        print('\t'.join([seqid,
                        'gbk_to_gff',
                        gene_type,
                        str(gene_start),
                        str(gene_stop),
                        '.',
                        gene_strand,
                        '.',
                        'ID=%s;Name=%s' % (gene_id, gene_id)]))

        ## .get() keeps a gene without exon/CDS features from raising
        gene_exons = exons.get(gname) or []
        gene_cds = cds.get(gname) or []

        if not transcripts:
            ## the transcript line is not defined in the original file:
            ## span all exons, else all CDS segments, else the gene itself
            span = (_segment_span(gene_exons)
                    or _segment_span(gene_cds)
                    or (gene_start, gene_stop))
            t_line = [seqid, 'gbk_to_gff', source,
                      str(span[0]), str(span[1]),
                      '.', gene_strand, '.',
                      'ID=Transcript:%s;Parent=%s' % (gene_id, gene_id)]
            print('\t'.join(t_line))
            _print_child_rows(t_line, gene_exons, gene_cds, 'Transcript:' + gene_id)

        else: ## transcript is defined
            for idx in transcripts.get(gname) or []:
                ## a fresh 9-column row per transcript; appending to a
                ## shared row would add a column for every transcript
                t_line = [seqid, 'gbk_to_gff', idx[3],
                          str(idx[0]), str(idx[1]),
                          '.', gene_strand, '.',
                          'ID=' + str(idx[2]) + ';Parent=' + gene_id]
                print('\t'.join(t_line))

                ## feature line print call
                _print_child_rows(t_line, gene_exons, gene_cds, str(idx[2]))

    if not genes: ## feature entry with fragment information

        all_exons = [seg for segs in exons.values() for seg in segs]
        all_cds = [seg for segs in cds.values() for seg in segs]
        ## exon coordinates take precedence over CDS coordinates
        span = _segment_span(all_exons) or _segment_span(all_cds)

        if span is not None:
            gene_id = 'Unknown_Gene_' + str(unk)
            trans_id = 'Unknown_Transcript_' + str(unk)

            line = [seqid, 'gbk_to_gff', 'gene',
                    str(span[0]), str(span[1]),
                    '.', str(orient), '.',
                    'ID=' + gene_id + ';Name=' + gene_id]
            print('\t'.join(line))

            line[2] = 'mRNA' if cds else 'transcript'
            line[8] = 'ID=' + trans_id + ';Parent=' + gene_id
            print('\t'.join(line))

            ## features without a gene qualifier are stored under the key None
            _print_child_rows(line, exons.get(None) or [], cds.get(None) or [], trans_id)

            unk += 1

    return unk
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
118 |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
def exon_line_print(temp_line, trx_exons, parent, ftype):
    """
    Print one GFF row on stdout for every segment of a transcript

    The row template is modified in place: the type, start, end and
    attribute columns are overwritten, so after the call temp_line holds
    the last row that was printed. Nothing is touched or printed when
    trx_exons is empty.

    @args temp_line: GFF columns of the parent row, used as template
    @type temp_line: list
    @args trx_exons: (start, end) pairs, one per row to print
    @type trx_exons: list
    @args parent: ID written as the Parent attribute
    @type parent: str
    @args ftype: value of the feature type column, e.g. 'exon' or 'CDS'
    @type ftype: str
    """

    if not trx_exons:
        return

    ## make sure the attribute column exists before it is assigned
    while len(temp_line) < 9:
        temp_line.append('.')

    temp_line[2] = ftype
    temp_line[8] = 'Parent=%s' % parent

    for ex in trx_exons:
        temp_line[3] = str(ex[0])
        temp_line[4] = str(ex[1])
        ## str() guards against non-string placeholders left in the template
        print('\t'.join(str(col) for col in temp_line))
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
130 |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
def _first_qualifier(feature, key, default=None):
    """
    Return the first value of a feature qualifier, or default when the
    qualifier is absent or empty.

    @args feature: feature whose qualifiers mapping is read
    @type feature: Bio.SeqFeature.SeqFeature
    @args key: qualifier name, e.g. 'gene'
    @type key: str
    """

    values = feature.qualifiers.get(key)
    if values:
        return values[0]
    return default


def _feature_span(feature):
    """
    Return the 1-based inclusive (start, end) pair of a feature location.

    @args feature: feature whose location is read
    @type feature: Bio.SeqFeature.SeqFeature
    """

    ## public start/end are also available on joined (compound) locations
    return (int(feature.location.start) + 1, int(feature.location.end))


def gbk_parse(fname):
    """
    Extract genome annotation records from genbank format and print
    them as GFF rows, one feature table per record

    @args fname: gbk file name
    @type fname: str
    """

    fhand = helper.open_file(fname)
    unk = 1

    try:
        for record in SeqIO.parse(fhand, "genbank"):

            gene_tags = dict()
            tx_tags = collections.defaultdict(list)
            exon = collections.defaultdict(list)
            cds = collections.defaultdict(list)
            ## defaults for a record without a source feature
            mol_type, chr_id = '.', record.name
            ## defined even when the record has no feature besides source
            strand = '.'

            for rec in record.features:

                if rec.type == 'source':
                    mol_type = _first_qualifier(rec, 'mol_type', '.')
                    chr_id = _first_qualifier(rec, 'chromosome', record.name)
                    continue

                ## +1 / -1 map to '+' / '-'; anything else is unstranded
                loc_strand = rec.location.strand
                if loc_strand == 1:
                    strand = '+'
                elif loc_strand == -1:
                    strand = '-'
                else:
                    strand = '.'

                ## fid stays None when the feature has no gene qualifier
                fid = _first_qualifier(rec, 'gene')
                transcript_id = _first_qualifier(rec, 'transcript_id')

                if re.search(r'gene', rec.type):
                    start, stop = _feature_span(rec)
                    gene_tags[fid] = (start, stop, strand, rec.type)
                elif rec.type == 'exon':
                    exon[fid].append(_feature_span(rec))
                elif rec.type == 'CDS':
                    cds[fid].append(_feature_span(rec))
                else:
                    # get all transcripts
                    if transcript_id:
                        start, stop = _feature_span(rec)
                        tx_tags[fid].append((start, stop, transcript_id, rec.type))

            # record extracted, generate feature table
            unk = feature_table(chr_id, mol_type, strand, gene_tags, tx_tags, cds, exon, unk)
    finally:
        ## release the handle even when parsing fails
        fhand.close()
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
202 |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
203 |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
if __name__=='__main__':

    try:
        gbkfname = sys.argv[1]
    except IndexError:
        ## no input file given: show the usage on stderr, stdout is
        ## normally redirected into the GFF output file
        sys.stderr.write(__doc__ or '')
        sys.exit(-1)

    ## extract gbk records
    gbk_parse(gbkfname)