annotate gff2Togff3.py @ 0:57299471d6c1 draft default tip

planemo upload commit 402a746f69e9f1dbb57007536fc36dc6ce3180de
author yating-l
date Wed, 12 Apr 2017 17:37:47 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
57299471d6c1 planemo upload commit 402a746f69e9f1dbb57007536fc36dc6ce3180de
yating-l
parents:
diff changeset
1 import argparse
57299471d6c1 planemo upload commit 402a746f69e9f1dbb57007536fc36dc6ce3180de
yating-l
parents:
diff changeset
2 import sys
57299471d6c1 planemo upload commit 402a746f69e9f1dbb57007536fc36dc6ce3180de
yating-l
parents:
diff changeset
3 import fileinput
57299471d6c1 planemo upload commit 402a746f69e9f1dbb57007536fc36dc6ce3180de
yating-l
parents:
diff changeset
4 from Group import Group
57299471d6c1 planemo upload commit 402a746f69e9f1dbb57007536fc36dc6ce3180de
yating-l
parents:
diff changeset
5
57299471d6c1 planemo upload commit 402a746f69e9f1dbb57007536fc36dc6ce3180de
yating-l
parents:
diff changeset
def main():
    """Parse the command line and run the GFF to GFF3 conversion.

    Command-line options:
        --input   path of the gff file to read (optional).
        --output  path of the gff3 file to write (required).

    The input is taken from ``--input`` when that option is given.  Only
    when it is absent does the script fall back to reading piped standard
    input.  If neither source is available the script exits with a usage
    error instead of failing later on a ``None`` input.
    """
    parser = argparse.ArgumentParser(description='Get a gff file and the output gff3 file')
    parser.add_argument('--input', help='input gff file')
    parser.add_argument('--output', help='output gff3 file', required=True)
    args = parser.parse_args()

    if args.input is not None:
        # An explicit file must win: stdin is "not a tty" in any
        # non-interactive environment (cron, subprocess, job runner), so
        # testing isatty() first would silently ignore --input there.
        source = args.input
    elif not sys.stdin.isatty():
        # No --input given and something is piped or redirected in.
        source = sys.stdin
    else:
        # parser.error() prints the usage message and exits with status 2.
        parser.error('no input given: use --input or pipe a gff file on stdin')

    c = Convertor(source, args.output)
    c.convert()
57299471d6c1 planemo upload commit 402a746f69e9f1dbb57007536fc36dc6ce3180de
yating-l
parents:
diff changeset
18
57299471d6c1 planemo upload commit 402a746f69e9f1dbb57007536fc36dc6ce3180de
yating-l
parents:
diff changeset
class Convertor:
    """Read tab-separated GFF rows and write them out grouped as GFF3.

    Consecutive rows that share the same ninth column (index 8) are
    collected into one group.  Each group is handed to ``Group`` (imported
    from the project's ``Group`` module), whose ``writer`` method receives
    the open output handle.

    Attributes:
        li:   list of rows, each row being the list of tab-separated fields
              of one input line (blank and ``#`` comment lines are skipped).
        gff3: output file handle, opened for writing by ``__init__`` and
              closed by ``convert``.
    """

    def __init__(self, input, output):
        """Load every input row and open the output file.

        Args:
            input:  either a path (``str``) of the gff file, or any iterable
                    of text lines such as ``sys.stdin``.
            output: path of the gff3 file to create; the
                    ``##gff-version 3`` header is written immediately.
        """
        if isinstance(input, str):
            with open(input) as handle:
                self.li = self._read_rows(handle)
        else:
            self.li = self._read_rows(input)
        self.gff3 = open(output, "w")
        self.gff3.write("##gff-version 3\n")

    @staticmethod
    def _read_rows(lines):
        """Split each data line on tabs, skipping blank and comment lines."""
        rows = []
        for line in lines:
            text = line.rstrip()
            # Blank lines and '#' comments/directives carry no feature
            # columns; keeping them would make groupAsgene fail on [8].
            if not text or text.startswith("#"):
                continue
            rows.append(text.split("\t"))
        return rows

    def convert(self):
        """Write every group of rows to the output file, then close it.

        The output handle is closed even when grouping or writing raises,
        so a failed conversion does not leak the file descriptor.
        """
        try:
            index = 0
            total = len(self.li)
            # groupAsgene returns the index of the first row of the next
            # group, or len(self.li) once the last group was written.
            while index < total:
                index = self.groupAsgene(index)
        finally:
            self.gff3.close()

    def groupAsgene(self, start=0):
        """Write the run of rows starting at ``start`` that share column 9.

        Args:
            start: index in ``self.li`` of the first row of the group.

        Returns:
            The index of the first row whose ninth column differs from the
            one at ``start``, or ``len(self.li)`` when the group runs to the
            end of the input.

        Raises:
            IndexError: if ``start`` is out of range or a row examined has
                fewer than nine columns.
        """
        gene = self.li[start][8]
        index = len(self.li)
        for i in range(start + 1, len(self.li)):
            if self.li[i][8] != gene:
                index = i
                break
        # index is either the first differing row or len(self.li), so a
        # single slice covers both the "middle" and the "last group" case.
        g = Group(self.li[start:index])
        g.writer(self.gff3)
        return index
57299471d6c1 planemo upload commit 402a746f69e9f1dbb57007536fc36dc6ce3180de
yating-l
parents:
diff changeset
51
57299471d6c1 planemo upload commit 402a746f69e9f1dbb57007536fc36dc6ce3180de
yating-l
parents:
diff changeset
52
57299471d6c1 planemo upload commit 402a746f69e9f1dbb57007536fc36dc6ce3180de
yating-l
parents:
diff changeset
53
57299471d6c1 planemo upload commit 402a746f69e9f1dbb57007536fc36dc6ce3180de
yating-l
parents:
diff changeset
54
57299471d6c1 planemo upload commit 402a746f69e9f1dbb57007536fc36dc6ce3180de
yating-l
parents:
diff changeset
# Run the converter only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
57299471d6c1 planemo upload commit 402a746f69e9f1dbb57007536fc36dc6ce3180de
yating-l
parents:
diff changeset
57
57299471d6c1 planemo upload commit 402a746f69e9f1dbb57007536fc36dc6ce3180de
yating-l
parents:
diff changeset
58
57299471d6c1 planemo upload commit 402a746f69e9f1dbb57007536fc36dc6ce3180de
yating-l
parents:
diff changeset
59