annotate Group.py @ 0:57299471d6c1 draft default tip

planemo upload commit 402a746f69e9f1dbb57007536fc36dc6ce3180de
author yating-l
date Wed, 12 Apr 2017 17:37:47 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
57299471d6c1 planemo upload commit 402a746f69e9f1dbb57007536fc36dc6ce3180de
yating-l
parents:
diff changeset
1 from operator import itemgetter
57299471d6c1 planemo upload commit 402a746f69e9f1dbb57007536fc36dc6ce3180de
yating-l
parents:
diff changeset
2
57299471d6c1 planemo upload commit 402a746f69e9f1dbb57007536fc36dc6ce3180de
yating-l
parents:
diff changeset
3 # Input: A group: a list that contains lines belonging to the same gene
57299471d6c1 planemo upload commit 402a746f69e9f1dbb57007536fc36dc6ce3180de
yating-l
parents:
diff changeset
class Group:
    """The feature rows of one gene, rewritten as CDS records for GFF3 output.

    ``group`` is a non-empty list of rows.  Each row is a mutable list with
    at least nine fields, of which this class reads or writes:
    0 (sequence id), 1 (source), 2 (type), 3 (start), 4 (end), 6 (strand),
    7 (phase) and 8 (attributes / gene name).
    """

    def __init__(self, group):
        """Normalise the rows in place and capture gene-level fields.

        The id, source, strand (``stream``) and gene name are taken from the
        first row.  Every row then gets type ``"CDS"``, attributes
        ``"Parent=mRNA_<gene>"`` and integer start/end coordinates.

        Raises IndexError if ``group`` is empty and ValueError if a
        coordinate is not an integer literal.
        """
        first = group[0]
        self.group = group
        self.id = str(first[0])
        self.source = str(first[1])
        self.stream = str(first[6])
        self.gene = str(first[8])
        # The type column is overwritten below, but phaseCalculator still
        # needs to know whether a lone feature was originally an "Eterm".
        # Kept in input order.
        self.original_types = [row[2] for row in group]
        for row in group:
            row[2] = "CDS"
            row[8] = "Parent=mRNA_" + self.gene
            row[3] = int(row[3])
            row[4] = int(row[4])

    def order(self):
        """Sort the rows by start coordinate according to the strand.

        "+" sorts ascending, "-" sorts descending.  Also sets ``num`` (row
        count), ``min_item`` (smallest start) and ``max_item`` (largest end).

        Raises ValueError when the strand is neither "+" nor "-".
        """
        self.num = len(self.group)
        if self.stream == "+":
            descending = False
        elif self.stream == "-":
            descending = True
        else:
            # Fail with a clear message here instead of letting writer()
            # hit an AttributeError on the never-assigned min_item.
            raise ValueError(
                "Invalid strand {!r} for gene {}: expected '+' or '-'".format(
                    self.stream, self.gene
                )
            )
        self.group = sorted(self.group, key=itemgetter(3), reverse=descending)
        # Take the extremes over all rows so the span stays correct even if
        # one feature is nested inside another.
        self.min_item = min(row[3] for row in self.group)
        self.max_item = max(row[4] for row in self.group)

    def phaseCalculator(self, i, donor=0):
        """Fill in the phase column (index 7) for rows ``i`` onwards.

        ``donor`` is the number of bases of an unfinished codon carried over
        from the preceding row; the phase written for a row is the number of
        bases needed to complete that codon.  A group consisting of a single
        row gets phase "0", unless that row was originally typed "Eterm", in
        which case the phase is ``size % 3``.

        ``self.type`` and ``self.size`` are left describing the last row
        processed.
        """
        total = len(self.group)
        for index in range(i, total):
            row = self.group[index]
            self.type = row[2]
            self.size = row[4] - row[3] + 1
            if total == 1:
                if self.original_types[0] == "Eterm":
                    row[7] = str(self.size % 3)
                else:
                    row[7] = "0"
            else:
                accept = (3 - donor) % 3
                row[7] = str(accept)
                donor = (self.size - accept) % 3

    def writer(self, gff3):
        """Write the gene, mRNA and CDS lines for this group to ``gff3``.

        ``gff3`` is any object with a ``write(str)`` method.  The rows are
        ordered and phased first.  Coordinates are stringified only for
        output, so the stored rows keep integer coordinates and the method
        can be called more than once.
        """
        self.order()
        self.phaseCalculator(0)
        start = str(self.min_item)
        end = str(self.max_item)
        gene_fields = [
            self.id, self.source, "gene", start, end,
            ".", self.stream, ".", "ID=" + self.gene,
        ]
        mrna_fields = [
            self.id, self.source, "mRNA", start, end,
            ".", self.stream, ".",
            "ID=mRNA_" + self.gene + ";Parent=" + self.gene,
        ]
        gff3.write("\t".join(gene_fields) + "\n")
        gff3.write("\t".join(mrna_fields) + "\n")
        for row in self.group:
            gff3.write("\t".join(str(field) for field in row) + "\n")