diff gff2Togff3.py @ 0:57299471d6c1 draft default tip

planemo upload commit 402a746f69e9f1dbb57007536fc36dc6ce3180de
author yating-l
date Wed, 12 Apr 2017 17:37:47 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gff2Togff3.py	Wed Apr 12 17:37:47 2017 -0400
@@ -0,0 +1,101 @@
+"""Convert a tab-separated gff file into a gff3 file.
+
+Consecutive input lines that share the same 9th column are bundled together
+and handed to Group (imported from the Group module), which writes the
+records for that bundle to the output file.
+"""
+import argparse
+import sys
+import fileinput
+from Group import Group
+
+
+def main():
+    """Parse the command line and run the conversion.
+
+    The input is taken from --input when it is given; otherwise it is read
+    from standard input, provided something is piped in.  --output is the
+    path of the gff3 file to write.
+    """
+    parser = argparse.ArgumentParser(description='Get a gff file and the output gff3 file')
+    parser.add_argument('--input', help='input gff file')
+    parser.add_argument('--output', help='output gff3 file', required=True)
+    args = parser.parse_args()
+    if args.input is not None:
+        # An explicit file always wins: stdin is also "not a tty" when the
+        # script runs non-interactively or with stdin redirected, and must
+        # not silently replace --input in that case.
+        source = args.input
+    elif not sys.stdin.isatty():
+        source = sys.stdin
+    else:
+        # parser.error prints the usage message and exits with status 2.
+        parser.error('no input: pass --input or pipe a gff file to stdin')
+    Convertor(source, args.output).convert()
+
+
+class Convertor:
+    """Load gff lines into memory and write them out group by group."""
+
+    def __init__(self, input, output):
+        """Read all input lines and open the output file.
+
+        input  -- path of the gff file (str), or an iterable of lines such
+                  as an open file or sys.stdin
+        output -- path of the gff3 file; it is opened for writing and the
+                  "##gff-version 3" header is written immediately
+        """
+        if isinstance(input, str):
+            with open(input) as handle:
+                self.li = self._parse(handle)
+        else:
+            self.li = self._parse(input)
+        self.gff3 = open(output, "w")
+        self.gff3.write("##gff-version 3\n")
+
+    @staticmethod
+    def _parse(lines):
+        """Split each data line on tabs, skipping blank and '#' lines."""
+        rows = []
+        for line in lines:
+            line = line.rstrip()
+            # Blank lines and comments have no 9th column and would make
+            # groupAsgene fail with an IndexError.
+            if not line or line.startswith("#"):
+                continue
+            rows.append(line.split("\t"))
+        return rows
+
+    def convert(self):
+        """Write every group of lines to the output file, then close it."""
+        try:
+            index = 0
+            # groupAsgene returns the index of the first line it did not
+            # consume, so each pass handles exactly one group.
+            while index < len(self.li):
+                index = self.groupAsgene(index)
+        finally:
+            # Close the output even when a group fails, so the handle
+            # is not leaked.
+            self.gff3.close()
+
+    def groupAsgene(self, start = 0):
+        """Write the run of lines starting at start that share a 9th column.
+
+        Returns the index of the first line after the run, which is
+        len(self.li) when the run reaches the end of the input.
+        """
+        gene = self.li[start][8]
+        index = len(self.li)
+        for i in range(start + 1, len(self.li)):
+            if self.li[i][8] != gene:
+                index = i
+                break
+        # A slice end equal to len(self.li) simply runs to the end of the
+        # list, so the last group needs no special case.
+        Group(self.li[start:index]).writer(self.gff3)
+        return index
+
+
+if __name__ == "__main__":
+    main()