diff gbk_to_fasta.py @ 4:bd5692103d5b draft

Uploaded
author rreumerman
date Fri, 05 Apr 2013 05:00:40 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gbk_to_fasta.py	Fri Apr 05 05:00:40 2013 -0400
@@ -0,0 +1,56 @@
+"""Convert a GenBank record to a single FASTA entry.
+
+Usage: gbk_to_fasta.py <input GenBank file> <output FASTA file>
+
+The header line is written as ">gi|<gi>|gb|<accession>|<definition>"; the
+"|gb|<accession>" and "|<definition>" parts are left out when the record
+does not provide them. The sequence is wrapped at LINE_WIDTH characters.
+"""
+import sys
+
+from Bio import GenBank
+
+# Number of sequence characters written per FASTA line.
+LINE_WIDTH = 70
+
+if len(sys.argv) < 3:
+    sys.exit("Not enough arguments passed, please provide names of input- and output file")
+
+input_name = sys.argv[1]
+output_name = sys.argv[2]
+
+# Catch Exception rather than everything so Ctrl-C still interrupts the
+# script; the with-block closes the input file even when parsing fails.
+try:
+    with open(input_name) as in_file:
+        seq_record = GenBank.RecordParser().parse(in_file)
+except Exception:
+    seq_record = None
+# NOTE(review): the parser presumably can return None instead of raising when
+# no record is found - confirm; it is reported as the same read error here.
+if seq_record is None:
+    sys.exit("Error reading %s, check file correctness." % input_name)
+
+# accession is indexed as a sequence; guard against it being empty so a
+# record without an accession does not crash with IndexError.
+accession = definition = ''
+if seq_record.accession and seq_record.accession[0]:
+    accession = '|gb|' + seq_record.accession[0]
+if seq_record.definition:
+    definition = '|' + seq_record.definition
+
+header = ">gi|%s%s%s\n" % (seq_record.gi, accession, definition)
+sequence = seq_record.sequence
+
+# Keep the try limited to open() so only failures to open are reported
+# with this message.
+try:
+    out_file = open(output_name, 'w')
+except IOError as e:
+    sys.exit("Error trying to open '%s': %s" % (output_name, e.strerror))
+
+# The with-block guarantees the output file is closed even if a write fails.
+with out_file:
+    out_file.write(header)
+    for start in range(0, len(sequence), LINE_WIDTH):
+        out_file.write(sequence[start:start + LINE_WIDTH] + "\n")