annotate gbk_to_fasta.py @ 4:bd5692103d5b draft

Uploaded
author rreumerman
date Fri, 05 Apr 2013 05:00:40 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
bd5692103d5b Uploaded
rreumerman
parents:
diff changeset
"""Convert a single-record GenBank file to FASTA.

Usage:
    gbk_to_fasta.py <input.gbk> <output.fasta>

The FASTA header is built from the record's GI number, first accession
and definition; the sequence is written in fixed-width lines.
"""
import sys

# Number of sequence characters written per FASTA line.
LINE_WIDTH = 70


def format_header(gi, accessions, definition):
    """Return the FASTA header line (without trailing newline).

    gi          -- GI identifier; always emitted, even when empty.
    accessions  -- list of accession strings; only the first is used, and
                   the '|gb|' field is omitted when the list is empty or
                   its first entry is empty.
    definition  -- free-text description; omitted when empty.
    """
    parts = [">gi|%s" % gi]
    # Guard against an empty list: a record without an ACCESSION line
    # must not abort the conversion with an IndexError.
    if accessions and accessions[0]:
        parts.append("|gb|" + accessions[0])
    if definition:
        parts.append("|" + definition)
    return "".join(parts)


def wrap_sequence(sequence, width=LINE_WIDTH):
    """Split `sequence` into consecutive chunks of at most `width` characters.

    Returns a list of strings; an empty sequence yields an empty list.
    """
    return [sequence[start:start + width]
            for start in range(0, len(sequence), width)]


def main(argv):
    """Read the GenBank file argv[1] and write it as FASTA to argv[2].

    Exits with an error message when arguments are missing, the input
    cannot be read/parsed, or the output file cannot be opened.
    """
    if len(argv) < 3:
        sys.exit("Not enough arguments passed, please provide names of input- and output file")

    input_name = argv[1]
    output_name = argv[2]

    # Deferred until the arguments are validated so the usage error above
    # does not depend on Biopython being importable.
    from Bio import GenBank

    try:
        # 'with' makes sure the input handle is closed even if parsing fails.
        with open(input_name) as in_file:
            seq_record = GenBank.RecordParser().parse(in_file)
    except Exception:
        # Deliberately broad (any I/O or parse failure is reported the same
        # way), but no longer swallows KeyboardInterrupt/SystemExit.
        sys.exit("Error reading %s, check file correctness." % input_name)

    try:
        out_file = open(output_name, 'w')
    except IOError as e:
        sys.exit("Error trying to open '{0}': {1}".format(output_name, e.strerror))

    # 'with' closes the output file even if a write raises.
    with out_file:
        header = format_header(seq_record.gi,
                               seq_record.accession,
                               seq_record.definition)
        out_file.write(header + "\n")
        out_file.writelines(chunk + "\n"
                            for chunk in wrap_sequence(seq_record.sequence))


if __name__ == "__main__":
    main(sys.argv)