comparison glimmer2seq.py @ 0:841357e0acbf draft

Uploaded
author bgruening
date Sat, 06 Jul 2013 10:09:30 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:841357e0acbf
1 #!/usr/bin/env python
2 """
3 Input: DNA FASTA file + Glimmer ORF file
4 Output: ORF sequences as FASTA file
5 Author: Bjoern Gruening
6 """
7 import sys, os
8 from Bio import SeqIO
9 from Bio.SeqRecord import SeqRecord
10
def glimmer2seq(glimmer_prediction=None, genome_sequence=None, outfile=None):
    """
    Write the ORFs listed in a Glimmer prediction file as FASTA records.

    Arguments:
        glimmer_prediction: path of the Glimmer ORF file.
        genome_sequence: path of the DNA FASTA file the ORFs refer to.
        outfile: output target handed to SeqIO.write.

    Any argument left as None is taken from the command line
    (sys.argv[1], sys.argv[2] and sys.argv[3] respectively).  The lookup is
    done at call time: reading sys.argv in the default values would raise
    IndexError while the function is being defined, before the
    missing-argument check below could ever run.

    Exits with the message "Missing input values." on stderr and a
    non-zero status when an argument is neither passed nor present on the
    command line.

    Raises:
        KeyError: a '>' header in the prediction file matches no FASTA
            description.
        ValueError: an ORF line appears before the first '>' header, or its
            coordinate columns are not integers.
    """
    resolved = []
    for position, value in enumerate(
            (glimmer_prediction, genome_sequence, outfile), 1):
        if value is None and len(sys.argv) > position:
            value = sys.argv[position]
        resolved.append(value)
    if any(value is None for value in resolved):
        # A string argument makes sys.exit print it to stderr and use exit
        # status 1, so calling tools can detect the failure.
        sys.exit("Missing input values.")
    glimmer_prediction, genome_sequence, outfile = resolved

    # Index the genome records by their full description line, because the
    # '>' lines of the prediction file are matched against that text.
    with open(genome_sequence) as sequence:
        sequences = dict(
            (record.description, record)
            for record in SeqIO.parse(sequence, "fasta")
        )

    seq_records = []
    entry = None
    with open(glimmer_prediction, "r") as glimmerfile:
        for line in glimmerfile:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            if line.startswith('>'):
                entry = sequences[line[1:].strip()]
                continue
            if entry is None:
                raise ValueError(
                    "ORF line found before any '>' header line: %r" % line)

            # Fixed-width columns: name in 0-7, start in 8-16, end in 18-25.
            # NOTE(review): presumably the Glimmer3 .predict layout --
            # confirm against the Glimmer version that produces the input.
            orf_name = line[0:8]
            orf_start = int(line[8:17])
            orf_end = int(line[18:26])

            # Coordinates are 1-based and inclusive.  start > end is treated
            # as an ORF on the reverse strand.
            if orf_start <= orf_end:
                orf_seq = entry.seq[orf_start - 1:orf_end]
            else:
                orf_seq = entry.seq[orf_end - 1:orf_start].reverse_complement()
            seq_records.append(
                SeqRecord(orf_seq, id=orf_name, description=entry.description))

    SeqIO.write(seq_records, outfile, "fasta")
42
# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported.  glimmer2seq() is called without
# arguments, so it works from its default parameter values.
if __name__ == "__main__":
    glimmer2seq()