0
|
1 #!/usr/bin/env python
|
|
2 """
|
|
3 Input: DNA FASTA file + Glimmer ORF file
|
|
4 Output: ORF sequences as FASTA file
|
|
5 Author: Bjoern Gruening
|
|
6 """
|
|
7 import sys, os
|
|
8 from Bio import SeqIO
|
|
9 from Bio.SeqRecord import SeqRecord
|
|
10
|
|
def glimmer2seq(glimmer_prediction=None, genome_sequence=None, outfile=None):
    """Write the ORFs listed in a Glimmer prediction file as FASTA records.

    Parameters:
        glimmer_prediction: path of the Glimmer ORF file. Lines starting with
            '>' select the FASTA record (matched on its full description);
            every other non-blank line is read as whitespace-separated
            "<orf id> <start> <end> ..." with 1-based, inclusive coordinates.
        genome_sequence: path of the DNA FASTA file the ORFs refer to.
        outfile: path of the FASTA file to write.

    Any argument left as None is taken from the matching command-line
    position (sys.argv[1], sys.argv[2], sys.argv[3]).

    An ORF whose start is greater than its end is emitted as the reverse
    complement of the enclosed region. Each output record uses the ORF id as
    its id and the source record's description as its description.

    Raises:
        SystemExit: if an input is neither passed nor present on the command
            line; the message goes to stderr and the exit status is non-zero.
        KeyError: if a '>' header has no FASTA record with that description.
        ValueError: if an ORF line is malformed, or precedes any header while
            the FASTA file does not hold exactly one record.
    """
    # Resolve omitted arguments at call time. Evaluating sys.argv in the
    # signature would raise IndexError while the module is still loading,
    # before the friendly message below could ever be shown.
    cli_args = sys.argv[1:4]
    resolved = []
    for position, value in enumerate((glimmer_prediction, genome_sequence, outfile)):
        if value is None and position < len(cli_args):
            value = cli_args[position]
        resolved.append(value)
    if any(value is None for value in resolved):
        sys.exit("Missing input values.")
    glimmer_prediction, genome_sequence, outfile = resolved

    # Index the genome by full description line, which is what the '>' lines
    # of the prediction file are matched against. The parser is lazy, so it
    # has to be consumed while the handle is still open.
    with open(genome_sequence) as genome_handle:
        sequences = {
            record.description: record
            for record in SeqIO.parse(genome_handle, "fasta")
        }

    seq_records = []
    current = None
    with open(glimmer_prediction) as glimmer_handle:
        for line in glimmer_handle:
            if not line.strip():
                continue

            if line.startswith(">"):
                header = line[1:].strip()
                if header not in sequences:
                    raise KeyError(
                        "Glimmer header %r has no matching record in %s"
                        % (header, genome_sequence)
                    )
                current = sequences[header]
                continue

            # Split on whitespace rather than fixed columns so that ORF ids
            # and coordinates wider than the usual column widths still parse.
            fields = line.split()
            if len(fields) < 3:
                raise ValueError("Malformed Glimmer ORF line: %r" % line)
            orf_name = fields[0]
            orf_start = int(fields[1])
            orf_end = int(fields[2])

            if current is None:
                # No header seen yet: only unambiguous when the genome file
                # holds a single record.
                if len(sequences) != 1:
                    raise ValueError(
                        "ORF %r appears before any '>' header and %s does not "
                        "contain exactly one sequence"
                        % (orf_name, genome_sequence)
                    )
                current = next(iter(sequences.values()))

            # NOTE(review): start > end is always treated as reverse strand;
            # ORFs wrapping around the origin of a circular genome would need
            # the frame column to be told apart - confirm whether such input
            # has to be supported.
            if orf_start <= orf_end:
                orf_seq = current.seq[orf_start - 1:orf_end]
            else:
                orf_seq = current.seq[orf_end - 1:orf_start].reverse_complement()

            seq_records.append(
                SeqRecord(orf_seq, id=orf_name, description=current.description)
            )

    SeqIO.write(seq_records, outfile, "fasta")
|
|
42
|
|
# Script entry point: glimmer2seq() is called without arguments, so its three
# inputs (Glimmer ORF file, genome FASTA, output path) come from sys.argv[1..3].
if __name__ == "__main__" :
    glimmer2seq()
|