Mercurial > repos > bgruening > glimmer3
comparison glimmer2seq.py @ 0:841357e0acbf draft
Uploaded
author | bgruening |
---|---|
date | Sat, 06 Jul 2013 10:09:30 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:841357e0acbf |
---|---|
1 #!/usr/bin/env python | |
2 """ | |
3 Input: DNA FASTA file + Glimmer ORF file | |
4 Output: ORF sequences as FASTA file | |
5 Author: Bjoern Gruening | |
6 """ | |
7 import sys, os | |
8 from Bio import SeqIO | |
9 from Bio.SeqRecord import SeqRecord | |
10 | |
def _parse_orf_line(line):
    """Split one Glimmer ORF line into ``(orf_name, orf_start, orf_end)``.

    The first three whitespace-separated columns are used; any further
    columns are ignored. Splitting on whitespace rather than slicing fixed
    character offsets keeps the parser working when an ORF id or a
    coordinate is wider than the usual column.

    Raises:
        ValueError: if the line has fewer than three columns or a
            coordinate is not an integer.
    """
    fields = line.split()
    if len(fields) < 3:
        raise ValueError("Malformed Glimmer ORF line: %r" % line)
    return fields[0], int(fields[1]), int(fields[2])


def glimmer2seq(glimmer_prediction=None, genome_sequence=None, outfile=None):
    """Write the sequence of every predicted ORF to a FASTA file.

    Args:
        glimmer_prediction: path of the Glimmer ORF prediction file.
            Defaults to ``sys.argv[1]``.
        genome_sequence: path of the DNA FASTA file the predictions refer
            to. Defaults to ``sys.argv[2]``.
        outfile: destination passed to ``SeqIO.write``. Defaults to
            ``sys.argv[3]``.

    The command-line fallbacks are resolved when the function is called,
    not when it is defined, so importing this module (or running it with
    too few arguments) no longer fails with ``IndexError`` before the
    argument check can run.

    Raises:
        SystemExit: with the message "Missing input values." (written to
            stderr, non-zero exit status) when an argument is neither
            passed in nor available on the command line.
        KeyError: if a ``>`` header in the prediction file does not match
            the description of any FASTA record.
        ValueError: if an ORF line is malformed or appears before the
            first ``>`` header.
    """
    arguments = [glimmer_prediction, genome_sequence, outfile]
    for position, value in enumerate(arguments, 1):
        if value is not None:
            continue
        if len(sys.argv) <= position:
            sys.exit("Missing input values.")
        arguments[position - 1] = sys.argv[position]
    glimmer_prediction, genome_sequence, outfile = arguments

    # Context managers close both inputs even if parsing raises part-way.
    with open(glimmer_prediction, "r") as glimmerfile, \
            open(genome_sequence) as sequence:
        # Index the FASTA records by their full description line, which is
        # what the ">" header lines of the prediction file are matched on.
        sequences = {
            record.description: record
            for record in SeqIO.parse(sequence, "fasta")
        }

        seq_records = []
        entry = None
        for line in glimmerfile:
            if line.startswith(">"):
                entry = sequences[line[1:].strip()]
                continue
            if not line.strip():
                # Tolerate blank lines instead of failing in int().
                continue
            if entry is None:
                raise ValueError(
                    "ORF line found before any '>' header line: %r" % line
                )

            orf_name, orf_start, orf_end = _parse_orf_line(line)
            # Coordinates are treated as 1-based and inclusive.
            # NOTE(review): strand is inferred from coordinate order only;
            # an ORF wrapping around the origin of a circular sequence
            # would be extracted incorrectly - confirm whether inputs can
            # contain such predictions.
            if orf_start <= orf_end:
                orf_seq = entry.seq[orf_start - 1:orf_end]
            else:
                orf_seq = entry.seq[orf_end - 1:orf_start].reverse_complement()
            seq_records.append(
                SeqRecord(orf_seq, id=orf_name, description=entry.description)
            )

    SeqIO.write(seq_records, outfile, "fasta")
42 | |
# Script entry point: run the conversion only when executed directly, taking
# the three file arguments from the command line (sys.argv[1..3]).
if __name__ == "__main__":
    glimmer2seq()