Mercurial > repos > bgruening > glimmer3
diff glimmer2seq.py @ 0:841357e0acbf draft
Uploaded
author | bgruening |
---|---|
date | Sat, 06 Jul 2013 10:09:30 -0400 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python
"""
Input: DNA FASTA file + Glimmer ORF file
Output: ORF sequences as FASTA file
Author: Bjoern Gruening
"""
import os
import sys

from Bio import SeqIO
from Bio.SeqRecord import SeqRecord


def glimmer2seq(glimmer_prediction=None, genome_sequence=None, outfile=None):
    """Extract the ORF sequences predicted by Glimmer and write them as FASTA.

    Each argument that is left as ``None`` falls back to the matching
    command-line argument (``sys.argv[1]``, ``sys.argv[2]``, ``sys.argv[3]``).
    The fallback is resolved when the function is called, not when it is
    defined, so importing this module never touches ``sys.argv``.

    Args:
        glimmer_prediction: Path to the Glimmer prediction file. Lines
            starting with ``>`` name the sequence the following ORF lines
            belong to; every other non-blank line holds, separated by
            whitespace, the ORF id, the start coordinate and the end
            coordinate (further columns are ignored).
        genome_sequence: Path to the FASTA file with the DNA sequences.
        outfile: Path of the FASTA file the ORF sequences are written to.

    Raises:
        SystemExit: If a value is missing and cannot be taken from the
            command line.
        KeyError: If a ``>`` header in the prediction file does not match
            the description of any record in the FASTA file.
        ValueError: If an ORF line is malformed or appears before the first
            ``>`` header.
    """
    # Fill every argument that was not passed explicitly from the command line.
    cli_values = sys.argv[1:4]
    resolved = []
    for position, value in enumerate(
            (glimmer_prediction, genome_sequence, outfile)):
        if value is None:
            if position >= len(cli_values):
                # Message goes to stderr and the exit status is non-zero, so
                # a calling pipeline can detect the failure.
                sys.exit("Missing input values.")
            value = cli_values[position]
        resolved.append(value)
    glimmer_prediction, genome_sequence, outfile = resolved

    # Index the genome records by their full description line, because that
    # is what the '>' headers of the prediction file are matched against.
    with open(genome_sequence) as fasta_handle:
        sequences = dict(
            (record.description, record)
            for record in SeqIO.parse(fasta_handle, "fasta")
        )

    seq_records = []
    current = None  # FASTA record the ORF lines currently refer to
    with open(glimmer_prediction, "r") as glimmer_handle:
        for line in glimmer_handle:
            if line.startswith('>'):
                header = line[1:].strip()
                if header not in sequences:
                    raise KeyError(
                        "Sequence %r from the Glimmer file was not found in "
                        "the FASTA file" % header)
                current = sequences[header]
                continue

            # Split on whitespace instead of slicing fixed columns, so ORF
            # ids longer than 8 characters and coordinates wider than 8
            # digits are still parsed correctly.
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines
            if len(fields) < 3:
                raise ValueError("Malformed Glimmer ORF line: %r" % line)
            if current is None:
                raise ValueError(
                    "ORF line found before any '>' sequence header: %r" % line)

            orf_name = fields[0]
            orf_start = int(fields[1])
            orf_end = int(fields[2])

            # Coordinates are 1-based and inclusive. The strand is inferred
            # from the coordinate order only: start > end means reverse strand.
            # NOTE(review): an ORF wrapping around the origin of a circular
            # genome is therefore not handled here -- confirm whether needed.
            if orf_start <= orf_end:
                orf_seq = current.seq[orf_start - 1:orf_end]
            else:
                orf_seq = current.seq[orf_end - 1:orf_start].reverse_complement()

            seq_records.append(
                SeqRecord(orf_seq, id=orf_name, description=current.description))

    SeqIO.write(seq_records, outfile, "fasta")


if __name__ == "__main__":
    glimmer2seq()