Mercurial > repos > bgruening > glimmer_gbk_to_orf
view glimmer2seq.py @ 0:6351903666da draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
author | bgruening |
---|---|
date | Tue, 28 Nov 2017 10:07:28 -0500 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python """ Input: DNA FASTA file + Glimmer ORF file Output: ORF sequences as FASTA file Author: Bjoern Gruening """ import sys from Bio import SeqIO from Bio.SeqRecord import SeqRecord def glimmer2seq(glimmer_prediction=sys.argv[1], genome_sequence=sys.argv[2], outfile=sys.argv[3]): if len(sys.argv) >= 4: glimmerfile = open(glimmer_prediction, "r") sequence = open(genome_sequence) else: print("Missing input values.") sys.exit() fastafile = SeqIO.parse(sequence, "fasta") sequences = dict() seq_records = list() for entry in fastafile: sequences[entry.description] = entry for line in glimmerfile: if line.startswith('>'): entry = sequences[line[1:].strip()] else: orf_start = int(line[8:17]) orf_end = int(line[18:26]) orf_name = line[0:8] if orf_start <= orf_end: seq_records.append(SeqRecord(entry.seq[orf_start - 1:orf_end], id=orf_name, description=entry.description)) else: seq_records.append(SeqRecord(entry.seq[orf_end - 1:orf_start].reverse_complement(), id=orf_name, description=entry.description)) SeqIO.write(seq_records, outfile, "fasta") glimmerfile.close() sequence.close() if __name__ == "__main__": glimmer2seq()