Mercurial > repos > bgruening > glimmer_knowledge_based
view glimmer2seq.py @ 1:febc61f3c67d draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit a4b0969b33a68a0ea9ba12291f6694aec24f13ed
author | iuc |
---|---|
date | Tue, 30 Oct 2018 18:52:08 -0400 |
parents | 9b2e283dc3b5 |
children |
line wrap: on
line source
#!/usr/bin/env python
"""
Input: DNA FASTA file + Glimmer ORF file
Output: ORF sequences as FASTA file
Author: Bjoern Gruening
"""
import sys

from Bio import SeqIO
from Bio.SeqRecord import SeqRecord


def _resolve_arguments(supplied):
    """Return *supplied* with every ``None`` replaced by the command-line
    argument at the same position; exit with an error if it is absent."""
    cli = sys.argv[1:1 + len(supplied)]
    resolved = []
    for position, value in enumerate(supplied):
        if value is None:
            if position >= len(cli):
                # A string argument is printed to stderr and yields exit
                # status 1, so the calling process can detect the failure.
                sys.exit("Missing input values.")
            value = cli[position]
        resolved.append(value)
    return resolved


def _orf_record(source, line):
    """Build the SeqRecord for one ORF *line* located on *source*."""
    # Fixed-width columns: name in 0-7, start in 8-16, end in 18-25.
    # int() tolerates the padding whitespace inside the numeric columns.
    orf_name = line[0:8]
    orf_start = int(line[8:17])
    orf_end = int(line[18:26])

    # Coordinates are 1-based and inclusive, hence the "- 1" on the lower
    # bound of the slice.
    # NOTE(review): the strand is inferred only from start > end; no other
    # column of the line is read. Confirm this is adequate for ORFs that
    # wrap around the origin of a circular sequence.
    if orf_start <= orf_end:
        orf_seq = source.seq[orf_start - 1:orf_end]
    else:
        orf_seq = source.seq[orf_end - 1:orf_start].reverse_complement()
    return SeqRecord(orf_seq, id=orf_name, description=source.description)


def glimmer2seq(glimmer_prediction=None, genome_sequence=None, outfile=None):
    """Extract the ORFs listed in a Glimmer prediction file as FASTA records.

    Each argument left as ``None`` is taken from the command line
    (``sys.argv[1]``, ``sys.argv[2]`` and ``sys.argv[3]`` respectively), so
    calling ``glimmer2seq()`` without arguments behaves like the script
    entry point, while importing this module no longer requires any
    command-line arguments to be present.

    Parameters:
        glimmer_prediction: path of the Glimmer ORF prediction file.
        genome_sequence: path of the DNA FASTA file the ORFs refer to.
        outfile: path (or handle) the ORF sequences are written to as FASTA.

    Raises:
        SystemExit: an argument was omitted and is not on the command line.
        KeyError: a ``>`` header in the prediction file matches no FASTA
            record description.
        ValueError: an ORF line cannot be assigned to a sequence, or its
            coordinate columns are not integers.
    """
    glimmer_prediction, genome_sequence, outfile = _resolve_arguments(
        [glimmer_prediction, genome_sequence, outfile])

    # Index the genome records by their full description line, which is what
    # the ">" header lines of the prediction file are matched against.
    sequences = dict()
    with open(genome_sequence) as sequence:
        for record in SeqIO.parse(sequence, "fasta"):
            sequences[record.description] = record

    # ORF lines that precede any ">" header are only unambiguous when the
    # genome file holds exactly one record; otherwise they are rejected
    # instead of being silently attached to an arbitrary record.
    current = None
    if len(sequences) == 1:
        current = next(iter(sequences.values()))

    seq_records = list()
    with open(glimmer_prediction, "r") as glimmerfile:
        for line in glimmerfile:
            if line.startswith('>'):
                current = sequences[line[1:].strip()]
                continue
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            if current is None:
                raise ValueError(
                    "ORF line found before any '>' sequence header: %r"
                    % line.rstrip())
            seq_records.append(_orf_record(current, line))

    SeqIO.write(seq_records, outfile, "fasta")


if __name__ == "__main__":
    glimmer2seq()