Mercurial > repos > bgruening > glimmer_knowledge_based
comparison glimmer2seq.py @ 0:9b2e283dc3b5 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
| author | bgruening |
|---|---|
| date | Tue, 28 Nov 2017 10:10:55 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:9b2e283dc3b5 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 """ | |
| 3 Input: DNA FASTA file + Glimmer ORF file | |
| 4 Output: ORF sequences as FASTA file | |
| 5 Author: Bjoern Gruening | |
| 6 """ | |
| 7 import sys | |
| 8 | |
| 9 from Bio import SeqIO | |
| 10 from Bio.SeqRecord import SeqRecord | |
| 11 | |
| 12 | |
def glimmer2seq(glimmer_prediction=None, genome_sequence=None, outfile=None):
    """Write the ORFs listed in a Glimmer prediction file as FASTA records.

    Each argument that is left as ``None`` is taken from the matching
    command-line position (``sys.argv[1]``, ``sys.argv[2]``, ``sys.argv[3]``),
    so calling ``glimmer2seq()`` from the script entry point behaves as
    before.  The lookup happens at call time, so importing this module
    never fails because of a short ``sys.argv``.

    Args:
        glimmer_prediction: Path of the Glimmer ORF file.  Lines starting
            with ``>`` select the FASTA record (matched on its full
            description line); every other non-blank line is read as
            fixed-width columns: name ``[0:8]``, start ``[8:17]``,
            end ``[18:26]``.
        genome_sequence: Path of the DNA FASTA file.
        outfile: Path (or handle) handed to ``SeqIO.write``.

    Raises:
        SystemExit: with the message "Missing input values." (non-zero
            exit status) if a value is neither passed nor on the command
            line.
        KeyError: if a ``>`` header names a sequence absent from the FASTA.
        ValueError: if an ORF line cannot be parsed, or if an ORF line is
            found while no FASTA record is available at all.
    """
    # Resolve the defaults lazily; evaluating sys.argv[...] in the signature
    # would raise IndexError at definition time, before any check could run.
    cli_values = sys.argv[1:4]
    resolved = []
    for position, value in enumerate((glimmer_prediction, genome_sequence, outfile)):
        if value is None:
            if position >= len(cli_values):
                # A string argument goes to stderr and yields exit status 1.
                sys.exit("Missing input values.")
            value = cli_values[position]
        resolved.append(value)
    glimmer_prediction, genome_sequence, outfile = resolved

    seq_records = []
    # The context managers close both inputs even if parsing fails.
    with open(glimmer_prediction, "r") as glimmerfile, open(genome_sequence) as sequence:
        sequences = {}
        # After this loop ``entry`` holds the last FASTA record (or None).
        # ORF lines that appear before any '>' header are resolved against
        # that record, which matches the script's historical behaviour.
        entry = None
        for entry in SeqIO.parse(sequence, "fasta"):
            sequences[entry.description] = entry

        for line in glimmerfile:
            if not line.strip():
                # Tolerate blank/trailing lines instead of failing in int().
                continue

            if line.startswith(">"):
                header = line[1:].strip()
                if header not in sequences:
                    raise KeyError(
                        "Sequence %r from the Glimmer file was not found in the FASTA file" % header
                    )
                entry = sequences[header]
                continue

            if entry is None:
                raise ValueError("ORF line found but the FASTA file contains no sequences")

            orf_name = line[0:8]
            orf_start = int(line[8:17])
            orf_end = int(line[18:26])

            # Coordinates are 1-based and inclusive, hence the ``- 1``.
            # NOTE(review): start > end is always treated as reverse strand;
            # a forward-strand ORF wrapping the origin of a circular genome
            # would presumably need the frame column instead -- confirm.
            if orf_start <= orf_end:
                orf_seq = entry.seq[orf_start - 1:orf_end]
            else:
                orf_seq = entry.seq[orf_end - 1:orf_start].reverse_complement()
            seq_records.append(SeqRecord(orf_seq, id=orf_name, description=entry.description))

    SeqIO.write(seq_records, outfile, "fasta")


if __name__ == "__main__":
    glimmer2seq()
