comparison glimmer2seq.py @ 0:4da5f6bdcf12 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
author bgruening
date Tue, 28 Nov 2017 10:10:13 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4da5f6bdcf12
1 #!/usr/bin/env python
2 """
3 Input: DNA FASTA file + Glimmer ORF file
4 Output: ORF sequences as FASTA file
5 Author: Bjoern Gruening
6 """
7 import sys
8
9 from Bio import SeqIO
10 from Bio.SeqRecord import SeqRecord
11
12
def glimmer2seq(glimmer_prediction=None, genome_sequence=None, outfile=None):
    """Write the ORFs listed in a Glimmer prediction file as FASTA records.

    Each argument left as ``None`` is taken from the command line:
    ``sys.argv[1]`` (Glimmer prediction file), ``sys.argv[2]`` (genome FASTA
    file) and ``sys.argv[3]`` (output FASTA file).  The command line is
    resolved at call time, so importing this module never fails because of a
    short ``sys.argv``.

    A prediction line is read as whitespace-separated columns: ORF name,
    start, end (1-based, inclusive).  When start > end the ORF is taken from
    the reverse strand and the slice is reverse-complemented.  A line
    starting with '>' selects the FASTA record whose description equals the
    rest of that line.

    Raises:
        SystemExit: an argument is missing and ``sys.argv`` is too short
            (non-zero exit status, message on stderr).
        KeyError: a '>' line names a sequence absent from the FASTA file.
        ValueError: an ORF line has fewer than three columns, has
            non-integer coordinates, or no FASTA record is available for it.
    """
    if glimmer_prediction is None or genome_sequence is None or outfile is None:
        if len(sys.argv) < 4:
            # A string argument makes sys.exit() print to stderr and exit
            # with status 1, so callers can detect the failure.
            sys.exit("Missing input values.")
        if glimmer_prediction is None:
            glimmer_prediction = sys.argv[1]
        if genome_sequence is None:
            genome_sequence = sys.argv[2]
        if outfile is None:
            outfile = sys.argv[3]

    seq_records = []
    # Context managers close both inputs even when parsing raises.
    with open(glimmer_prediction, "r") as glimmerfile, open(genome_sequence) as sequence:
        sequences = {}
        current = None
        for current in SeqIO.parse(sequence, "fasta"):
            sequences[current.description] = current
        # `current` now holds the last FASTA record (None for an empty file).
        # ORF lines that appear before any '>' header are resolved against
        # it, which is what the shared loop variable did previously.

        for line in glimmerfile:
            if line.startswith(">"):
                current = sequences[line[1:].strip()]
                continue

            fields = line.split()
            if not fields:
                # Tolerate blank lines, e.g. a trailing newline at EOF.
                continue
            if len(fields) < 3:
                raise ValueError("Malformed Glimmer prediction line: %r" % line)
            if current is None:
                raise ValueError("No FASTA record available for ORF line: %r" % line)

            # Splitting on whitespace instead of fixed columns keeps parsing
            # correct when the ORF name or a coordinate is wider than usual.
            orf_name = fields[0]
            orf_start = int(fields[1])
            orf_end = int(fields[2])

            # NOTE(review): start > end is always treated as reverse strand;
            # the strand/frame column is not consulted, so an ORF wrapping
            # the origin of a circular sequence would be misread - confirm
            # whether such records need handling.
            if orf_start <= orf_end:
                orf_seq = current.seq[orf_start - 1:orf_end]
            else:
                orf_seq = current.seq[orf_end - 1:orf_start].reverse_complement()
            seq_records.append(SeqRecord(orf_seq, id=orf_name, description=current.description))

    SeqIO.write(seq_records, outfile, "fasta")
44
45
# Script entry point: called without explicit arguments, glimmer2seq() uses
# the command-line values sys.argv[1], sys.argv[2] and sys.argv[3].
if __name__ == "__main__":
    glimmer2seq()