comparison promer_substitutions.py @ 0:bca6b5cb87cd draft default tip

"planemo upload for repository https://github.com/phac-nml/promer commit 09ae227a84e31c9c56f58815b1a0c8c0e0a7900e"
author nml
date Tue, 17 Dec 2019 15:55:27 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:bca6b5cb87cd
1 #!/usr/bin/env python
2
3 import os
4 import subprocess
5 import sys
6
7 from Bio import SeqIO
8 from Bio.SeqIO import FastaIO
9
10 """
11 # =============================================================================
12 Copyright Government of Canada 2018
13 Written by: Eric Marinier, Public Health Agency of Canada,
14 National Microbiology Laboratory
15 Licensed under the Apache License, Version 2.0 (the "License"); you may not use
16 this file except in compliance with the License. You may obtain a copy of the
17 License at:
18 http://www.apache.org/licenses/LICENSE-2.0
19 Unless required by applicable law or agreed to in writing, software distributed
20 under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
21 CONDITIONS OF ANY KIND, either express or implied. See the License for the
22 specific language governing permissions and limitations under the License.
23 # =============================================================================
24 """
25
26 __version__ = '0.2.0'
27
28 FASTA_DIRECTORY = "fasta"
29
30 REF_START = 2
31 REF_END = 3
32
33 HEADER_ROW = "[P1]\t[SUB]\t[SUB]\t[P2]\t[BUFF]\t[DIST]\
34 \t[R]\t[Q]\t[FRM]\t[FRM]\t[TAG]\t[TAG]\n"
35
36
37 def reorient_file(fasta_location, start, end):
38
39 record = list(SeqIO.parse(fasta_location, "fasta"))[0]
40
41 # reversed
42 if start > end:
43 record.seq = record.seq[(end - 1):start].reverse_complement()
44
45 # same orientation
46 else:
47 record.seq = record.seq[(start - 1):end]
48
49 SeqIO.write(record, fasta_location, "fasta")
50
51
52 def promer(reference_location, query_location):
53
54 # promer
55 subprocess.check_output(
56 ['promer', reference_location, query_location],
57 universal_newlines=True)
58
59
60 # File locations from arguments:
61 reference_location = sys.argv[1]
62 query_location = sys.argv[2]
63
64 # Make directories:
65 if not os.path.exists(FASTA_DIRECTORY):
66 os.mkdir(FASTA_DIRECTORY)
67
68 # Read query FASTA:
69 query = list(SeqIO.parse(query_location, "fasta"))
70 fasta_locations = []
71
72 # Prepare final output:
73 snps_file = open("snps.tab", "w")
74 snps_file.write(HEADER_ROW)
75
76 # Split the query FASTA file into multiple files, each with one record:
77 # (This is done to work around a bug in promer.)
78 for record in query:
79
80 output_name = str(record.id) + ".fasta"
81 output_location = os.path.join(FASTA_DIRECTORY, output_name)
82
83 fasta_locations.append(output_location)
84 SeqIO.write(record, output_location, "fasta")
85
86 # Run promer on each (new) FASTA file:
87 for fasta_location in fasta_locations:
88
89 promer(reference_location, fasta_location)
90
91 # show-coords
92 output = subprocess.check_output(
93 ['show-coords', '-THrcl', 'out.delta'], universal_newlines=True)
94
95 if not output:
96 continue
97
98 ref_start = int(output.split()[REF_START])
99 ref_end = int(output.split()[REF_END])
100
101 reorient_file(fasta_location, ref_start, ref_end)
102 promer(reference_location, fasta_location)
103
104 # show snps
105 output = str(subprocess.check_output(
106 ['show-snps', '-T', '-q', 'out.delta'], universal_newlines=True))
107 output = output.split("\n")[4:]
108 output = "\n".join(output)
109
110 snps_file.write(output)
111
112 snps_file.close()
113
114 # Write all FASTA files into one output:
115 output_location = "contigs.fasta"
116 records = []
117
118 for fasta_location in fasta_locations:
119
120 record = list(SeqIO.parse(fasta_location, "fasta"))[0]
121 records.append(record)
122
123 contigs_file = open(output_location, 'w')
124 fasta_writer = FastaIO.FastaWriter(contigs_file, wrap=None)
125 fasta_writer.write_file(records)
126 contigs_file.close()