diff promer_substitutions.py @ 0:bca6b5cb87cd draft default tip

"planemo upload for repository https://github.com/phac-nml/promer commit 09ae227a84e31c9c56f58815b1a0c8c0e0a7900e"
author nml
date Tue, 17 Dec 2019 15:55:27 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/promer_substitutions.py	Tue Dec 17 15:55:27 2019 -0500
@@ -0,0 +1,126 @@
+#!/usr/bin/env python
+
+import os
+import subprocess
+import sys
+
+from Bio import SeqIO
+from Bio.SeqIO import FastaIO
+
+"""
+# =============================================================================
+Copyright Government of Canada 2018
+Written by: Eric Marinier, Public Health Agency of Canada,
+    National Microbiology Laboratory
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use
+this file except in compliance with the License. You may obtain a copy of the
+License at:
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software distributed
+under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+# =============================================================================
+"""
+
+__version__ = '0.2.0'
+
+FASTA_DIRECTORY = "fasta"
+
+REF_START = 2
+REF_END = 3
+
+HEADER_ROW = "[P1]\t[SUB]\t[SUB]\t[P2]\t[BUFF]\t[DIST]\
+    \t[R]\t[Q]\t[FRM]\t[FRM]\t[TAG]\t[TAG]\n"
+
+
+def reorient_file(fasta_location, start, end):
+
+    record = list(SeqIO.parse(fasta_location, "fasta"))[0]
+
+    # reversed
+    if start > end:
+        record.seq = record.seq[(end - 1):start].reverse_complement()
+
+    # same orientation
+    else:
+        record.seq = record.seq[(start - 1):end]
+
+    SeqIO.write(record, fasta_location, "fasta")
+
+
+def promer(reference_location, query_location):
+
+    # promer
+    subprocess.check_output(
+        ['promer', reference_location, query_location],
+        universal_newlines=True)
+
+
+# File locations from arguments:
+reference_location = sys.argv[1]
+query_location = sys.argv[2]
+
+# Make directories:
+if not os.path.exists(FASTA_DIRECTORY):
+    os.mkdir(FASTA_DIRECTORY)
+
+# Read query FASTA:
+query = list(SeqIO.parse(query_location, "fasta"))
+fasta_locations = []
+
+# Prepare final output:
+snps_file = open("snps.tab", "w")
+snps_file.write(HEADER_ROW)
+
+# Split the query FASTA file into multiple files, each with one record:
+# (This is done to work around a bug in promer.)
+for record in query:
+
+    output_name = str(record.id) + ".fasta"
+    output_location = os.path.join(FASTA_DIRECTORY, output_name)
+
+    fasta_locations.append(output_location)
+    SeqIO.write(record, output_location, "fasta")
+
+# Run promer on each (new) FASTA file:
+for fasta_location in fasta_locations:
+
+    promer(reference_location, fasta_location)
+
+    # show-coords
+    output = subprocess.check_output(
+        ['show-coords', '-THrcl', 'out.delta'], universal_newlines=True)
+
+    if not output:
+        continue
+
+    ref_start = int(output.split()[REF_START])
+    ref_end = int(output.split()[REF_END])
+
+    reorient_file(fasta_location, ref_start, ref_end)
+    promer(reference_location, fasta_location)
+
+    # show snps
+    output = str(subprocess.check_output(
+        ['show-snps', '-T', '-q', 'out.delta'], universal_newlines=True))
+    output = output.split("\n")[4:]
+    output = "\n".join(output)
+
+    snps_file.write(output)
+
+snps_file.close()
+
+# Write all FASTA files into one output:
+output_location = "contigs.fasta"
+records = []
+
+for fasta_location in fasta_locations:
+
+    record = list(SeqIO.parse(fasta_location, "fasta"))[0]
+    records.append(record)
+
+contigs_file = open(output_location, 'w')
+fasta_writer = FastaIO.FastaWriter(contigs_file, wrap=None)
+fasta_writer.write_file(records)
+contigs_file.close()