Mercurial > repos > nml > promer
diff promer_substitutions.py @ 0:bca6b5cb87cd draft default tip
"planemo upload for repository https://github.com/phac-nml/promer commit 09ae227a84e31c9c56f58815b1a0c8c0e0a7900e"
| author | nml |
|---|---|
| date | Tue, 17 Dec 2019 15:55:27 -0500 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/promer_substitutions.py Tue Dec 17 15:55:27 2019 -0500 @@ -0,0 +1,126 @@ +#!/usr/bin/env python + +import os +import subprocess +import sys + +from Bio import SeqIO +from Bio.SeqIO import FastaIO + +""" +# ============================================================================= +Copyright Government of Canada 2018 +Written by: Eric Marinier, Public Health Agency of Canada, + National Microbiology Laboratory +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. You may obtain a copy of the +License at: +http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. +# ============================================================================= +""" + +__version__ = '0.2.0' + +FASTA_DIRECTORY = "fasta" + +REF_START = 2 +REF_END = 3 + +HEADER_ROW = "[P1]\t[SUB]\t[SUB]\t[P2]\t[BUFF]\t[DIST]\ + \t[R]\t[Q]\t[FRM]\t[FRM]\t[TAG]\t[TAG]\n" + + +def reorient_file(fasta_location, start, end): + + record = list(SeqIO.parse(fasta_location, "fasta"))[0] + + # reversed + if start > end: + record.seq = record.seq[(end - 1):start].reverse_complement() + + # same orientation + else: + record.seq = record.seq[(start - 1):end] + + SeqIO.write(record, fasta_location, "fasta") + + +def promer(reference_location, query_location): + + # promer + subprocess.check_output( + ['promer', reference_location, query_location], + universal_newlines=True) + + +# File locations from arguments: +reference_location = sys.argv[1] +query_location = sys.argv[2] + +# Make directories: +if not os.path.exists(FASTA_DIRECTORY): + os.mkdir(FASTA_DIRECTORY) + +# Read query FASTA: +query = list(SeqIO.parse(query_location, "fasta")) +fasta_locations = [] + +# Prepare final output: +snps_file = open("snps.tab", "w") +snps_file.write(HEADER_ROW) + +# Split the query FASTA file into multiple files, each with one record: +# (This is done to work around a bug in promer.) +for record in query: + + output_name = str(record.id) + ".fasta" + output_location = os.path.join(FASTA_DIRECTORY, output_name) + + fasta_locations.append(output_location) + SeqIO.write(record, output_location, "fasta") + +# Run promer on each (new) FASTA file: +for fasta_location in fasta_locations: + + promer(reference_location, fasta_location) + + # show-coords + output = subprocess.check_output( + ['show-coords', '-THrcl', 'out.delta'], universal_newlines=True) + + if not output: + continue + + ref_start = int(output.split()[REF_START]) + ref_end = int(output.split()[REF_END]) + + reorient_file(fasta_location, ref_start, ref_end) + promer(reference_location, fasta_location) + + # show snps + output = str(subprocess.check_output( + ['show-snps', '-T', '-q', 'out.delta'], universal_newlines=True)) + output = output.split("\n")[4:] + output = "\n".join(output) + + snps_file.write(output) + +snps_file.close() + +# Write all FASTA files into one output: +output_location = "contigs.fasta" +records = [] + +for fasta_location in fasta_locations: + + record = list(SeqIO.parse(fasta_location, "fasta"))[0] + records.append(record) + +contigs_file = open(output_location, 'w') +fasta_writer = FastaIO.FastaWriter(contigs_file, wrap=None) +fasta_writer.write_file(records) +contigs_file.close()
