Mercurial > repos > nml > promer
comparison promer_substitutions.py @ 0:bca6b5cb87cd draft default tip
"planemo upload for repository https://github.com/phac-nml/promer commit 09ae227a84e31c9c56f58815b1a0c8c0e0a7900e"
| author | nml |
|---|---|
| date | Tue, 17 Dec 2019 15:55:27 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:bca6b5cb87cd |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import os | |
| 4 import subprocess | |
| 5 import sys | |
| 6 | |
| 7 from Bio import SeqIO | |
| 8 from Bio.SeqIO import FastaIO | |
| 9 | |
| 10 """ | |
| 11 # ============================================================================= | |
| 12 Copyright Government of Canada 2018 | |
| 13 Written by: Eric Marinier, Public Health Agency of Canada, | |
| 14 National Microbiology Laboratory | |
| 15 Licensed under the Apache License, Version 2.0 (the "License"); you may not use | |
| 16 this file except in compliance with the License. You may obtain a copy of the | |
| 17 License at: | |
| 18 http://www.apache.org/licenses/LICENSE-2.0 | |
| 19 Unless required by applicable law or agreed to in writing, software distributed | |
| 20 under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | |
| 21 CONDITIONS OF ANY KIND, either express or implied. See the License for the | |
| 22 specific language governing permissions and limitations under the License. | |
| 23 # ============================================================================= | |
| 24 """ | |
| 25 | |
| 26 __version__ = '0.2.0' | |
| 27 | |
| 28 FASTA_DIRECTORY = "fasta" | |
| 29 | |
| 30 REF_START = 2 | |
| 31 REF_END = 3 | |
| 32 | |
| 33 HEADER_ROW = "[P1]\t[SUB]\t[SUB]\t[P2]\t[BUFF]\t[DIST]\ | |
| 34 \t[R]\t[Q]\t[FRM]\t[FRM]\t[TAG]\t[TAG]\n" | |
| 35 | |
| 36 | |
| 37 def reorient_file(fasta_location, start, end): | |
| 38 | |
| 39 record = list(SeqIO.parse(fasta_location, "fasta"))[0] | |
| 40 | |
| 41 # reversed | |
| 42 if start > end: | |
| 43 record.seq = record.seq[(end - 1):start].reverse_complement() | |
| 44 | |
| 45 # same orientation | |
| 46 else: | |
| 47 record.seq = record.seq[(start - 1):end] | |
| 48 | |
| 49 SeqIO.write(record, fasta_location, "fasta") | |
| 50 | |
| 51 | |
| 52 def promer(reference_location, query_location): | |
| 53 | |
| 54 # promer | |
| 55 subprocess.check_output( | |
| 56 ['promer', reference_location, query_location], | |
| 57 universal_newlines=True) | |
| 58 | |
| 59 | |
| 60 # File locations from arguments: | |
| 61 reference_location = sys.argv[1] | |
| 62 query_location = sys.argv[2] | |
| 63 | |
| 64 # Make directories: | |
| 65 if not os.path.exists(FASTA_DIRECTORY): | |
| 66 os.mkdir(FASTA_DIRECTORY) | |
| 67 | |
| 68 # Read query FASTA: | |
| 69 query = list(SeqIO.parse(query_location, "fasta")) | |
| 70 fasta_locations = [] | |
| 71 | |
| 72 # Prepare final output: | |
| 73 snps_file = open("snps.tab", "w") | |
| 74 snps_file.write(HEADER_ROW) | |
| 75 | |
| 76 # Split the query FASTA file into multiple files, each with one record: | |
| 77 # (This is done to work around a bug in promer.) | |
| 78 for record in query: | |
| 79 | |
| 80 output_name = str(record.id) + ".fasta" | |
| 81 output_location = os.path.join(FASTA_DIRECTORY, output_name) | |
| 82 | |
| 83 fasta_locations.append(output_location) | |
| 84 SeqIO.write(record, output_location, "fasta") | |
| 85 | |
| 86 # Run promer on each (new) FASTA file: | |
| 87 for fasta_location in fasta_locations: | |
| 88 | |
| 89 promer(reference_location, fasta_location) | |
| 90 | |
| 91 # show-coords | |
| 92 output = subprocess.check_output( | |
| 93 ['show-coords', '-THrcl', 'out.delta'], universal_newlines=True) | |
| 94 | |
| 95 if not output: | |
| 96 continue | |
| 97 | |
| 98 ref_start = int(output.split()[REF_START]) | |
| 99 ref_end = int(output.split()[REF_END]) | |
| 100 | |
| 101 reorient_file(fasta_location, ref_start, ref_end) | |
| 102 promer(reference_location, fasta_location) | |
| 103 | |
| 104 # show snps | |
| 105 output = str(subprocess.check_output( | |
| 106 ['show-snps', '-T', '-q', 'out.delta'], universal_newlines=True)) | |
| 107 output = output.split("\n")[4:] | |
| 108 output = "\n".join(output) | |
| 109 | |
| 110 snps_file.write(output) | |
| 111 | |
| 112 snps_file.close() | |
| 113 | |
| 114 # Write all FASTA files into one output: | |
| 115 output_location = "contigs.fasta" | |
| 116 records = [] | |
| 117 | |
| 118 for fasta_location in fasta_locations: | |
| 119 | |
| 120 record = list(SeqIO.parse(fasta_location, "fasta"))[0] | |
| 121 records.append(record) | |
| 122 | |
| 123 contigs_file = open(output_location, 'w') | |
| 124 fasta_writer = FastaIO.FastaWriter(contigs_file, wrap=None) | |
| 125 fasta_writer.write_file(records) | |
| 126 contigs_file.close() |
