Mercurial > repos > dcouvin > pmlst2
diff pmlst/pmlst.py @ 0:140d4f9e1f20 draft default tip
Uploaded
author | dcouvin |
---|---|
date | Mon, 06 Sep 2021 16:00:46 +0000 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python3

import os, sys, re, time, pprint, io, shutil
import argparse, subprocess

from cgecore.alignment import extended_cigar
from cgecore.blaster.blaster import Blaster
from cgecore.cgefinder import CGEFinder
import json, gzip
from tabulate import tabulate


def get_read_filename(infiles):
    ''' Infiles must be a list with 1 or 2 input files.
    Removes path from given string and removes extensions:
    .fq .fastq .gz and .trim
    extract the common sample name if 2 files are given.
    '''
    # Remove common fastq extensions from the first file's basename
    seq_path = infiles[0]
    seq_file = os.path.basename(seq_path)
    for ext in (".fq", ".fastq", ".gz", ".trim"):
        seq_file = seq_file.replace(ext, "")
    if len(infiles) == 1:
        return seq_file.rstrip()

    # If two files are given get the common sample name
    # (longest common prefix of the two basenames).
    sample_name = ""
    seq_file_2 = os.path.basename(infiles[1])
    # BUGFIX: bound the loop by the shorter of the two names to avoid an
    # IndexError when seq_file_2 is shorter than seq_file.
    for i in range(min(len(seq_file), len(seq_file_2))):
        if seq_file_2[i] == seq_file[i]:
            sample_name += seq_file[i]
        else:
            break
    if sample_name == "":
        # BUGFIX: was sys.error(...), which does not exist (AttributeError);
        # sys.exit prints the message to stderr and terminates as intended.
        sys.exit("Input error: sample names of input files, {} and {}, \
does not share a common sample name. If these files \
are paired end reads from the same sample, please rename \
them with a common sample name (e.g. 's22_R1.fq', 's22_R2.fq') \
or input them seperately.".format(infiles[0], infiles[1]))

    return sample_name.rstrip("-").rstrip("_")

def is_gzipped(file_path):
    ''' Returns True if file is gzipped and False otherwise.
    The result is inferred from the first two bits in the file read
    from the input path.
    On unix systems this should be: 1f 8b
    Theoretically there could be exceptions to this test but it is
    unlikely and impossible if the input files are otherwise expected
    to be encoded in utf-8.
    '''
    # 'with' guarantees the handle is closed even on read errors.
    with open(file_path, mode='rb') as fh:
        bit_start = fh.read(2)
    return bit_start == b'\x1f\x8b'

def get_file_format(input_files):
    """
    Takes all input files and checks their first character to assess
    the file format. Returns one of the following strings; fasta, fastq,
    other or mixed. fasta and fastq indicates that all input files are
    of the same format, either fasta or fastq. other indicates that none
    of the files are fasta or fastq files. mixed indicates that the input
    files are a mix of different file formats.
    """
    # Open all input files and classify each by its first byte
    file_format = []
    invalid_files = []
    for infile in input_files:
        opener = gzip.open if is_gzipped(infile) else open
        # BUGFIX: the gzipped branch previously leaked its file handle
        # (f.close() was only reached on the plain-file branch).
        with opener(infile, "rb") as f:
            fst_char = f.read(1)
        # Assess the first character: '@' marks FASTQ, '>' marks FASTA
        if fst_char == b"@":
            file_format.append("fastq")
        elif fst_char == b">":
            file_format.append("fasta")
        else:
            invalid_files.append("other")
    # BUGFIX: when no file is fasta/fastq, report "other" as the docstring
    # promises (previously this case fell through to "mixed").
    if invalid_files and not file_format:
        return "other"
    if len(set(file_format)) != 1:
        return "mixed"
    return ",".join(set(file_format))

def import_profile(database, scheme, loci_list):
    """Import all possible allele profiles with corresponding st's
    for the scheme into a dict. The profiles are stored in a dict
    of dicts, to easily look up what st types are associated with
    a specific allele number of each locus.
    """
    # Open allele profile file from the database; 'with' ensures closure.
    with open("{0}/{1}.txt.clean".format(database, scheme), "r") as profile_file:
        profile_header = profile_file.readline().strip().split("\t")[1:len(loci_list)+1]

        # Create dict for looking up st-types with locus/allele combinations
        st_profiles = {}
        # For each locus initiate an inner dict to store allele and st's
        for locus in loci_list:
            st_profiles[locus] = {}

        # Fill inner dict with allele no as key and st-types seen with the
        # allele as value
        for line in profile_file:
            profile = line.strip().split("\t")
            st_name = profile[0]
            allele_list = profile[1:len(loci_list)+1]

            # Save each locus-allele combination with the st-type
            for i in range(len(allele_list)):
                allele = allele_list[i]
                locus = profile_header[i]
                if allele in st_profiles[locus]:
                    st_profiles[locus][allele] += [st_name]
                else:
                    st_profiles[locus][allele] = [st_name]

    return st_profiles

def st_typing(st_profiles, allele_matches, loci_list):
    """
    Takes the st_profiles dict (locus -> allele -> [STs]), the dict of
    allele matches assigned to each locus, and the list of loci.
    Returns the inferred sequence type (ST), a notes string describing
    imperfect hits, and a comma-separated string of nearest STs when no
    perfect profile match exists.
    """
    st_hits = []
    st_marks = []
    note = ""

    # Check the quality of the allele hits
    for locus in allele_matches:
        allele = allele_matches[locus]["allele"]

        # Check if allele is marked as a non-perfect match.
        # Save mark and write note.
        if "?*" in allele:
            note += "?* {}: Imperfect hit, ST can not be trusted!\n".format(locus)
            st_marks = ["?","*"]
        elif "?" in allele:
            note += "? {}: Uncertain hit, ST can not be trusted.\n".format(locus)
            st_marks.append("?")
        elif "*" in allele:
            note += "* {}: Novel allele, ST may indicate nearest ST.\n".format(locus)
            st_marks.append("*")

        # Remove mark from allele so it can be used to look up nearest STs
        allele = allele.rstrip("*?!")

        # Get all STs that have the allele in their allele profile
        st_hits += st_profiles[locus].get(allele, ["None"])

        if "alternative_hit" in allele_matches[locus] and allele_matches[locus]["alternative_hit"] != {}:
            note += "! {}: Multiple perfect hits found\n".format(locus)
            st_marks.append("!")
            for allele_name, hit_info in allele_matches[locus]["alternative_hit"].items():
                allele = hit_info["allele"].rstrip("!")
                st_hits += st_profiles[locus].get(allele, ["None"])

    # Save allele marks to be transferred to the ST
    st_mark = "".join(set(st_marks))
    notes = st_mark
    # Add marks information to notes
    if "!" in st_mark:
        notes += " alleles with multiple perfect hits found, multiple STs might be found\n"
    if "*" in st_mark and "?" in st_mark:
        notes += " alleles with less than 100% identity and 100% coverages found\n"
    elif st_mark == "*":
        notes = st_mark + " alleles with less than 100% identity found\n"
    elif st_mark == "?":
        notes = st_mark + " alleles with less than 100% coverage found\n"
    notes += note

    # Find most frequent st in st_hits
    st_hits_counter = {}
    max_count = 0
    best_hit = ""
    for hit in st_hits:
        # BUGFIX: was `hit is not "None"` — identity comparison against a
        # string literal is implementation-dependent; use equality instead.
        if hit != "None":
            st_hits_counter[hit] = st_hits_counter.get(hit, 0) + 1
            if max_count < st_hits_counter[hit]:
                max_count = st_hits_counter[hit]
                best_hit = hit

    # Check if allele profile match found st 100 %
    similarity = round(float(max_count)/len(loci_list)*100, 2)

    if similarity != 100:
        st = "Unknown"
        nearest_sts = []
        # If st is not perfect find nearest STs (all with the top count)
        for st_hit, allele_score in sorted(st_hits_counter.items(), key=lambda x: x[1], reverse=True):
            if allele_score < max_count:
                break
            nearest_sts.append(st_hit)
        nearest_sts = ",".join(nearest_sts)
    else:
        # Allele profile has a perfect ST hit, but marks given to alleles
        # may still indicate imperfect individual hits.
        sts = [st for st, no in st_hits_counter.items() if no == max_count]
        st = "{},".format(st_mark).join(sts) + st_mark
        nearest_sts = ""

    return st, notes, nearest_sts

def make_aln(scheme, file_handle, allele_matches, query_aligns, homol_aligns, sbjct_aligns):
    """Write template/homology/query alignments for every matched locus
    (and any alternative perfect hits) to the open file_handle."""
    for locus, locus_info in allele_matches.items():
        allele_name = locus_info["allele_name"]
        if allele_name == "No hit found":
            continue
        hit_name = locus_info["hit_name"]

        seqs = ["","",""]
        seqs[0] = sbjct_aligns[scheme][hit_name]
        seqs[1] = homol_aligns[scheme][hit_name]
        seqs[2] = query_aligns[scheme][hit_name]

        write_align(seqs, allele_name, file_handle)

        # Write alternative (equally perfect) hits as well
        if "alternative_hit" in locus_info:
            for allele_name in locus_info["alternative_hit"]:
                hit_name = locus_info["alternative_hit"][allele_name]["hit_name"]
                seqs = ["","",""]
                seqs[0] = sbjct_aligns[scheme][hit_name]
                seqs[1] = homol_aligns[scheme][hit_name]
                seqs[2] = query_aligns[scheme][hit_name]

                write_align(seqs, allele_name, file_handle)

def write_align(seq, seq_name, file_handle):
    """Write one alignment (template, homology, query) in 60-column
    blocks to the open file_handle, headed by '# seq_name'."""
    file_handle.write("# {}".format(seq_name) + "\n")
    sbjct_seq = seq[0]
    homol_seq = seq[1]
    query_seq = seq[2]
    for i in range(0, len(sbjct_seq), 60):
        file_handle.write("%-10s\t%s\n"%("template:", sbjct_seq[i:i+60]))
        file_handle.write("%-10s\t%s\n"%("", homol_seq[i:i+60]))
        file_handle.write("%-10s\t%s\n\n"%("query:", query_seq[i:i+60]))

def text_table(headers, rows, empty_replace='-'):
    ''' Create text table

    USAGE:
        >>> from tabulate import tabulate
        >>> headers = ['A','B']
        >>> rows = [[1,2],[3,4]]
        >>> print(text_table(headers, rows))
        **********
          A    B
        **********
          1    2
          3    4
        ==========
    '''
    # Replace empty cells with placeholder (lazy maps; consumed by tabulate)
    rows = map(lambda row: map(lambda x: x if x else empty_replace, row), rows)
    # Create table
    table = tabulate(rows, headers, tablefmt='simple').split('\n')
    # Prepare title injection
    width = len(table[0])
    # Switch horizontal line
    table[1] = '*'*(width+2)
    # Update table with title
    table = ("%s\n"*3)%('*'*(width+2), '\n'.join(table), '='*(width+2))
    return table


parser = argparse.ArgumentParser(description="")
# Arguments
parser.add_argument("-i", "--infile",
                    help="FASTA or FASTQ files to do pMLST on.",
                    nargs="+", required=True)
parser.add_argument("-o", "--outdir",
                    help="Output directory.",
                    default=".")
parser.add_argument("-s", "--scheme",
                    help="scheme database used for pMLST prediction", required=True)
parser.add_argument("-p", "--database",
                    help="Directory containing the databases.", default="/database")
parser.add_argument("-t", "--tmp_dir",
                    help="Temporary directory for storage of the results\
                    from the external software.",
                    default="tmp_pMLST")
parser.add_argument("-mp", "--method_path",
                    help="Path to the method to use (kma or blastn)\
                    if assembled contigs are inputted the path to executable blastn should be given,\
                    if fastq files are given path to executable kma should be given")
parser.add_argument("-x", "--extented_output",
                    help="Give extented output with allignment files, template and query hits in fasta and\
                    a tab seperated file with allele profile results", action="store_true")
parser.add_argument("-q", "--quiet", action="store_true")


#parser.add_argument("-c", "--coverage",
#                    help="Minimum template coverage required", default = 0.6)
#parser.add_argument("-i", "--identity",
#                    help="Minimum template identity required", default = 0.9)
args = parser.parse_args()

# Quiet mode: redirect stdout to /dev/null (restored implicitly at exit;
# the handle is closed at the bottom of the script).
if args.quiet:
    f = open(os.devnull, 'w')
    sys.stdout = f


#TODO what are the clonal complex data used for??

# TODO error handling
infile = args.infile
# Check that outdir is an existing dir...
outdir = os.path.abspath(args.outdir)
scheme = args.scheme
database = os.path.abspath(args.database)
tmp_dir = os.path.abspath(args.tmp_dir)
# Check if method path is executable
method_path = args.method_path
extented_output = args.extented_output

# Hard-coded thresholds (the corresponding CLI flags are commented out above)
min_cov = 0.6  # args.coverage
threshold = 0.95  # args.identity

# Check file format (fasta, fastq or other format)
file_format = get_file_format(infile)

# NOTE(review): the format string has one placeholder but two arguments;
# 'scheme' is silently ignored, so db_path is just "<database>/".
# Possibly intended "{}/{}" — confirm against the database layout.
db_path = "{}/".format(database, scheme)

config_file = open(database + "/config","r")

# Get profile_name from config file (tab-separated: scheme <TAB> profile name)
# NOTE(review): profile_name is only bound when the scheme is found here;
# the sys.exit below guards later uses as long as config is consistent.
scheme_list = []
for line in config_file:
    if line.startswith("#"):
        continue
    line = line.split("\t")
    scheme_list.append(line[0])
    if line[0] == scheme:
        profile_name = line[1]

config_file.close()

if scheme not in scheme_list:
    sys.exit("{}, is not a valid scheme. \n\nPlease choose a scheme available in the database:\n{}".format(scheme, ", ".join(scheme_list)))

# Get loci list from the allele profile file header (column 1 is the ST id)
with open("{0}/{1}.txt.clean".format(database, scheme), "r") as st_file:
    file_header = st_file.readline().strip().split("\t")
    loci_list = file_header[1:]

# Call appropriate method (kma for reads, blastn for assemblies)
if file_format == "fastq":
    if not method_path:
        method_path = "kma"
        if shutil.which(method_path) == None:
            sys.exit("No valid path to a kma program was provided. Use the -mp flag to provide the path.")
    # Check the number of files: 1 = single-end, 2 = paired-end
    if len(infile) == 1:
        infile_1 = infile[0]
        infile_2 = None
    elif len(infile) == 2:
        infile_1 = infile[0]
        infile_2 = infile[1]
    else:
        sys.exit("Only 2 input file accepted for raw read data,\
 if data from more runs is avaliable for the same\
 sample, please concatinate the reads into two files")

    sample_name = get_read_filename(infile)
    method = "kma"

    # Call KMA with fixed scoring parameters
    method_obj = CGEFinder.kma(infile_1, outdir, [scheme], db_path, min_cov=min_cov,
                               threshold=threshold, kma_path=method_path, sample_name=sample_name,
                               inputfile_2=infile_2, kma_mrs=0.75, kma_gapopen=-5,
                               kma_gapextend=-1, kma_penalty=-3, kma_reward=1)

elif file_format == "fasta":
    if not method_path:
        method_path = "blastn"
        if shutil.which(method_path) == None:
            sys.exit("No valid path to a blastn program was provided. Use the -mp flag to provide the path.")
    # Assert that only one fasta file is inputted
    assert len(infile) == 1, "Only one input file accepted for assembled data."
    infile = infile[0]
    method = "blast"

    # Call BLASTn
    method_obj = Blaster(infile, [scheme], db_path, tmp_dir,
                         min_cov, threshold, method_path, cut_off=False)
    #allewed_overlap=50)
else:
    sys.exit("Input file must be fastq or fasta format, not "+ file_format)

# Result dicts produced by the chosen aligner (structure defined by cgecore)
results = method_obj.results
query_aligns = method_obj.gene_align_query
homol_aligns = method_obj.gene_align_homo
sbjct_aligns = method_obj.gene_align_sbjct

# Check that the results dict is not empty
warning = ""
if results[scheme] == "No hit found":
    results[scheme] = {}
    warning = ("No MLST loci was found in the input data, "
               "make sure that the correct pMLST scheme was chosen.")


allele_matches = {}

# Get the found allele profile contained in the results dict.
# For each locus, keep the best hit ranked by match_priority:
#   1 = perfect (100% cov, 100% id), 2 = full coverage ("*"),
#   3 = full identity ("?"), 4 = neither ("?*"); ties broken by score.
for hit, locus_hit in results[scheme].items():

    # Subject headers look like "<locus>_<allele>" or "<locus>-<allele>"
    allele_name = locus_hit["sbjct_header"]
    allele_obj = re.search("(\w+)[_|-](\w+$)", allele_name)

    # Get variables for later storage in the results dict
    locus = allele_obj.group(1)
    allele = allele_obj.group(2)
    coverage = float(locus_hit["perc_coverage"])
    identity = float(locus_hit["perc_ident"])
    score = float(locus_hit["cal_score"])
    gaps = int(locus_hit["gaps"])
    align_len = locus_hit["HSP_length"]
    sbj_len = int(locus_hit["sbjct_length"])
    sbjct_seq = locus_hit["sbjct_string"]
    query_seq = locus_hit["query_string"]
    homol_seq = locus_hit["homo_string"]
    cigar = extended_cigar(sbjct_aligns[scheme][hit], query_aligns[scheme][hit])

    # Check for perfect hits
    if coverage == 100 and identity == 100:
        # If a perfect hit was already stored for this locus, the
        # "alternative_hit" key exists and this new perfect hit is appended
        # to it; otherwise the lookup raises KeyError and we store this hit
        # as the (first) perfect match, overwriting any lower-priority hit.
        try:
            allele_matches[locus]["alternative_hit"][allele_name] = {"allele":allele+"!", "align_len":align_len, "sbj_len":sbj_len,
                                                                     "coverage":coverage, "identity":identity, "hit_name":hit}
            if allele_matches[locus]["allele"][-1] != "!":
                allele_matches[locus]["allele"] += "!"
        except KeyError:
            # Overwrite alleles already saved, save the perfect match
            allele_matches[locus] = {"score":score, "allele":allele, "coverage":coverage,
                                     "identity":identity, "match_priority": 1, "align_len":align_len,
                                     "gaps":gaps, "sbj_len":sbj_len, "allele_name":allele_name,
                                     "sbjct_seq":sbjct_seq, "query_seq":query_seq, "homol_seq":homol_seq,
                                     "hit_name":hit, "cigar":cigar, "alternative_hit":{}}
    else:
        # If no hit has yet been stored initialize dict variables that are
        # looked up below (priority 4 = worst, score 0 loses to any real hit)
        if locus not in allele_matches:
            allele_matches[locus] = {"score":0, "match_priority": 4}

        # We weight full coverage higher than perfect identity match
        if coverage == 100 and identity != 100:
            # Check that a better (higher prioritized) 100% coverage hit
            # has not been stored yet
            if allele_matches[locus]["match_priority"] > 2 or (allele_matches[locus]["match_priority"] == 2 and score > allele_matches[locus]["score"]):
                allele_matches[locus] = {"score":score, "allele":allele+"*", "coverage":coverage,
                                         "identity":identity, "match_priority": 2, "align_len":align_len,
                                         "gaps":gaps, "sbj_len":sbj_len, "allele_name":allele_name,
                                         "sbjct_seq":sbjct_seq, "query_seq":query_seq, "homol_seq":homol_seq,
                                         "hit_name":hit, "cigar":cigar}
        elif coverage != 100 and identity == 100:
            # Check that higher prioritized hit was not already stored
            if allele_matches[locus]["match_priority"] > 3 or (allele_matches[locus]["match_priority"] == 3 and score > allele_matches[locus]["score"]):
                allele_matches[locus] = {"score":score, "allele":allele + "?", "coverage":coverage,
                                         "identity":identity, "match_priority": 3, "align_len":align_len,
                                         "gaps":gaps, "sbj_len":sbj_len, "allele_name":allele_name,
                                         "sbjct_seq":sbjct_seq, "query_seq":query_seq, "homol_seq":homol_seq,
                                         "hit_name":hit, "cigar":cigar}
        else:  # coverage != 100 and identity != 100
            if allele_matches[locus]["match_priority"] == 4 and score > allele_matches[locus]["score"]:
                allele_matches[locus] = {"score":score, "allele":allele + "?*", "coverage":coverage,
                                         "identity":identity, "match_priority": 4, "align_len":align_len,
                                         "gaps":gaps, "sbj_len":sbj_len, "allele_name":allele_name,
                                         "sbjct_seq":sbjct_seq, "query_seq":query_seq, "homol_seq":homol_seq,
                                         "hit_name":hit, "cigar":cigar}

# Loci without any hit get an empty placeholder entry
for locus in loci_list:
    if locus not in allele_matches:
        allele_matches[locus] = {"identity":"", "coverage":"", "allele":"", "allele_name":"No hit found", "align_len":"", "gaps":"", "sbj_len":""}

# Import all possible st profiles into dict
st_profiles = import_profile(database, scheme,loci_list)

# Find st or nearest sts
st, note, nearest_sts = st_typing(st_profiles, allele_matches, loci_list)

# Give warning of mlst scheme if no loci were found
if note == "" and warning != "":
    note = warning

# Set ST for incF: composite [F:A:B] replicon formula built from the
# FII-family, FIA and FIB alleles (only 100%-identity hits contribute).
if scheme.lower() == "incf":
    st = ["F","A", "B"]
    if "FII" in allele_matches and allele_matches["FII"]["identity"] == 100.0:
        st[0] += allele_matches["FII"]["allele_name"].split("_")[-1]
    elif "FIC" in allele_matches and allele_matches["FIC"]["identity"] == 100.0:
        st[0] = "C" + allele_matches["FIC"]["allele_name"].split("_")[-1]
    elif "FIIK" in allele_matches and allele_matches["FIIK"]["identity"] == 100.0:
        st[0] = "K" + allele_matches["FIIK"]["allele_name"].split("_")[-1]
    elif "FIIS" in allele_matches and allele_matches["FIIS"]["identity"] == 100.0:
        st[0] = "S" + allele_matches["FIIS"]["allele_name"].split("_")[-1]
    elif "FIIY" in allele_matches and allele_matches["FIIY"]["identity"] == 100.0:
        st[0] = "Y" + allele_matches["FIIY"]["allele_name"].split("_")[-1]
    else:
        st[0] += "-"

    if "FIA" in allele_matches and allele_matches["FIA"]["identity"] == 100.0:
        st[1] += allele_matches["FIA"]["allele_name"].split("_")[-1]
    else:
        st[1] += "-"

    if "FIB" in allele_matches and allele_matches["FIB"]["identity"] == 100.0:
        st[2] += allele_matches["FIB"]["allele_name"].split("_")[-1]
    else:
        st[2] += "-"

    st = "["+":".join(st)+"]"


# Check if ST is associated with a clonal complex.
clpx = ""
if st != "Unknown" or nearest_sts != "":
    with open("{0}/{1}.clpx".format(database,scheme), "r") as clpx_file:
        for line in clpx_file:
            line = line.split("\t")
            # NOTE(review): st[0] is the FIRST CHARACTER of the ST string,
            # so this compares a single char against the file's ST column —
            # looks like `st == line[0]` was intended. Confirm before changing.
            if st[0] == line[0] or nearest_sts == line[0]:
                clpx = line[1].strip()


# Get run info for JSON file
service = os.path.basename(__file__).replace(".py", "")
date = time.strftime("%d.%m.%Y")
# NOTE(review): this rebinds the `time` MODULE name to a string; harmless
# here only because time.strftime is never called again afterwards.
time = time.strftime("%H:%M:%S")

# TODO find a system to show the database and service version using git

# Make JSON output file
data = {service:{}}
allele_results = {}
for locus, locus_info in allele_matches.items():
    allele_results[locus] = {"identity":0, "coverage":0, "allele":[], "allele_name":[], "align_len":[], "gaps":0, "sbj_len":[]}
    for (key, value) in locus_info.items():
        # Copy only the whitelisted keys (plus a non-empty alternative_hit)
        if key in allele_results[locus] or (key == "alternative_hit" and value != {}):
            allele_results[locus][key] = value

userinput = {"filename":args.infile, "scheme":args.scheme, "profile":profile_name,"file_format":file_format}
run_info = {"date":date, "time":time}#, "database":{"remote_db":remote_db, "last_commit_hash":head_hash}}
server_results = {"sequence_type":st, "allele_profile": allele_results,
                  "nearest_sts":nearest_sts,"clonal_complex":clpx, "notes":note}

data[service]["user_input"] = userinput
data[service]["run_info"] = run_info
data[service]["results"] = server_results

pprint.pprint(data)

# Save json output
result_file = "{}/data.json".format(outdir)
with open(result_file, "w") as outfile:
    json.dump(data, outfile)

if extented_output:
    # Define extended output files: tab-separated table, query/template
    # FASTA files and a human-readable results report.
    table_filename = "{}/results_tab.tsv".format(outdir)
    query_filename = "{}/Hit_in_genome_seq.fsa".format(outdir)
    sbjct_filename = "{}/pMLST_allele_seq.fsa".format(outdir)
    result_filename = "{}/results.txt".format(outdir)
    table_file = open(table_filename, "w")
    query_file = open(query_filename, "w")
    sbjct_file = open(sbjct_filename, "w")
    result_file = open(result_filename, "w")

    # Make results file
    result_file.write("{0} Results\n\n".format(service))
    result_file.write("pMLST profile: {}\n\nSequence Type: {}\n".format(profile_name, st))
    # If ST is unknown report nearest ST
    if st == "Unknown" and nearest_sts != "":
        if len(nearest_sts.split(",")) == 1:
            result_file.write("Nearest ST: {}\n".format(nearest_sts))
        else:
            result_file.write("Nearest STs: {}\n".format(nearest_sts))

    # Report clonal complex if one was associated with ST:
    if clpx != "":
        result_file.write("Clonal complex: {}\n".format(clpx))

    # Write tsv table header
    table_header = ["Locus", "Identity", "Coverage", "Alignment Length", "Allele Length", "Gaps", "Allele"]
    table_file.write("\t".join(table_header) + "\n")
    rows = []
    for locus, allele_info in allele_matches.items():

        identity = str(allele_info["identity"])
        coverage = str(allele_info["coverage"])
        allele = allele_info["allele"]
        allele_name = allele_info["allele_name"]
        align_len = str(allele_info["align_len"])
        sbj_len = str(allele_info["sbj_len"])
        gaps = str(allele_info["gaps"])

        # Write allele names with indications of imperfect hits
        if allele_name != "No hit found":
            allele_name_w_mark = locus + "_" + allele
        else:
            allele_name_w_mark = allele_name

        # Write allele results to tsv table
        row = [locus, identity, coverage, align_len, sbj_len, gaps, allele_name_w_mark]
        rows.append(row)
        if "alternative_hit" in allele_info:
            # NOTE(review): this loop rebinds `allele_name`, so the
            # "No hit found" check below tests the LAST alternative hit's
            # name, not this locus' primary allele name — verify intent.
            for allele_name, dic in allele_info["alternative_hit"].items():
                row = [locus, identity, coverage, str(dic["align_len"]), str(dic["sbj_len"]), "0", allele_name + "!"]
                rows.append(row)
        #

        if allele_name == "No hit found":
            continue

        # Write query fasta output
        hit_name = allele_info["hit_name"]
        query_seq = query_aligns[scheme][hit_name]
        sbjct_seq = sbjct_aligns[scheme][hit_name]
        homol_seq = homol_aligns[scheme][hit_name]

        if allele_info["match_priority"] == 1:
            match = "PERFECT MATCH"
        else:
            match = "WARNING"
        header = ">{}:{} ID:{}% COV:{}% Best_match:{}\n".format(locus, match, allele_info["identity"],
                                                                allele_info["coverage"], allele_info["allele_name"])
        query_file.write(header)
        for i in range(0,len(query_seq),60):
            query_file.write(query_seq[i:i+60] + "\n")

        # Write template fasta output
        header = ">{}\n".format(allele_info["allele_name"])
        sbjct_file.write(header)
        for i in range(0,len(sbjct_seq),60):
            sbjct_file.write(sbjct_seq[i:i+60] + "\n")

        if "alternative_hit" in allele_info:
            for allele_name in allele_info["alternative_hit"]:
                header = ">{}:{} ID:{}% COV:{}% Best_match:{}\n".format(locus, "PERFECT MATCH", 100,
                                                                        100, allele_name)
                hit_name = allele_info["alternative_hit"][allele_name]["hit_name"]
                query_seq = query_aligns[scheme][hit_name]
                sbjct_seq = sbjct_aligns[scheme][hit_name]
                homol_seq = homol_aligns[scheme][hit_name]
                query_file.write(header)
                for i in range(0,len(query_seq),60):
                    query_file.write(query_seq[i:i+60] + "\n")

                # Write template fasta output
                header = ">{}\n".format(allele_name)
                sbjct_file.write(header)
                for i in range(0,len(sbjct_seq),60):
                    sbjct_file.write(sbjct_seq[i:i+60] + "\n")

    # Write allele profile results tables in results file and table file
    rows.sort(key=lambda x: x[0])
    result_file.write(text_table(table_header, rows))
    for row in rows:
        table_file.write("\t".join(row) + "\n")
    # Write any notes
    if note != "":
        result_file.write("\nNotes: {}\n\n".format(note))

    # Write alignment output
    result_file.write("\n\nExtended Output:\n\n")
    make_aln(scheme, result_file, allele_matches, query_aligns, homol_aligns, sbjct_aligns)

    # Close all files
    query_file.close()
    sbjct_file.close()
    table_file.close()
    result_file.close()

# Close the /dev/null handle opened for quiet mode
if args.quiet:
    f.close()