| Previous changeset 11:7b2ce2933355 (2023-10-25) Next changeset 13:af2d3c8f616b (2023-10-25) |
|
Commit message:
Uploaded |
|
added:
display.py |
| b |
| diff -r 7b2ce2933355 -r 8393cc540777 display.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/display.py Wed Oct 25 15:46:02 2023 +0000 |
| [ |
"""Command-line helpers and report writer for the RBP polymorphism tool.

This module parses the tool's arguments (plain CLI and Galaxy wrapper),
validates nucleotide sequences and writes a human-readable report of the
predicted SNPs to the user's Downloads folder.
"""
import glob
import logging
import os
import sys

logging.basicConfig(level=logging.DEBUG, format="%(levelname)s: %(message)s")

HELP_MSG = """Tool to extract all the Receptor Binding Proteins (RBP) and uncover the inherent polymorphisms from a Staphylococcus phage genome.
It uses sequence homology to cluster similar RBPs sequences and identify existing mutations.
Hereon, we attribute to each polymorphism a representative score through developed and widely accepted matrices.
The lower the value more impactful this alteration.

You can use the tool to analyze a complete genome sequence or a single gene sequence. "--g True" representation when looking for a genome, "--g False" otherwise.
Defaults to "--g False".

Parameters:
    --seq: Sequence string.
    --file: File path to the complete sequence. It requires the full file path and only string representation inside of it.
    --g: Parameter to check if we are analyzing a complete genome or only a sequence to uncover polymorphisms. True if genome, False otherwise.
    --t: Parameter that defines the threshold of alternative allele threshold. Higher values will reduce the number of candidate single-nucleotide polymorphisms. Therefore, we recommend to mantain it between 40 and 60. The default value is 40.

Use only one of the mentioned parameters according to your preference. Other parameters will be ignored."""

# Fallback threshold announced in the log messages and in HELP_MSG.
_DEFAULT_THRESHOLD = 40

# Characters accepted by verify_genome_seq (compared after upper-casing).
_VALID_BASES = frozenset("ACTGN")

# USERPROFILE is only defined on Windows; fall back to the home directory so
# that importing this module does not fail with a TypeError elsewhere.
DOWNLOAD_FOLDER = os.path.join(
    os.getenv("USERPROFILE") or os.path.expanduser("~"), "Downloads"
)


def pretty_output(location: tuple, prediction: str, sequence: str, snps: dict):
    """Write a formatted report for one sequence to a new output file.

    The report is stored as ``output_<i>.txt`` inside ``DOWNLOAD_FOLDER``,
    where ``<i>`` is the first positive integer whose file does not exist yet.

    Args:
        location (tuple): Location details, printed as-is in the header.
        prediction (str): Classification printed in the header.
        sequence (str): Sequence printed in the report body.
        snps (dict): Maps a position to a ``(score, details)`` pair, where
            ``details`` provides the ``'Nucleotide'`` and ``'Amino Acid'``
            keys. Entries are listed in ascending score order.
    """
    rows = []
    for pos, (score, details) in sorted(snps.items(), key=lambda item: item[1][0]):
        pos_str = assert_size(str(pos), 4)
        score_str = assert_size(str(score), 7)
        rows.append(
            f"\tPosition: {pos_str} || Score: {score_str} || Substitutions: {details['Nucleotide']}, {details['Amino Acid']}\n"
        )
    snps_string = "".join(rows)

    data = f"""\n------------------------------------------------------
Classification: {prediction} || Position: {location}\n
Sequence:\n{sequence}\n
Predictions:\n{snps_string}"""

    # The folder is not guaranteed to exist (e.g. on a headless server).
    os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)

    index = 1
    while True:
        path = os.path.join(DOWNLOAD_FOLDER, f"output_{index}.txt")
        try:
            # Mode "x" creates the file atomically and refuses to overwrite,
            # so two concurrent runs can never clobber each other's report.
            handle = open(path, "x")
        except FileExistsError:
            index += 1
        else:
            with handle:
                handle.write(data)
            return


def assert_size(value: str, final_size: int):
    """Right-pad ``value`` with spaces up to ``final_size`` characters.

    Args:
        value (str): Text to pad.
        final_size (int): Minimum width of the result.

    Returns:
        str: The padded text; ``value`` unchanged if it is already at least
        ``final_size`` characters long.
    """
    return value.ljust(final_size)


def arguments_parser_galaxy(args: list) -> tuple:
    """Parse the positional arguments passed by the Galaxy wrapper.

    Args:
        args (list): ``[program, sequence_file, genome_flag, threshold]``.

    Returns:
        tuple: Raw file content, genome verification boolean, and SNP
        threshold (``int``; 40 when the given threshold is not an integer).

    Raises:
        ValueError: If ``args`` does not hold exactly three values after the
            program name.
        SystemExit: If the sequence file cannot be read.
    """
    sequence_var, genome_check_var, threshold_var = args[1:]

    # Genome check value
    genome_check = genome_check_var.lower() == "true"

    # Getting the threshold value
    try:
        threshold = int(threshold_var)
    except ValueError:
        logging.error(
            f"{threshold_var} not valid, use only numeric values. Using the default value of 40."
        )
        threshold = _DEFAULT_THRESHOLD

    # Loading file
    try:
        with open(sequence_var, "r") as f:
            sequence = f.read()
    except (OSError, ValueError) as e:
        # ValueError also covers undecodable content (UnicodeDecodeError).
        logging.error(e)
        sys.exit(1)

    return sequence, genome_check, threshold


def _pair_flags_with_values(tokens: list) -> list:
    """Pair each ``--flag`` with the token that directly follows it.

    A flag followed by another flag (or by nothing) is paired with ``None``,
    so a missing value can never shift the values of the flags after it.
    """
    pairs = []
    pending = None
    for token in tokens:
        if token.startswith("--"):
            if pending is not None:
                pairs.append((pending, None))
            pending = token
        elif pending is not None:
            pairs.append((pending, token))
            pending = None
        else:
            logging.warning(f"'{token}' has no preceding parameter, ignoring...")
    if pending is not None:
        pairs.append((pending, None))
    return pairs


def arguments_parser(args: list) -> tuple:
    """Parse the command-line arguments and obtain the sequence input.

    Recognised flags are ``--seq``, ``--file``, ``--g``, ``--t`` and
    ``--help``. When both ``--seq`` and ``--file`` are given, the last one
    wins. Unknown flags are reported and ignored.

    Args:
        args (list): Arguments, with the program name in first position.

    Returns:
        tuple: Sequence string, genome verification boolean, and SNP
        threshold. The threshold is a ``float`` when ``--t`` is given (40 if
        its value is not numeric) and ``None`` when ``--t`` is absent.

    Raises:
        SystemExit: On ``--help``, when no sequence source is given, when the
            source has no value, when the file cannot be read, or when the
            sequence contains invalid characters.
    """
    source_flags = ("--seq", "--file")
    selected_flag, selected_value = None, None
    genome_check = False
    threshold = None

    for flag, value in _pair_flags_with_values(args[1:]):
        if flag == "--help":
            print()
            print(HELP_MSG)
            sys.exit(1)

        if flag == "--g":
            if value is None:
                logging.warning("'--g' requires a value, using the default of False.")
            else:
                genome_check = value.lower() == "true"
            continue

        if flag == "--t":
            try:
                threshold = float(value)
            except (TypeError, ValueError):
                logging.warning(
                    f"'{value}' not valid, use only numeric values. Using the default value of 40."
                )
                # Honour what the message promises instead of leaving None.
                threshold = float(_DEFAULT_THRESHOLD)
            continue

        # Ignore if invalid variable
        if flag not in source_flags:
            logging.warning(f"'{flag}' not valid, ignoring...")
            continue

        selected_flag, selected_value = flag, value

    if not selected_flag:
        logging.info("Use the correct parameters.")
        sys.exit(1)

    if selected_value is None:
        logging.error(f"'{selected_flag}' requires a value.")
        sys.exit(1)

    if selected_flag == "--seq":
        verify_genome_seq(selected_value)
        return selected_value, genome_check, threshold

    try:
        with open(selected_value, "r") as f:
            sequence = f.read()
    except (OSError, ValueError) as e:
        # ValueError also covers undecodable content (UnicodeDecodeError).
        logging.error(e)
        sys.exit(1)

    # Drop every whitespace character, so that Windows line endings and
    # trailing blanks do not make an otherwise valid sequence fail validation.
    sequence = "".join(sequence.split())
    verify_genome_seq(sequence)
    return sequence, genome_check, threshold


def verify_genome_seq(sequence: str):
    """Exit the program if the sequence holds anything besides A, C, T, G, N.

    The check is case-insensitive.

    Args:
        sequence (str): Genome sequence.

    Raises:
        SystemExit: With status 1 if an invalid character is found.
    """
    if set(sequence.upper()) - _VALID_BASES:
        logging.warning("Genome sequence should only contain 'ACTGN' characters.")
        sys.exit(1)