Mercurial > repos > estrain > microrunqc
diff mlstAddFields.py @ 22:0b36ba6fb319 draft
Uploaded
author | estrain |
---|---|
date | Fri, 19 Jan 2024 11:47:53 +0000 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python3
"""Annotate one line of tab-separated ``mlst`` output with scheme metadata.

The script reads the first line of a report, takes the scheme name from its
second column and the sequence type (ST) from its third column, looks that ST
up in ``<db>/<scheme>/<scheme>.txt`` and inserts a comma-separated
``key=value`` description right after the third column.

Usage: python mlstAddFields.py <mlst_file> [db_path]
"""

import csv
import subprocess
import sys

# Scheme-table columns copied into the description, in output order.
_ANNOTATION_COLUMNS = ('clonal_complex', 'CC', 'Lineage', 'species')


def find_index(headers, term):
    """Return the position of ``term`` in ``headers``, or -1 when absent."""
    try:
        return headers.index(term)
    except ValueError:
        return -1


def _locate_mlst_db():
    """Return the database directory advertised in the output of ``mlst -h``.

    The help text is scanned for a line mentioning ``db/pubmlst``; the path
    is the text between the first pair of single quotes on that line, with
    the ``bin/..`` segment removed.

    Raises:
        RuntimeError: if no such line with a quoted path is present.
    """
    help_text = subprocess.check_output(['mlst', '-h']).decode()
    for help_line in help_text.split('\n'):
        if 'db/pubmlst' not in help_line:
            continue
        pieces = help_line.split("'")
        # A matching line without quotes carries no path; keep scanning
        # instead of failing with an IndexError.
        if len(pieces) > 1:
            return pieces[1].replace("bin/..", "")
    raise RuntimeError("Could not find MLST database location.")


def _load_annotations(scheme_path):
    """Map each ST in the scheme table to its ``key=value,...`` description.

    The first row is the header. Only the columns named in
    ``_ANNOTATION_COLUMNS`` are used, and only when the cell is non-empty.
    Blank rows are skipped, and cells missing from a short row are treated
    as empty. If an ST appears more than once the last row wins.
    """
    annotations = {}
    # newline='' lets the csv module do its own line-ending handling.
    with open(scheme_path, 'r', newline='') as handle:
        reader = csv.reader(handle, delimiter='\t')
        headers = next(reader, None)
        if headers is None:
            return annotations

        # Resolve the wanted columns once instead of once per row.
        wanted = []
        for name in _ANNOTATION_COLUMNS:
            position = find_index(headers, name)
            if position > -1:
                wanted.append((name, position))

        for row in reader:
            if not row:
                continue
            fields = [
                f"{name}={row[position]}"
                for name, position in wanted
                if position < len(row) and row[position]
            ]
            annotations[row[0]] = ','.join(fields)
    return annotations


def main(mlst_file, db_path=None):
    """Print the first line of ``mlst_file`` with the ST description added.

    Args:
        mlst_file: path to a tab-separated report; only its first line is
            read. Column 2 is the scheme name and column 3 the ST.
        db_path: directory containing ``<scheme>/<scheme>.txt`` tables. When
            omitted, the location is taken from ``mlst -h``, which requires
            the ``mlst`` command to be installed and on the PATH.

    The description is inserted as a new fourth column only when the ST is
    listed in the scheme table. If the scheme table does not exist, a
    warning goes to stderr and the line is printed unchanged.

    Raises:
        ValueError: if the report is empty or its first line has fewer than
            three columns.
        RuntimeError: if ``db_path`` is omitted and the database location
            cannot be found in the ``mlst -h`` output.
    """
    with open(mlst_file, 'r', newline='') as handle:
        mlstout = next(csv.reader(handle, delimiter='\t'), None)

    if mlstout is None or len(mlstout) < 3:
        raise ValueError(
            f"{mlst_file}: expected at least 3 tab-separated columns "
            "on the first line"
        )

    schema = mlstout[1]
    mlst_st = mlstout[2]

    mlstloc = _locate_mlst_db() if db_path is None else db_path
    scheme_path = f"{mlstloc}/{schema}/{schema}.txt"

    try:
        annotations = _load_annotations(scheme_path)
    except FileNotFoundError:
        # No table for this scheme: there is nothing to add, so pass the
        # line through rather than abort.
        print(
            f"Warning: scheme table not found at {scheme_path}; "
            "line left unannotated.",
            file=sys.stderr,
        )
        annotations = {}

    output = mlstout[:3]
    if mlst_st in annotations:
        output.append(annotations[mlst_st])
    output.extend(mlstout[3:])

    print("\t".join(output))


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python mlstAddFields.py <mlst_file> [db_path]")
        sys.exit(1)

    mlst_file = sys.argv[1]
    db_path = sys.argv[2] if len(sys.argv) > 2 else None

    main(mlst_file, db_path)