Mercurial > repos > estrain > microrunqc
comparison mlstAddFields.py @ 22:0b36ba6fb319 draft
Uploaded
| author | estrain |
|---|---|
| date | Fri, 19 Jan 2024 11:47:53 +0000 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| 21:5083f8406e34 | 22:0b36ba6fb319 |
|---|---|
| 1 #!/usr/bin/env python3 | |
| 2 | |
| 3 import sys | |
| 4 import csv | |
| 5 | |
def find_index(headers, term):
    """Return the position of *term* in *headers*, or -1 if it is absent.

    Args:
        headers: Sequence of column names to search.
        term: Column name to look for.

    Returns:
        The zero-based index of the first match, or -1 when there is none.
    """
    # Guard on membership rather than catching ValueError from index()
    if term in headers:
        return headers.index(term)
    return -1
| 11 | |
def _locate_mlst_db():
    """Return the PubMLST database directory reported by ``mlst -h``.

    Requires the ``mlst`` command to be installed and available on PATH.

    Raises:
        Exception: If no help line mentioning 'db/pubmlst' carries a
            single-quoted path.
    """
    # Imported locally: only this fallback path needs subprocess
    import subprocess

    help_text = subprocess.check_output(['mlst', '-h']).decode()
    for help_line in help_text.split('\n'):
        if 'db/pubmlst' not in help_line:
            continue
        pieces = help_line.split("'")
        # The path is expected between single quotes; skip lines without one
        # instead of failing with an IndexError.
        if len(pieces) > 1:
            return pieces[1].replace("bin/..", "")
    raise Exception("Could not find MLST database location.")


def _read_schema_descriptions(schema_path):
    """Map each ST in a tab-separated schema table to its description string.

    The description joins, in a fixed order, ``name=value`` pairs for the
    columns 'clonal_complex', 'CC', 'Lineage' and 'species' that exist in the
    header and are non-empty in the row. The first column of each row is the
    dictionary key.
    """
    wanted = ('clonal_complex', 'CC', 'Lineage', 'species')
    descriptions = {}
    # newline='' lets the csv module do its own line-ending handling
    with open(schema_path, 'r', newline='') as handle:
        reader = csv.reader(handle, delimiter='\t')
        headers = next(reader, None)
        if headers is None:
            # Empty table: nothing to annotate with
            return descriptions

        columns = [(name, headers.index(name)) for name in wanted if name in headers]

        for row in reader:
            if not row:
                # csv yields [] for blank lines; they carry no ST
                continue
            # Rows shorter than the header are treated as having empty values
            parts = [
                f"{name}={row[idx]}"
                for name, idx in columns
                if idx < len(row) and row[idx]
            ]
            descriptions[row[0]] = ','.join(parts)
    return descriptions


def main(mlst_file, db_path=None):
    """Print the first mlst result row with schema annotations inserted.

    Reads the first tab-separated row of *mlst_file*, takes column 1 as the
    schema name and column 2 as the sequence type (ST), looks the ST up in
    ``<db>/<schema>/<schema>.txt`` and prints the row to stdout with the
    description inserted as a new fourth column.

    Args:
        mlst_file: Path to a tab-separated mlst result file; only its first
            row is used.
        db_path: Directory containing one sub-directory per schema. When
            None, the location is taken from the output of ``mlst -h``.

    Raises:
        ValueError: If *mlst_file* is empty or its first row has fewer than
            three columns.
        Exception: If *db_path* is None and the database location cannot be
            found in the ``mlst -h`` output.
        OSError: If *mlst_file* or the schema table cannot be opened.
    """
    with open(mlst_file, 'r', newline='') as handle:
        mlstout = next(csv.reader(handle, delimiter='\t'), None)

    if mlstout is None or len(mlstout) < 3:
        raise ValueError(
            f"Expected at least 3 tab-separated columns in the first row of {mlst_file}"
        )

    schema = mlstout[1]
    mlst_st = mlstout[2]

    mlstloc = _locate_mlst_db() if db_path is None else db_path
    schema_dict = _read_schema_descriptions(f"{mlstloc}/{schema}/{schema}.txt")

    output = mlstout[:3]
    # NOTE(review): an ST missing from the schema table gets no extra column,
    # while a known ST with no annotations gets an empty one, so the column
    # count can differ between rows. Kept as-is; confirm what consumers expect.
    if mlst_st in schema_dict:
        output.append(schema_dict[mlst_st])
    output.extend(mlstout[3:])

    print("\t".join(output))
| 63 | |
if __name__ == "__main__":
    # Command-line entry point: mlstAddFields.py <mlst_file> [db_path]
    if len(sys.argv) < 2:
        # sys.exit() with a string writes it to stderr and exits with status 1,
        # keeping the usage text out of stdout, where main() prints its result.
        sys.exit("Usage: python mlstAddFields.py <mlst_file> [db_path]")

    mlst_file = sys.argv[1]
    # The database path is optional; None makes main() look it up itself
    db_path = sys.argv[2] if len(sys.argv) > 2 else None

    main(mlst_file, db_path)
| 73 |
