22
|
#!/usr/bin/env python3
|
|
2
|
|
3 import sys
|
|
4 import csv
|
|
5
|
|
def find_index(headers, term):
    """Return the position of *term* in *headers*, or -1 if it is absent.

    Args:
        headers: Sequence of column names (anything with an ``index`` method).
        term: The column name to look for.

    Returns:
        The zero-based index of the first match, or -1 when ``index`` raises
        ``ValueError`` because the term is not present.
    """
    try:
        position = headers.index(term)
    except ValueError:
        # Callers test for "> -1", so a missing column is reported as -1.
        position = -1
    return position
|
|
11
|
|
# Scheme-table columns copied into the annotation, in output order.
_ANNOTATION_FIELDS = ('clonal_complex', 'CC', 'Lineage', 'species')


def _locate_mlst_db():
    """Return the database directory named in the output of ``mlst -h``.

    Raises:
        RuntimeError: If no help line mentions ``db/pubmlst`` with a
            single-quoted path.
        subprocess.CalledProcessError / OSError: If ``mlst`` cannot be run.
    """
    # Imported here because it is only needed when no db_path was supplied.
    import subprocess

    help_text = subprocess.check_output(['mlst', '-h']).decode()
    db_line = next(
        (line for line in help_text.splitlines() if 'db/pubmlst' in line),
        None,
    )
    # The path is whatever sits between the first pair of single quotes.
    quoted = db_line.split("'") if db_line is not None else []
    if len(quoted) < 2:
        raise RuntimeError("Could not find MLST database location.")
    return quoted[1].replace("bin/..", "")


def _lookup_annotation(scheme_path, sequence_type):
    """Return the annotation string for *sequence_type* in a scheme table.

    The table is tab-separated with a header row; the first column of each
    row is compared against *sequence_type*.  The result is a comma-joined
    list of ``name=value`` pairs for every non-empty annotation column, or
    ``None`` when no row matches.  If several rows match, the last one wins.
    """
    annotation = None
    with open(scheme_path, 'r', newline='') as handle:
        reader = csv.reader(handle, delimiter='\t')
        headers = next(reader, [])
        columns = [
            (name, headers.index(name))
            for name in _ANNOTATION_FIELDS
            if name in headers
        ]
        for row in reader:
            # csv yields [] for blank lines; skip those and non-matching rows.
            if not row or row[0] != sequence_type:
                continue
            # Rows may be shorter than the header when trailing cells are
            # missing, so bounds-check before indexing.
            annotation = ','.join(
                f"{name}={row[idx]}"
                for name, idx in columns
                if idx < len(row) and row[idx]
            )
    return annotation


def main(mlst_file, db_path=None):
    """Print the first row of *mlst_file* with scheme annotations inserted.

    The first tab-separated row of *mlst_file* is read; its second column is
    used as the scheme name and its third as the sequence type (ST).  The ST
    is looked up in ``<db>/<scheme>/<scheme>.txt`` and, when found, a field
    such as ``clonal_complex=...,species=...`` is inserted after the third
    column.  When the ST is not in the table the row is printed unchanged.

    Args:
        mlst_file: Path to the tab-separated result file.
        db_path: Directory containing one sub-directory per scheme.  When
            ``None``, the location is derived from ``mlst -h``, which
            requires the ``mlst`` command to be on the PATH.

    Raises:
        ValueError: If *mlst_file* is empty or its first row has fewer than
            three columns.
        RuntimeError: If the database location cannot be determined.
        OSError: If either file cannot be opened.
    """
    with open(mlst_file, 'r', newline='') as handle:
        mlstout = next(csv.reader(handle, delimiter='\t'), None)

    if mlstout is None or len(mlstout) < 3:
        raise ValueError(
            f"{mlst_file}: expected at least 3 tab-separated columns "
            "(file, scheme, ST) on the first line"
        )

    schema = mlstout[1]
    mlst_st = mlstout[2]

    mlstloc = _locate_mlst_db() if db_path is None else db_path
    annotation = _lookup_annotation(f"{mlstloc}/{schema}/{schema}.txt", mlst_st)

    output = mlstout[:3]
    if annotation is not None:
        # A matched ST always adds a column, even if its annotation is empty.
        output.append(annotation)
    output.extend(mlstout[3:])

    print("\t".join(output))
|
|
63
|
|
if __name__ == "__main__":
    # Command line: <mlst_file> is required, [db_path] is optional; any
    # further arguments are ignored.
    if len(sys.argv) < 2:
        # stdout carries the tab-separated result, so the usage text goes to
        # stderr; sys.exit(str) prints it there and exits with status 1.
        sys.exit("Usage: python mlstAddFields.py <mlst_file> [db_path]")

    mlst_file = sys.argv[1]
    db_path = sys.argv[2] if len(sys.argv) > 2 else None

    main(mlst_file, db_path)
|
|
73
|