annotate mlstAddFields.py @ 22:0b36ba6fb319 draft

Uploaded
author estrain
date Fri, 19 Jan 2024 11:47:53 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
22
0b36ba6fb319 Uploaded
estrain
parents:
diff changeset
1 #!/usr/bin/env python3
0b36ba6fb319 Uploaded
estrain
parents:
diff changeset
2
0b36ba6fb319 Uploaded
estrain
parents:
diff changeset
3 import sys
0b36ba6fb319 Uploaded
estrain
parents:
diff changeset
4 import csv
0b36ba6fb319 Uploaded
estrain
parents:
diff changeset
5
0b36ba6fb319 Uploaded
estrain
parents:
diff changeset
def find_index(headers, term):
    """Return the position of *term* in *headers*, or -1 when it is absent.

    Args:
        headers: List of column names (any object with a list-style
            ``index`` method that raises ``ValueError`` on a miss).
        term: Column name to look for; the comparison is exact.

    Returns:
        The zero-based index of the first occurrence of *term*, or -1 if
        *headers* does not contain it.
    """
    try:
        return headers.index(term)
    except ValueError:
        # -1 is the "column not present" sentinel callers test with `> -1`.
        return -1
0b36ba6fb319 Uploaded
estrain
parents:
diff changeset
11
0b36ba6fb319 Uploaded
estrain
parents:
diff changeset
def main(mlst_file, db_path=None):
    """Print the first row of an mlst result with scheme metadata inserted.

    The first tab-separated row of *mlst_file* is read; its second column is
    used as the scheme name and its third column as the sequence type (ST).
    The ST is looked up in ``<db>/<scheme>/<scheme>.txt`` and, when found,
    a comma-separated ``key=value`` description built from that file's
    ``clonal_complex``, ``CC``, ``Lineage`` and ``species`` columns is
    inserted after the third column. The resulting row is printed to stdout,
    tab-separated.

    Args:
        mlst_file: Path to the tab-separated mlst result file.
        db_path: Directory containing one sub-directory per scheme. When
            ``None`` the location is derived from the output of ``mlst -h``.

    Raises:
        ValueError: If *mlst_file* is empty or its first row has fewer than
            three columns.
        RuntimeError: If *db_path* is ``None`` and no database location can
            be found in the ``mlst -h`` output.
        OSError: If either file cannot be opened.
    """
    # newline='' leaves line-ending handling to the csv module, as its
    # documentation recommends.
    with open(mlst_file, 'r', newline='') as handle:
        mlstout = next(csv.reader(handle, delimiter='\t'), None)
    if mlstout is None or len(mlstout) < 3:
        raise ValueError(
            f"{mlst_file}: expected a tab-separated row with at least 3 columns"
        )

    schema = mlstout[1]
    mlstST = mlstout[2]

    mlstloc = _find_mlst_db() if db_path is None else db_path
    mlst_file_path = f"{mlstloc}/{schema}/{schema}.txt"
    schema_dict = _load_scheme_descriptions(mlst_file_path)

    output = mlstout[:3]
    # NOTE: when the ST is not in the scheme file no column is inserted, so
    # the output then has one field fewer than for a known ST.
    if mlstST in schema_dict:
        output.append(schema_dict[mlstST])
    output.extend(mlstout[3:])

    print("\t".join(output))


def _find_mlst_db():
    """Return the database directory advertised in the ``mlst -h`` output.

    Requires the ``mlst`` command to be installed and available on the path.
    The first help line that contains ``db/pubmlst`` and a single-quoted
    value supplies the location; ``bin/..`` is removed from that value.
    """
    import subprocess

    help_text = subprocess.check_output(['mlst', '-h']).decode()
    for help_line in help_text.split('\n'):
        if 'db/pubmlst' not in help_line:
            continue
        pieces = help_line.split("'")
        # A matching line without a quoted value cannot supply a path.
        if len(pieces) > 1:
            return pieces[1].replace("bin/..", "")
    raise RuntimeError("Could not find MLST database location.")


def _load_scheme_descriptions(scheme_path):
    """Map each first-column value of the scheme file to its description."""
    wanted = ('clonal_complex', 'CC', 'Lineage', 'species')
    descriptions = {}
    with open(scheme_path, 'r', newline='') as handle:
        reader = csv.reader(handle, delimiter='\t')
        # An empty scheme file yields no headers and therefore no entries.
        headers = next(reader, [])

        columns = []
        for label in wanted:
            position = find_index(headers, label)
            if position > -1:
                columns.append((label, position))

        for row in reader:
            if not row:
                # Blank lines carry no key to index by.
                continue
            # Rows may be shorter than the header; treat missing trailing
            # cells like empty ones instead of raising IndexError.
            descriptions[row[0]] = ','.join(
                f"{label}={row[position]}"
                for label, position in columns
                if position < len(row) and row[position]
            )
    return descriptions
0b36ba6fb319 Uploaded
estrain
parents:
diff changeset
63
0b36ba6fb319 Uploaded
estrain
parents:
diff changeset
if __name__ == "__main__":
    # Command line: mlstAddFields.py <mlst_file> [db_path]
    if len(sys.argv) < 2:
        # sys.exit() with a string writes it to stderr and exits with
        # status 1, keeping the usage text out of the tool's stdout.
        sys.exit("Usage: python mlstAddFields.py <mlst_file> [db_path]")

    mlst_file = sys.argv[1]
    # The database path is optional; None makes main() locate it itself.
    db_path = sys.argv[2] if len(sys.argv) > 2 else None

    main(mlst_file, db_path)
0b36ba6fb319 Uploaded
estrain
parents:
diff changeset
73