Mercurial > repos > estrain > microrunqc
comparison mlstAddFields.py @ 22:0b36ba6fb319 draft
Uploaded
author | estrain |
---|---|
date | Fri, 19 Jan 2024 11:47:53 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
21:5083f8406e34 | 22:0b36ba6fb319 |
---|---|
1 #!/usr/bin/env python3 | |
2 | |
3 import sys | |
4 import csv | |
5 | |
def find_index(headers, term):
    """Return the position of the first occurrence of *term* in *headers*.

    Returns -1 when *term* is not present, so callers can test the result
    with ``> -1`` before indexing a row.
    """
    if term in headers:
        return headers.index(term)
    return -1
11 | |
# Scheme-file columns copied into the description field, in output order.
_DESCRIPTION_FIELDS = ('clonal_complex', 'CC', 'Lineage', 'species')


def _locate_mlst_db():
    """Return the pubmlst database directory advertised by ``mlst -h``.

    Requires the 'mlst' command to be installed and available on the PATH.
    Raises Exception when no help line mentions 'db/pubmlst'.
    """
    import subprocess

    help_text = subprocess.check_output(['mlst', '-h']).decode()
    for help_line in help_text.split('\n'):
        if 'db/pubmlst' in help_line:
            # The path is the first single-quoted token on that line; the
            # "bin/.." hop is removed from it.
            return help_line.split("'")[1].replace("bin/..", "")
    raise Exception("Could not find MLST database location.")


def _load_scheme_descriptions(scheme_path):
    """Map each row's first column (the ST) to its 'name=value' annotations.

    Only the columns named in _DESCRIPTION_FIELDS are used, and only when
    the value is non-empty. Blank rows are skipped, and rows too short to
    reach an annotation column simply contribute no value for it.
    """
    descriptions = {}
    # newline='' is what the csv module documentation recommends for files
    # handed to csv.reader.
    with open(scheme_path, 'r', newline='') as handle:
        reader = csv.reader(handle, delimiter='\t')
        headers = next(reader, None)
        if headers is None:
            return descriptions

        # First occurrence of each annotation column that the header has.
        columns = [
            (label, headers.index(label))
            for label in _DESCRIPTION_FIELDS
            if label in headers
        ]

        for row in reader:
            if not row:
                continue
            desc = [
                f"{label}={row[idx]}"
                for label, idx in columns
                if idx < len(row) and row[idx]
            ]
            descriptions[row[0]] = ','.join(desc)
    return descriptions


def main(mlst_file, db_path=None):
    """Print the first mlst result row with scheme annotations inserted.

    The first tab-separated row of *mlst_file* is read; its second column is
    taken as the scheme name and its third as the sequence type (ST). The ST
    is looked up in ``<db>/<scheme>/<scheme>.txt`` and, when found, a
    comma-separated ``name=value`` description (clonal_complex, CC, Lineage,
    species) is inserted as a new fourth column. The resulting row is
    printed to stdout, tab-separated.

    Args:
        mlst_file: Path to a tab-separated mlst result file.
        db_path: Directory containing one sub-directory per scheme. When
            None, the location is discovered by running ``mlst -h``.

    Raises:
        ValueError: If *mlst_file* has no first row with at least three
            columns.
        Exception: If *db_path* is None and the database location cannot be
            found in the ``mlst -h`` output.
    """
    with open(mlst_file, 'r', newline='') as handle:
        mlstout = next(csv.reader(handle, delimiter='\t'), None)

    if mlstout is None or len(mlstout) < 3:
        raise ValueError(
            f"{mlst_file}: expected a tab-separated row with at least "
            "3 columns (file, scheme, ST)"
        )

    schema = mlstout[1]
    mlst_st = mlstout[2]

    mlstloc = _locate_mlst_db() if db_path is None else db_path
    scheme_path = f"{mlstloc}/{schema}/{schema}.txt"

    try:
        descriptions = _load_scheme_descriptions(scheme_path)
    except FileNotFoundError:
        # No definition file for this scheme name (presumably the case when
        # mlst could not assign a scheme -- TODO confirm the placeholder it
        # emits). Pass the row through unchanged instead of crashing.
        print(
            f"Warning: scheme file not found: {scheme_path}; no fields added",
            file=sys.stderr,
        )
        descriptions = {}

    output = mlstout[:3]
    if mlst_st in descriptions:
        output.append(descriptions[mlst_st])
    output.extend(mlstout[3:])

    print("\t".join(output))
63 | |
if __name__ == "__main__":
    # Command line: the mlst result file is mandatory; the database
    # directory is optional and left as None when not supplied.
    if len(sys.argv) <= 1:
        print("Usage: python mlstAddFields.py <mlst_file> [db_path]")
        sys.exit(1)

    mlst_file = sys.argv[1]
    db_path = None
    if len(sys.argv) >= 3:
        db_path = sys.argv[2]

    main(mlst_file, db_path)
73 |