Mercurial > repos > computational-metabolomics > cfmid
comparison cfmid.py @ 0:41c4de0ed4ec draft default tip
planemo upload for repository https://github.com/computational-metabolomics/cfm-galaxy/tree/master/tools/cfm commit f0157bb3b01871411f27c1d5bd4ccee2039335d0
author | computational-metabolomics |
---|---|
date | Wed, 15 Nov 2023 16:28:04 +0000 (17 months ago) |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:41c4de0ed4ec |
---|---|
import argparse
import csv
import os
import shutil
import subprocess
5 | |
6 | |
# Command-line interface: every option is an optional, string-valued flag
# (no type conversion, no defaults, so an omitted option is ``None``).
# The tuple order is the order in which the options are registered.
parser = argparse.ArgumentParser()
for option_name in (
    "input",
    "db_local",
    "num_highest",
    "ppm_db",
    "ppm_mass_tol",
    "abs_mass_tol",
    "polarity",
    "score_type",
    "results",
    "tool_directory",
):
    parser.add_argument("--" + option_name)

args = parser.parse_args()
# Echo the parsed options so they show up in the tool's standard output.
print(args)
21 | |
22 # Example | |
23 # python cfmid.py --abs_mass_tol='0.01' --db_local='test-data/demo_db.tsv' --input='test-data/input.msp' --num_highest='10' --polarity='pos' --ppm_db='10.0' --ppm_mass_tol='10.0' --results='results.txt' --score_type='Jaccard' --tool_directory='/home/rw/Documents/cfm-galaxy/tools/cfm' | |
24 | |
# Load the local compound database (tab-separated, with a header row) once
# and derive two lookups from it:
#   id2info - "Identifier" value -> the complete row (dict of column -> text)
#   mz2id   - list of (MonoisotopicMass as float, Identifier) pairs, kept in
#             file order
with open(args.db_local) as db_handle:
    db_rows = list(csv.DictReader(db_handle, delimiter="\t"))

id2info = {record["Identifier"]: record for record in db_rows}
mz2id = [
    (float(record["MonoisotopicMass"]), record["Identifier"])
    for record in db_rows
]
33 | |
# Scratch directory (relative to the current working directory) that
# receives one result file per spectrum. Anything left over from an earlier
# run is discarded so the directory always starts out empty.
name_tmp = "tmp"
stale_results_present = os.path.isdir(name_tmp)
if stale_results_present:
    shutil.rmtree(name_tmp)
os.makedirs(name_tmp)
def _run_cfmid(featid, mz2, peaklist):
    """Rank database candidates for one spectrum with the ``cfm-id`` program.

    Candidates are the database entries whose monoisotopic mass lies strictly
    inside a ``--ppm_db`` ppm window around ``mz2``. Nothing is written or
    executed when there is no candidate.

    Args:
        featid: Spectrum name taken from the ``NAME: `` line; used as the
            spectrum id passed to ``cfm-id`` and in the result file name.
        mz2: Precursor m/z with 1.007276 subtracted (``pos``) or added
            (any other polarity).
        peaklist: One tuple per peak line, as produced by splitting the line
            on tabs; the first two fields are written to the spectrum file.

    Side effects:
        Overwrites ``./tmpspec.txt`` and ``./tmpcand.txt`` in the current
        directory, prints the command, and runs ``cfm-id``, which is asked to
        write ``<name_tmp>/cfm_<featid>.txt``.
    """
    tolerance = (float(args.ppm_db) * mz2) / 1e6
    lower, upper = mz2 - tolerance, mz2 + tolerance
    # check hits
    cand_id_list = [ident for mass, ident in mz2id if lower < mass < upper]
    # run only if we got candidates
    if not cand_id_list:
        return

    # write spec file: the same peak list is repeated under each of the
    # three energy-level headers
    with open("./tmpspec.txt", "w") as outfile:
        for energy in ["low", "mid", "high"]:
            outfile.write(energy + "\n")
            for peak in peaklist:
                outfile.write(peak[0] + "\t" + peak[1] + "\n")

    # write candidates file
    with open("./tmpcand.txt", "w") as outfile:
        for cand in cand_id_list:
            outfile.write(
                "{0} {1}\n".format(cand, id2info[cand]["InChI"])
            )  # TODO: Use InChI or SMILES

    # Pre-trained model files shipped with the tool, selected by polarity.
    if args.polarity == "pos":
        model_name = "positive_metab_se_cfm"
    else:
        model_name = "negative_metab_se_cfm"
    model_dir = os.path.join(args.tool_directory, "data", model_name)
    outi = os.path.join(name_tmp, "cfm_" + featid + ".txt")

    # The command is an argument list executed without a shell: featid comes
    # straight from the input file and must never be interpreted by a shell.
    command = [
        "cfm-id",
        "tmpspec.txt",
        featid,
        "tmpcand.txt",
        args.num_highest,
        args.ppm_db,
        args.ppm_mass_tol,
        args.abs_mass_tol,
        os.path.join(model_dir, "param_output0.log"),
        os.path.join(model_dir, "param_config.txt"),
        args.score_type,
        "1",
        outi,
    ]
    command = [str(part) for part in command]
    # run
    print(" ".join(command))
    try:
        # The exit status is deliberately not checked: one failing spectrum
        # must not abort the remaining ones.
        subprocess.call(command)
    except OSError as err:
        # e.g. cfm-id is not installed / not on PATH
        print("cfm-id could not be started: {0}".format(err))


# Read the spectra file record by record. Header lines are scanned for the
# "NAME: ", "PRECURSORMZ: " and "Num Peaks: " fields; after "Num Peaks: N"
# the next N lines are collected as tab-separated peaks, and the completed
# spectrum is then handed to _run_cfmid().
with open(args.input, "r") as infile:
    # Declared peak count of the spectrum being read; 0 means "reading
    # header lines".
    numlines = 0
    for line in infile:
        line = line.strip()
        if numlines != 0:
            if linesread != numlines:
                # still inside the peak block
                peaklist.append(tuple(line.split("\t")))
                linesread += 1
                continue
            # First line after the peak block: the spectrum is complete.
            numlines = 0
            _run_cfmid(featid, mz2, peaklist)
            if not line:
                # blank record separator, nothing more to do
                continue
            # A non-blank line here already belongs to the next record, so
            # fall through and parse it as a header line instead of
            # discarding it.
        print(line)
        if "NAME" in line:
            featid = line.split("NAME: ")[1]
        if "PRECURSORMZ" in line:
            mz = float(line.split("PRECURSORMZ: ")[1])
            # Shift the precursor m/z by 1.007276 to get the mass that is
            # compared with the database's MonoisotopicMass column.
            if args.polarity == "pos":
                mz2 = mz - 1.007276
            else:
                mz2 = mz + 1.007276
        if "Num Peaks" in line:
            numlines = int(line.split("Num Peaks: ")[1])
            linesread = 0
            peaklist = []

    # The file may end directly after the last peak line (no trailing blank
    # line); without this the final spectrum would never be processed.
    if numlines != 0 and linesread == numlines:
        _run_cfmid(featid, mz2, peaklist)
135 | |
136 | |
# merge outputs
# Concatenate every per-spectrum result file found in name_tmp into the
# single table given by --results. Files are processed oldest first (by
# modification time). Every row is prefixed with the spectrum id recovered
# from the file name "cfm_<id>.txt", and the space-separated result columns
# are converted to tab-separated ones.
outfiles = os.listdir(name_tmp)
outfiles.sort(key=lambda x: os.path.getmtime(os.path.join(name_tmp, x)))
with open(args.results, "w") as outfile:
    outfile.write("UID\tRank\tScore\tIdentifier\tInChI\n")
    for fname in outfiles:
        # Strip only the fixed "cfm_" prefix and ".txt" suffix. Splitting on
        # "_" or ".txt" would truncate ids that themselves contain an
        # underscore or ".txt" (e.g. "cfm_feat_1.txt" must yield "feat_1").
        fileid = os.path.basename(fname)
        if fileid.startswith("cfm_"):
            fileid = fileid[len("cfm_"):]
        if fileid.endswith(".txt"):
            fileid = fileid[: -len(".txt")]
        with open(os.path.join(name_tmp, fname)) as infile:
            for line in infile:
                # Always terminate the row, so a result file whose last line
                # lacks a newline cannot run into the next file's first row.
                line = line.rstrip("\n").replace(" ", "\t")
                outfile.write(fileid + "\t" + line + "\n")