0
|
1 #!/usr/bin/env python
|
|
2 # encoding: utf-8
|
|
3 '''
|
|
4 Module wrapping the NIST MSSEARCH application for matching
|
|
5 spectra against one or more spectra libraries.
|
|
6 '''
|
|
7 import csv
|
|
8 import sys
|
|
9 import fileinput
|
|
10 import urllib2
|
|
11 import time
|
|
12 import utils
|
|
13 import uuid
|
|
14 import os
|
|
15 import subprocess
|
|
16 from report_generator import ReportGenerator
|
|
17
|
|
18 __author__ = "Pieter Lukasse"
|
|
19 __contact__ = "pieterlukasse@gmail.com,pieter.lukasse@wur.nl"
|
|
20 __copyright__ = "Copyright, 2015"
|
|
21 __license__ = "Apache v2"
|
|
22
|
|
23
|
|
24
|
7
|
def _prepare_NIST(uuid_value, nist_home_dir, nist_ini_file, spectrum_file, is_wine, log_file):
    r'''
    Create a private, per-job copy of the NIST installation and configure it
    for an automated MSSEARCH run.

    Executes the following steps:
     - copy the nist_home_dir folder to nist_home_dir + uuid_value
     - copy spectrum_file into that copy as spectrum_file.msp
     - (re)create MSSEARCH/AUTOIMP.MSD, containing the path of
       MSSEARCH\temp.msd as NIST sees it
     - (re)create MSSEARCH/temp.msd, containing the path of spectrum_file.msp
       as NIST sees it on the first row and the text "10 724" on the second row
     - (re)create MSSEARCH/nistms.INI from nist_ini_file, rewriting every line
       that contains "Library Directory=" or "Dir="

    "As NIST sees it" means C:\NIST + uuid_value in case of is_wine, and
    nist_home_dir + uuid_value otherwise.

    @param uuid_value: unique suffix appended to the NIST home folder name
    @param nist_home_dir: existing NIST home (a trailing slash is ignored)
    @param nist_ini_file: template nistms.INI file to base the new one on
    @param spectrum_file: .msp file with the query spectra
    @param is_wine: True when NIST is executed through wine
    @param log_file: log file handed to utils.log_message
    @return: path of the newly created NIST home folder
    @raise Exception: if is_wine and nist_home_dir does not end in drive_c/NIST
    '''
    if nist_home_dir.endswith(("/", "\\")):
        nist_home_dir = nist_home_dir[:-1]

    # small validation for wine scenario
    if is_wine and not nist_home_dir.endswith("drive_c/NIST"):
        raise Exception('Error: invalid NIST home. For wine usage NIST home dir must be in the .wine folder and then in drive_c/NIST')

    new_nist_home = nist_home_dir + uuid_value
    utils.copy_dir(nist_home_dir, new_nist_home)

    utils.copy_file(spectrum_file, new_nist_home + "/spectrum_file.msp")

    # Home folder as it has to be written into the NIST configuration files:
    nist_view_of_home = new_nist_home
    if is_wine:
        nist_view_of_home = "C:\\NIST" + uuid_value

    autoimp_file = new_nist_home + "/MSSEARCH/AUTOIMP.MSD"
    temp_msd_file = new_nist_home + "/MSSEARCH/temp.msd"
    new_ini_file = new_nist_home + "/MSSEARCH/nistms.INI"

    # Remove whatever came along with the copied installation. The files are
    # removed (instead of only truncated) so that a read-only copy does not
    # block us, and a missing file is not an error: a fresh installation does
    # not necessarily contain them. temp.msd is included here because a
    # left-over one must not keep its old content.
    for stale_file in (autoimp_file, temp_msd_file, new_ini_file):
        if os.path.exists(stale_file):
            os.remove(stale_file)

    utils.log_message(log_file, "configuring NIST input...")
    with open(autoimp_file, "w") as text_file:
        text_file.write(nist_view_of_home + "\\MSSEARCH\\temp.msd")

    with open(temp_msd_file, "w") as text_file:
        text_file.write(nist_view_of_home + "\\spectrum_file.msp\n")
        text_file.write("10 724")

    utils.log_message(log_file, "configuring NIST ini...")
    # TODO : this loop/replace below is a bit limited to specific variables...either test different NIST versions or make more generic (harder in case of wine, or we need extra "home in .INI file" parameter):
    with open(nist_ini_file) as ini_template:
        with open(new_ini_file, "w") as ini_out:
            for line in ini_template:
                if "Library Directory=" in line:
                    # NOTE(review): this entry gets the host path even in the
                    # wine case, unlike "Dir=" below - kept as it was, confirm
                    # that this is intended.
                    line = "Library Directory=" + new_nist_home + "\\MSSEARCH\\\n"
                if "Dir=" in line:
                    line = "Dir=" + nist_view_of_home + "\\MSSEARCH\\\n"
                ini_out.write(line)

    return new_nist_home
|
|
87
|
16
|
def _clean_up_NIST(new_nist_home):
    '''
    Delete the job specific NIST home folder again.

    @param new_nist_home: folder to remove (handed to utils.remove_dir)
    '''
    utils.remove_dir(new_nist_home)
|
|
93
|
14
|
def _run_NIST(new_nist_home, output_file, is_wine, log_file, job_size):
    '''
    Run the NIST search in the prepared NIST home and collect its result file.

     - remove an old MSSEARCH/SRCREADY.TXT, if it is there
     - run : (wine) new_nist_home/MSSEARCH/nistms$.exe /INSTRUMENT /PAR=2
     - monitor : check every 2 seconds whether MSSEARCH/SRCREADY.TXT exists
       (only its existence is checked, not its content)
     - when ready:
         > kill nist process
         > copy SRCRESLT.TXT to output_file

    @param new_nist_home: NIST home folder to run from
    @param output_file: destination for the copy of MSSEARCH/SRCRESLT.TXT
    @param is_wine: True to start NIST through wine
    @param log_file: log file handed to utils.log_message
    @param job_size: number used to scale the timeout: the search may take
                     60 seconds plus 2 seconds per unit of job_size
    @raise Exception: on timeout, after trying to kill the NIST processes
    '''
    # TODO : to avoid conflicts in the orphan process killing (see end of this
    # method), NIST should only run again after the previous nistms.exe process
    # has been killed. The solution is currently only for wine (in the windows
    # context the solution is not there yet, but parallel calls are not
    # expected as in windows we only run tests one by one for now)
    poll_interval = 2
    start_up_time = 60

    # remove old file, if it is there:
    file_to_monitor = new_nist_home + "/MSSEARCH/SRCREADY.TXT"
    if os.path.exists(file_to_monitor):
        os.remove(file_to_monitor)

    exec_path = new_nist_home + "/MSSEARCH/nistms$.exe"
    nist_cmd = [exec_path, "/INSTRUMENT", "/PAR=2"]

    pro = None
    if is_wine:
        utils.log_message(log_file, "calling wine with " + exec_path)
        # The command is passed as an argument list (no shell), so a path
        # containing spaces or shell characters reaches wine unchanged and a
        # missing wine executable raises immediately.
        # os.setsid is passed as preexec_fn so it runs after the fork() and
        # before the exec(): the new process gets its own process group,
        # which is what _kill_NIST kills later on.
        pro = subprocess.Popen(["wine"] + nist_cmd, stdout=subprocess.PIPE,
                               preexec_fn=os.setsid)
    else:
        # subprocess.call waits until the started command returns
        subprocess.call(nist_cmd)

    # monitor process by checking state file:
    max_wait = start_up_time + job_size * 2
    time_sleeping = 0
    utils.log_message(log_file, "monitoring SRCREADY.TXT...")
    while not os.path.exists(file_to_monitor):
        time.sleep(poll_interval)
        time_sleeping = time_sleeping + poll_interval
        if time_sleeping > max_wait:
            # abort with timeout:
            utils.log_message(log_file, "No results after " + str(time_sleeping) + " seconds...")
            _kill_NIST(pro, is_wine)
            # ugly workaround:
            utils.kill_process_by_name("nistms.exe")
            utils.kill_process_by_name("nistms$.exe")
            raise Exception('Error: timeout waiting for NIST results.')

    utils.log_message(log_file, "done...")
    # kill process: nistm$ will terminate by itself...nistms.exe is the one
    # that stays open...and orphan..killing it:
    _kill_NIST(pro, is_wine)

    # copy SRCRESLT.TXT to output_file
    result_file = new_nist_home + "/MSSEARCH/SRCRESLT.TXT"
    utils.copy_file(result_file, output_file)
|
0
|
160
|
20
|
161
|
|
162
|
|
def _kill_NIST(process_group, is_wine):
    '''
    Kill the NIST process that is left behind after a search.

    Author's note: nistm$ will terminate...nistms.exe is the one that
    stays open...and orphan. This method kills it.

    @param process_group: in the wine case, an object whose pid attribute is
                          the id of the process group to kill; not used
                          otherwise
    @param is_wine: True to kill the whole process group with SIGKILL,
                    False to call the Windows "taskkill" on nistms.exe
    '''
    import errno
    import signal

    if is_wine:
        try:
            os.killpg(process_group.pid, signal.SIGKILL)
        except OSError as err:
            # ESRCH means the process group is gone already, which is the
            # state we want: do not fail the run because of it.
            if err.errno != errno.ESRCH:
                raise
    else:
        # windows case:
        proc_name = "nistms.exe"
        os.system("taskkill /f /im " + proc_name)
|
|
176
|
20
|
177
|
0
|
178
|
|
179
|
|
def _create_html_report(output_html_report, output_html_report_files_path, hits_dict, spectra_dict):
    '''
    Write the HTML report and copy the files it needs.

    The "templates" folder next to this module is copied to
    output_html_report_files_path and templates/main_template.html is rendered
    into output_html_report by ReportGenerator.

    Author's notes on the intended report: it should contain a page that
    displays essentially the same list as found in the tabular output file
    (rendered with datatables jquery plugin), with some extra features:
     - when user clicks on an entry, it should display the query spectrum and
       the hit spectrum in "head to tail" and "difference" mode
       (see galaxy/report_example.png)
       -> the query spectrum can be generated from the data in the input MSP file
       -> the library "online representative" spectrum can be generated from
          data returned by
          http://webbook.nist.gov/cgi/cbook.cgi?JCAMP=C537268&Index=0&Type=Mass ,
          where C537268 in this case is the CAS ID without the '-' separators

    @param output_html_report: path of the HTML file to write
    @param output_html_report_files_path: folder receiving the template files
    @param hits_dict: hits, handed to ReportGenerator
    @param spectra_dict: query spectra, handed to ReportGenerator
    @return: None
    '''
    module_folder = os.path.dirname(__file__)

    # copy necessary .js files as well:
    templates_folder = module_folder + '/templates/'
    utils.copy_dir(templates_folder, output_html_report_files_path)

    # the with-statement closes the report file also when rendering fails
    with open(output_html_report, 'w') as html_file:
        html_render = ReportGenerator(module_folder, 'templates/main_template.html', hits_dict, spectra_dict)
        html_render.render(html_file)

    return None
|
|
208
|
|
def _get_extra_info_and_link_cols(data_found, data_type_found, query_link):
    '''
    Return the extra columns for a result row as a list with two items:
     - data_type_found, unchanged
     - a spreadsheet formula that links to query_link:
       =HYPERLINK("<query_link>", "Link to entries found in DB ")

    @param data_found: not used (kept so that existing calls keep working)
    @param data_type_found: value for the first column
    @param query_link: URL (string) to put in the HYPERLINK formula
    @return: list [data_type_found, hyperlink formula]
    '''
    # To let Excel interpret as link, use e.g. =HYPERLINK("http://stackoverflow.com", "friendly name"):
    link_formula = "=HYPERLINK(\"" + query_link + "\", \"Link to entries found in DB \")"
    return [data_type_found, link_formula]
|
|
245
|
|
246
|
|
247
|
|
248
|
|
249
|
|
250 # alternative: ?
|
|
251 # s = requests.Session()
|
|
252 # s.verify = False
|
|
253 # #s.auth = (token01, token02)
|
|
254 # resp = s.get(url, params={'name': 'anonymous'}, stream=True)
|
|
255 # content = resp.content
|
|
256 # # transform to dictionary:
|
|
257
|
|
258
|
|
259
|
|
def _save_data(data_rows, headers, out_csv):
    '''
    Writes tab-separated data to file: first the headers, then one line for
    each item of data_rows.

    @param data_rows: iterable of rows, each row being a sequence of values
    @param headers: sequence with the column names
    @param out_csv: output csv file
    '''
    # The csv module needs a binary file in Python 2 and a text file opened
    # with newline='' in Python 3.
    if sys.version_info[0] < 3:
        outfile_handle = open(out_csv, 'wb')
    else:
        outfile_handle = open(out_csv, 'w', newline='')

    # the with-statement makes sure the rows are flushed and the file closed
    with outfile_handle:
        tsv_writer = csv.writer(outfile_handle, delimiter="\t")
        tsv_writer.writerow(headers)
        tsv_writer.writerows(data_rows)
|
|
277
|
|
def _get_metexp_URL(metexp_dblink_file):
    '''
    Return the first line of the given file that does not start with '#'.

    The line is returned exactly as read (including its line ending);
    the result is None when there is no such line.
    '''
    link_lines = fileinput.input(metexp_dblink_file)
    try:
        # comment lines start with '#': skip those and take the first other line
        non_comment_lines = (entry for entry in link_lines if not entry.startswith('#'))
        return next(non_comment_lines, None)
    finally:
        link_lines.close()
|
|
290
|
|
291
|
|
def main():
    '''
    Wrapper main function

    The input expected (command line arguments, in this order) is:
    NIST_HOME dir
    nistms.INI
    spectrum_file.msp
    NIST output file (receives the copy of SRCRESLT.TXT)
    final output file (tabular)
    log file
    (optional) htmlReportFile
    (optional) htmlReportFile.files_path

    The two optional arguments have to be given together.
    '''
    seconds_start = int(round(time.time()))

    nist_home_dir = sys.argv[1]
    nist_ini_file = sys.argv[2]
    spectrum_file = sys.argv[3]
    nist_output_file = sys.argv[4]
    final_output_file = sys.argv[5]
    output_log_file = sys.argv[6]

    # html report pars:
    output_html_report = None
    output_html_report_files_path = None
    if len(sys.argv) == 8:
        # fail with a clear message instead of an IndexError further on
        raise Exception('Error: htmlReportFile given without htmlReportFile.files_path')
    make_html_report = len(sys.argv) > 8
    if make_html_report:
        output_html_report = sys.argv[7]
        output_html_report_files_path = sys.argv[8]

    is_wine = "wine" in nist_home_dir

    uuid_value = str(uuid.uuid4())

    # prepare NIST environment for running:
    new_nist_home = _prepare_NIST(uuid_value, nist_home_dir, nist_ini_file, spectrum_file, is_wine, output_log_file)

    try:
        # run NIST search command:
        spectra_dict = utils.get_spectra_file_as_dict(spectrum_file)
        job_size = len(spectra_dict)
        _run_NIST(new_nist_home, nist_output_file, is_wine, output_log_file, job_size)
    finally:
        # clean-up NIST environment, also when the search failed or timed
        # out, so that no copy of the NIST folder is left behind:
        _clean_up_NIST(new_nist_home)

    # write output tabular:
    hits_dict = utils.get_nist_out_as_dict(nist_output_file)
    utils.save_dict_as_tsv(hits_dict, final_output_file)

    # create report:
    if make_html_report:
        _create_html_report(output_html_report, output_html_report_files_path, hits_dict, spectra_dict)

    seconds_end = int(round(time.time()))
    # single parenthesised argument: prints the same in Python 2 and Python 3
    print("Took " + str(seconds_end - seconds_start) + " seconds")
|
|
349
|
|
350
|
|
351
|
|
# Run the wrapper only when this file is executed as a script (not on import).
if __name__ == '__main__':
    main()
|