0
|
1 #!/usr/bin/env python
|
|
2 # encoding: utf-8
|
|
'''
Module wrapping the NIST MSSEARCH application for matching
spectra against one or more spectra libraries.
'''
|
|
7 import csv
|
|
8 import sys
|
|
9 import fileinput
|
|
10 import urllib2
|
|
11 import time
|
|
12 import utils
|
|
13 import uuid
|
|
14 import os
|
|
15 import subprocess
|
|
16 from report_generator import ReportGenerator
|
|
17
|
|
18 __author__ = "Pieter Lukasse"
|
|
19 __contact__ = "pieterlukasse@gmail.com,pieter.lukasse@wur.nl"
|
|
20 __copyright__ = "Copyright, 2015"
|
|
21 __license__ = "Apache v2"
|
|
22
|
|
23
|
|
24
|
7
|
def _prepare_NIST(uuid_value, nist_home_dir, nist_ini_file, spectrum_file, is_wine, log_file):
    r'''
    Creates a private copy of the NIST installation for this run and
    configures it so that MSSEARCH imports the given spectrum file.

    Executes the following steps (<new home> = nist_home_dir + uuid_value):
    - copy the nist_home_dir folder to <new home>
    - copy spectrum_file to <new home>/spectrum_file.msp
    - (re)create <new home>/MSSEARCH/AUTOIMP.MSD -> pointing to
      <root>\MSSEARCH\temp.msd
    - (re)create <new home>/MSSEARCH/temp.msd -> pointing to
      <root>\spectrum_file.msp, with the text "10 724" in the second row
    - (re)create <new home>/MSSEARCH/nistms.INI from nist_ini_file, where
      every line containing "Library Directory=" or "Dir=" is replaced by
      an entry pointing to the MSSEARCH folder of the new copy

    <root> is "C:\NIST" + uuid_value in case of is_wine, <new home> otherwise.

    @param uuid_value: unique suffix appended to nist_home_dir
    @param nist_home_dir: NIST installation folder (a trailing slash or
                          backslash is ignored)
    @param nist_ini_file: template nistms.INI file
    @param spectrum_file: spectra file to search with
    @param is_wine: True when NIST is executed via wine
    @param log_file: log file handed to utils.log_message
    @return: path of the new NIST home (<new home>)
    @raise Exception: if is_wine and nist_home_dir does not end with
                      "drive_c/NIST"
    '''
    if nist_home_dir.endswith(("/", "\\")):
        nist_home_dir = nist_home_dir[:-1]

    # small validation for wine scenario
    if is_wine and not nist_home_dir.endswith("drive_c/NIST"):
        raise Exception('Error: invalid NIST home. For wine usage NIST home dir must be in the .wine folder and then in drive_c/NIST')

    new_nist_home = nist_home_dir + uuid_value
    utils.copy_dir(nist_home_dir, new_nist_home)

    utils.copy_file(spectrum_file, new_nist_home + "/spectrum_file.msp")

    # Root folder as it has to be written inside the NIST configuration files:
    if is_wine:
        config_root = "C:\\NIST" + uuid_value
    else:
        config_root = new_nist_home
    mssearch_dir = new_nist_home + "/MSSEARCH"

    utils.log_message(log_file, "configuring NIST input...")
    # Mode "w" truncates whatever was copied along from the original
    # installation, so no stale content can end up in these files (and a
    # file that is missing from the installation is simply created).
    with open(mssearch_dir + "/AUTOIMP.MSD", "w") as autoimp_file:
        autoimp_file.write(config_root + "\\MSSEARCH\\temp.msd")

    with open(mssearch_dir + "/temp.msd", "w") as msd_file:
        msd_file.write(config_root + "\\spectrum_file.msp\n")
        msd_file.write("10 724")

    utils.log_message(log_file, "configuring NIST ini...")
    # TODO : this loop/replace below is a bit limited to specific variables...either test different NIST versions or make more generic (harder in case of wine, or we need extra "home in .INI file" parameter):
    with open(nist_ini_file) as template_ini:
        with open(mssearch_dir + "/nistms.INI", "w") as new_ini:
            for line in template_ini:
                if "Library Directory=" in line:
                    # NOTE(review): this entry gets the host path even in
                    # the wine scenario, unlike "Dir=" below - confirm
                    # that this is intended.
                    line = "Library Directory=" + new_nist_home + "\\MSSEARCH\\\n"
                if "Dir=" in line:
                    line = "Dir=" + config_root + "\\MSSEARCH\\\n"
                new_ini.write(line)

    return new_nist_home
|
|
87
|
7
|
def _run_NIST(new_nist_home, output_file, is_wine, log_file, timeout_seconds=20, poll_interval=2):
    '''
    Runs the NIST search and copies its result file to output_file:
    - remove new_nist_home/MSSEARCH/SRCREADY.TXT if it is still there
    - run : (wine) new_nist_home/MSSEARCH/nistms$.exe /INSTRUMENT /PAR=2
    - monitor : wait until new_nist_home/MSSEARCH/SRCREADY.TXT exists, or
      until timeout_seconds have been spent waiting
    - kill the NIST process (the process group started for wine, or
      nistms.exe via taskkill otherwise)
    - copy new_nist_home/MSSEARCH/SRCRESLT.TXT to output_file

    The new_nist_home folder itself is not removed here.

    @param new_nist_home: NIST home folder prepared for this run
    @param output_file: destination for the SRCRESLT.TXT result file
    @param is_wine: True when NIST has to be executed via wine
    @param log_file: log file handed to utils.log_message
    @param timeout_seconds: maximum time to wait for SRCREADY.TXT
    @param poll_interval: seconds to sleep between two checks
    '''
    # TODO : to avoid conflicts in the orphan process killing (see end of
    # this method) NIST should only be started after a previous nistms.exe
    # process has been killed. No solution is in place yet: in the windows
    # context parallel calls are not expected, as there we only run tests
    # one by one for now.

    mssearch_dir = new_nist_home + "/MSSEARCH"

    # remove old file, if it is there:
    file_to_monitor = mssearch_dir + "/SRCREADY.TXT"
    if os.path.exists(file_to_monitor):
        os.remove(file_to_monitor)

    exec_path = mssearch_dir + "/nistms$.exe"

    pro = None
    if is_wine:
        utils.log_message(log_file, "calling wine with " + exec_path)
        print("calling wine with " + exec_path)
        cmd = ["wine " + exec_path + " /INSTRUMENT /PAR=2"]
        # The os.setsid() is passed in the argument preexec_fn so
        # it's run after the fork() and before exec() to run the shell.
        pro = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                               shell=True, preexec_fn=os.setsid)
    else:
        subprocess.call([exec_path, "/INSTRUMENT", "/PAR=2"])

    # monitor process by checking state file:
    utils.log_message(log_file, "monitoring SRCREADY.TXT...")
    time_sleeping = 0
    while time_sleeping < timeout_seconds and not os.path.exists(file_to_monitor):
        time.sleep(poll_interval)
        time_sleeping = time_sleeping + poll_interval

    if not os.path.exists(file_to_monitor):
        # Do not give up silently: make the timeout visible in the log.
        utils.log_message(log_file, "SRCREADY.TXT not found after waiting "
                          + str(time_sleeping) + " seconds, continuing anyway...")

    # kill process:
    # nistms$ will terminate by itself...nistms.exe is the one that
    # stays open...and orphan..killing it:
    utils.log_message(log_file, "killing wine process...")
    if is_wine:
        # signal 9 sent to the whole process group of the wine call
        os.killpg(pro.pid, 9)
    else:
        # windows case:
        proc_name = "nistms.exe"
        os.system("taskkill /f /im " + proc_name)

    # copy SRCRESLT.TXT to output_file
    result_file = mssearch_dir + "/SRCRESLT.TXT"
    utils.copy_file(result_file, output_file)
|
|
161
|
|
162
|
|
def _create_html_report(output_html_report, output_html_report_files_path, hits_dict, spectra_dict):
    '''
    Renders the HTML report and copies the files it needs next to it.

    This report will contain a page that displays essentially the same list as found in the
    tabular output file (rendered with datatables jquery plugin), with some extra features:
    - when user clicks on an entry, it should display the query spectrum and the hit spectrum
      in "head to tail" and "difference" mode (see galaxy/report_example.png)
          -> the query spectrum can be generated from the data in the input MSP file
          -> the library "online representative" spectrum can be generated from data returned by http://webbook.nist.gov/cgi/cbook.cgi?JCAMP=C537268&Index=0&Type=Mass ,
             where C537268 in this case is the CAS ID without the '-' separators

    @param output_html_report: path of the HTML file to write
    @param output_html_report_files_path: folder receiving spectrum_gen.js
                                          and the lib and images folders
    @param hits_dict: hits data handed to the report template
    @param spectra_dict: query spectra data handed to the report template
    @return: None
    '''
    module_dir = os.path.dirname(__file__)

    # step 1 : generate HTML via the template engine. The "with" block makes
    # sure the report file is closed, also when rendering fails.
    with open(output_html_report, 'w') as html_file:
        html_render = ReportGenerator(module_dir, 'templates/main_template.html', hits_dict, spectra_dict)
        html_render.render(html_file)

    # copy necessary .js files as well:
    templates_folder = module_dir + '/templates/'
    utils.copy_file(templates_folder + 'spectrum_gen.js', output_html_report_files_path + "/spectrum_gen.js")
    utils.copy_dir(templates_folder + 'lib', output_html_report_files_path + '/lib')
    utils.copy_dir(templates_folder + 'images', output_html_report_files_path + '/images')

    return None
|
|
192
|
|
193 def _get_extra_info_and_link_cols(data_found, data_type_found, query_link):
|
|
194 '''
|
|
195 This method will go over the data found and will return a
|
|
196 list with the following items:
|
|
197 - Experiment details where hits have been found :
|
|
198 'organism', 'tissue','experiment_name','user_name','column_type'
|
|
199 - Link that executes same query
|
|
200
|
|
201 '''
|
|
202 # set() makes a unique list:
|
|
203 organism_set = []
|
|
204 tissue_set = []
|
|
205 experiment_name_set = []
|
|
206 user_name_set = []
|
|
207 column_type_set = []
|
|
208 cas_nr_set = []
|
|
209
|
|
210 if 'organism' in data_found:
|
|
211 organism_set = set(data_found['organism'])
|
|
212 if 'tissue' in data_found:
|
|
213 tissue_set = set(data_found['tissue'])
|
|
214 if 'experiment_name' in data_found:
|
|
215 experiment_name_set = set(data_found['experiment_name'])
|
|
216 if 'user_name' in data_found:
|
|
217 user_name_set = set(data_found['user_name'])
|
|
218 if 'column_type' in data_found:
|
|
219 column_type_set = set(data_found['column_type'])
|
|
220 if 'CAS' in data_found:
|
|
221 cas_nr_set = set(data_found['CAS'])
|
|
222
|
|
223
|
|
224 result = [data_type_found,
|
|
225
|
|
226 #To let Excel interpret as link, use e.g. =HYPERLINK("http://stackoverflow.com", "friendly name"):
|
|
227 "=HYPERLINK(\""+ query_link + "\", \"Link to entries found in DB \")"]
|
|
228 return result
|
|
229
|
|
230
|
|
231
|
|
232
|
|
233
|
|
234 # alternative: ?
|
|
235 # s = requests.Session()
|
|
236 # s.verify = False
|
|
237 # #s.auth = (token01, token02)
|
|
238 # resp = s.get(url, params={'name': 'anonymous'}, stream=True)
|
|
239 # content = resp.content
|
|
240 # # transform to dictionary:
|
|
241
|
|
242
|
|
243
|
|
244 def _save_data(data_rows, headers, out_csv):
|
|
245 '''
|
|
246 Writes tab-separated data to file
|
|
247 @param data_rows: dictionary containing merged/enriched dataset
|
|
248 @param out_csv: output csv file
|
|
249 '''
|
|
250
|
|
251 # Open output file for writing
|
|
252 outfile_single_handle = open(out_csv, 'wb')
|
|
253 output_single_handle = csv.writer(outfile_single_handle, delimiter="\t")
|
|
254
|
|
255 # Write headers
|
|
256 output_single_handle.writerow(headers)
|
|
257
|
|
258 # Write one line for each row
|
|
259 for data_row in data_rows:
|
|
260 output_single_handle.writerow(data_row)
|
|
261
|
|
262 def _get_metexp_URL(metexp_dblink_file):
|
|
263 '''
|
|
264 Read out and return the URL stored in the given file.
|
|
265 '''
|
|
266 file_input = fileinput.input(metexp_dblink_file)
|
|
267 try:
|
|
268 for line in file_input:
|
|
269 if line[0] != '#':
|
|
270 # just return the first line that is not a comment line:
|
|
271 return line
|
|
272 finally:
|
|
273 file_input.close()
|
|
274
|
|
275
|
|
def main():
    '''
    Wrapper main function

    The input expected is (positional command line arguments):
        1. NIST_HOME dir
        2. nistms.INI
        3. spectrum_file.msp
        4. NIST output file name (receives the SRCRESLT.TXT content)
        5. final (tabular) output file name
        6. log file name
        7. (optional) htmlReportFile
        8. (optional) htmlReportFile.files_path

    The HTML report is only created when both optional arguments are given.
    '''
    seconds_start = int(round(time.time()))

    nist_home_dir = sys.argv[1]
    nist_ini_file = sys.argv[2]
    spectrum_file = sys.argv[3]
    nist_output_file = sys.argv[4]
    final_output_file = sys.argv[5]
    output_log_file = sys.argv[6]

    # html report pars (sys.argv[8] only exists when len(sys.argv) > 8):
    output_html_report = None
    output_html_report_files_path = None
    if len(sys.argv) > 8:
        output_html_report = sys.argv[7]
        output_html_report_files_path = sys.argv[8]

    # NIST is run via wine when the home dir path contains "wine":
    is_wine = "wine" in nist_home_dir

    uuid_value = str(uuid.uuid4())

    # prepare NIST environment for running:
    new_nist_home = _prepare_NIST(uuid_value, nist_home_dir, nist_ini_file, spectrum_file, is_wine, output_log_file)

    # run NIST search command:
    _run_NIST(new_nist_home, nist_output_file, is_wine, output_log_file)

    # write output tabular:
    hits_dict = utils.get_nist_out_as_dict(nist_output_file)
    utils.save_dict_as_tsv(hits_dict, final_output_file)

    # create report, only when the report arguments were given:
    if output_html_report is not None:
        spectra_dict = utils.get_spectra_file_as_dict(spectrum_file)
        _create_html_report(output_html_report, output_html_report_files_path, hits_dict, spectra_dict)

    seconds_end = int(round(time.time()))
    print("Took " + str(seconds_end - seconds_start) + " seconds")
|
|
329
|
|
330
|
|
331
|
|
# Only run the wrapper when this file is executed as a script:
if __name__ == "__main__":
    main()
|