0
|
1 #!/usr/bin/env python
|
|
2 # encoding: utf-8
|
|
3 '''
|
|
Module wrapping the NIST MSSEARCH application for matching
spectra against one or more spectra libraries.
|
|
6 '''
|
|
7 import csv
|
|
8 import sys
|
|
9 import fileinput
|
|
10 import urllib2
|
|
11 import time
|
|
12 import utils
|
|
13 import uuid
|
|
14 import os
|
|
15 import subprocess
|
|
16 from report_generator import ReportGenerator
|
|
17
|
|
18 __author__ = "Pieter Lukasse"
|
|
19 __contact__ = "pieterlukasse@gmail.com,pieter.lukasse@wur.nl"
|
|
20 __copyright__ = "Copyright, 2015"
|
|
21 __license__ = "Apache v2"
|
|
22
|
|
23
|
|
24
|
|
def _prepare_NIST(uuid_value, nist_home_dir, nist_ini_file, spectrum_file, is_wine):
    r'''
    Prepare a private, per-run copy of the NIST installation.

    Executes the following steps:
    - copy the nist_home_dir folder to nist_home_dir + uuid_value
    - copy spectrum_file into that copy as spectrum_file.msp
    - (re)create MSSEARCH/AUTOIMP.MSD in the copy, pointing to
      <home>\MSSEARCH\temp.msd
    - (re)create MSSEARCH/temp.msd in the copy, pointing to
      <home>\spectrum_file.msp on the first row and with the text "10 724"
      on the second row
    - (re)create MSSEARCH/nistms.INI in the copy from nist_ini_file, where
      every line containing "Library Directory=" or "Dir=" is replaced by an
      entry pointing to the MSSEARCH folder of the copy

    <home> is "C:\NIST" + uuid_value when is_wine is set (the path as seen
    from inside wine), and nist_home_dir + uuid_value otherwise.

    @param uuid_value: unique suffix used to name the copied NIST folder
    @param nist_home_dir: NIST home folder (a trailing slash is ignored)
    @param nist_ini_file: template nistms.INI file
    @param spectrum_file: query spectra file (.msp)
    @param is_wine: True when MSSEARCH will be executed via wine
    @return: path of the copied NIST home folder
    @raise Exception: when is_wine is set and nist_home_dir does not end
                      with drive_c/NIST
    '''
    if nist_home_dir.endswith("/") or nist_home_dir.endswith("\\"):
        nist_home_dir = nist_home_dir[:-1]

    # small validation for wine scenario
    if is_wine and not nist_home_dir.endswith("drive_c/NIST"):
        raise Exception('Error: invalid NIST home. For wine usage NIST home dir must be in the .wine folder and then in drive_c/NIST')

    new_nist_home = nist_home_dir + uuid_value
    utils.copy_dir(nist_home_dir, new_nist_home)

    utils.copy_file(spectrum_file, new_nist_home + "/spectrum_file.msp")

    # Location of the copy as it has to be written inside the NIST control files:
    replacement_text = new_nist_home
    if is_wine:
        replacement_text = "C:\\NIST" + uuid_value

    mssearch_dir = new_nist_home + "/MSSEARCH"

    # The files below are opened with "w" so that whatever was copied along from
    # the original NIST home is discarded, also when the file did not exist yet.
    with open(mssearch_dir + "/AUTOIMP.MSD", "w") as text_file:
        text_file.write(replacement_text + "\\MSSEARCH\\temp.msd")

    with open(mssearch_dir + "/temp.msd", "w") as text_file:
        text_file.write(replacement_text + "\\spectrum_file.msp\n")
        text_file.write("10 724")

    # TODO : this loop/replace below is a bit limited to specific variables...either
    # test different NIST versions or make more generic (harder in case of wine, or
    # we need extra "home in .INI file" parameter):
    with open(nist_ini_file) as ini_template:
        with open(mssearch_dir + "/nistms.INI", "w") as ini_copy:
            for line in ini_template:
                if "Library Directory=" in line:
                    # NOTE(review): this entry gets the host path, also in the wine
                    # case (unlike "Dir=" below) - kept as is, confirm it is intended.
                    line = "Library Directory=" + new_nist_home + "\\MSSEARCH\\\n"
                if "Dir=" in line:
                    line = "Dir=" + replacement_text + "\\MSSEARCH\\\n"
                ini_copy.write(line)

    return new_nist_home
|
|
85
|
|
def _run_NIST(new_nist_home, output_file, is_wine, timeout_seconds=20):
    '''
    Run the NIST search and collect its result file.

    - run : (wine) new_nist_home/MSSEARCH/nistms$.exe /INSTRUMENT /PAR=2
    - monitor : wait until new_nist_home/MSSEARCH/SRCREADY.TXT exists
    - afterwards:
        > kill the NIST process(es) that stay behind
        > copy SRCRESLT.TXT to output_file

    @param new_nist_home: NIST home folder prepared for this run
    @param output_file: destination for the SRCRESLT.TXT produced by NIST
    @param is_wine: True to start NIST via wine, False to start it directly
    @param timeout_seconds: maximum time to wait for SRCREADY.TXT
    @raise Exception: when SRCREADY.TXT did not appear within timeout_seconds
    '''
    poll_interval_seconds = 2

    # remove old file, if it is there:
    file_to_monitor = new_nist_home + "/MSSEARCH/SRCREADY.TXT"
    if os.path.exists(file_to_monitor):
        os.remove(file_to_monitor)

    exec_path = new_nist_home + "/MSSEARCH/nistms$.exe"

    pro = None
    if is_wine:
        print("calling wine with " + exec_path)
        # Argument list instead of a shell string, so that spaces or shell
        # metacharacters in the NIST path can not break the call.
        cmd = ["wine", exec_path, "/INSTRUMENT", "/PAR=2"]
        # os.setsid is passed as preexec_fn so it runs after the fork() and
        # before the exec(): wine and its children get their own process
        # group, which is killed as a whole further below.
        pro = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                               preexec_fn=os.setsid)
    else:
        cmd = [exec_path, "/INSTRUMENT", "/PAR=2"]
        subprocess.call(cmd)

    # monitor process by checking state file:
    time_sleeping = 0
    search_ready = os.path.exists(file_to_monitor)
    while not search_ready and time_sleeping < timeout_seconds:
        time.sleep(poll_interval_seconds)
        # must be "+=" : "=+" would reset the counter to 2 on every pass,
        # so the timeout would never be reached
        time_sleeping += poll_interval_seconds
        search_ready = os.path.exists(file_to_monitor)

    # kill process:
    # nistms$.exe terminates by itself, nistms.exe is the one that stays
    # open as an orphan, so that one is killed here:
    if is_wine:
        # 9 == SIGKILL, sent to the whole process group created above
        os.killpg(pro.pid, 9)
    else:
        # windows case:
        proc_name = "nistms.exe"
        os.system("taskkill /f /im " + proc_name)

    if not search_ready:
        raise Exception("Error: NIST search did not finish within " +
                        str(timeout_seconds) + " seconds (" +
                        file_to_monitor + " was not created)")

    # copy SRCRESLT.TXT to output_file
    result_file = new_nist_home + "/MSSEARCH/SRCRESLT.TXT"
    utils.copy_file(result_file, output_file)
|
|
156
|
|
157
|
|
def _create_html_report(output_html_report, output_html_report_files_path, hits_dict, spectra_dict):
    '''
    Render the HTML report and copy the static files it needs.

    This report will contain a page that displays essentially the same list as found in the
    tabular output file (rendered with datatables jquery plugin), with some extra features:
    - when user clicks on an entry, it should display the query spectrum and the hit spectrum
      in "head to tail" and "difference" mode (see galaxy/report_example.png)
        -> the query spectrum can be generated from the data in the input MSP file
        -> the library "online representative" spectrum can be generated from data returned by
           http://webbook.nist.gov/cgi/cbook.cgi?JCAMP=C537268&Index=0&Type=Mass ,
           where C537268 in this case is the CAS ID without the '-' separators

    @param output_html_report: path of the HTML file to write
    @param output_html_report_files_path: folder receiving spectrum_gen.js, lib/ and images/
    @param hits_dict: hits data handed to the ReportGenerator
    @param spectra_dict: query spectra data handed to the ReportGenerator
    @return: None
    '''
    # Absolute folder of this module: a plain dirname(__file__) is '' when the
    # script is started from its own folder, which would turn the templates
    # path below into '/templates/' at the file system root.
    module_dir = os.path.dirname(os.path.abspath(__file__))

    # step 1 : generate HTML via the jinja template engine
    # (the with block closes the report file also when rendering fails)
    with open(output_html_report, 'w') as html_file:
        html_render = ReportGenerator(module_dir, 'templates/main_template.html', hits_dict, spectra_dict)
        html_render.render(html_file)

    # copy necessary .js files as well:
    templates_folder = module_dir + '/templates/'
    utils.copy_file(templates_folder + 'spectrum_gen.js', output_html_report_files_path + "/spectrum_gen.js")
    utils.copy_dir(templates_folder + 'lib', output_html_report_files_path + '/lib')
    utils.copy_dir(templates_folder + 'images', output_html_report_files_path + '/images')

    return None
|
|
187
|
|
def _get_extra_info_and_link_cols(data_found, data_type_found, query_link):
    '''
    Build the extra columns for a row: the data type found and a link
    that executes the same query.

    @param data_found: data found for the query; currently not used for the
                       result (kept for interface compatibility)
    @param data_type_found: value placed in the first returned column
    @param query_link: URL that executes the same query
    @return: list [data_type_found, Excel HYPERLINK formula for query_link]
    '''
    # Inside an Excel string literal a double quote is written as two double
    # quotes; without this a quote in the link would break the formula.
    excel_safe_link = query_link.replace("\"", "\"\"")

    # To let Excel interpret as link, use e.g. =HYPERLINK("http://stackoverflow.com", "friendly name"):
    hyperlink_formula = "=HYPERLINK(\"" + excel_safe_link + "\", \"Link to entries found in DB \")"

    return [data_type_found, hyperlink_formula]
|
|
224
|
|
225
|
|
226
|
|
227
|
|
228
|
|
229 # alternative: ?
|
|
230 # s = requests.Session()
|
|
231 # s.verify = False
|
|
232 # #s.auth = (token01, token02)
|
|
233 # resp = s.get(url, params={'name': 'anonymous'}, stream=True)
|
|
234 # content = resp.content
|
|
235 # # transform to dictionary:
|
|
236
|
|
237
|
|
238
|
|
def _save_data(data_rows, headers, out_csv):
    '''
    Writes tab-separated data to file
    @param data_rows: iterable of rows (each row a sequence of column values)
    @param headers: sequence of column names, written as the first line
    @param out_csv: output csv file
    '''
    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline='' on Python 3; both produce the same bytes on disk.
    if sys.version_info[0] < 3:
        outfile_single_handle = open(out_csv, 'wb')
    else:
        outfile_single_handle = open(out_csv, 'w', newline='')

    # The with block makes sure the file is flushed and closed, also on errors
    with outfile_single_handle:
        output_single_handle = csv.writer(outfile_single_handle, delimiter="\t")

        # Write headers
        output_single_handle.writerow(headers)

        # Write one line for each row
        output_single_handle.writerows(data_rows)
|
|
256
|
|
def _get_metexp_URL(metexp_dblink_file):
    '''
    Return the first line of the given file that does not start with '#'.

    The line is returned exactly as read (a trailing newline, if any, is
    kept). When the file has no such line, None is returned.
    '''
    link_reader = fileinput.input(metexp_dblink_file)
    try:
        # lazily scan for the first non-comment line; None if there is none
        non_comment_lines = (entry for entry in link_reader if entry[0] != '#')
        return next(non_comment_lines, None)
    finally:
        link_reader.close()
|
|
269
|
|
270
|
|
def main():
    '''
    Wrapper main function

    The input expected is (in this order, on the command line):
        NIST_HOME dir
        nistms.INI
        spectrum_file.msp
        NIST output file (raw search result as copied from NIST)
        outputfileName (final tabular output)
        (optional) htmlReportFile
        (optional) htmlReportFile.files_path

    The two optional arguments have to be given together. NIST is run via
    wine when the NIST_HOME dir contains the text "wine".
    '''
    # sys.argv[0] is the script itself, followed by 5 mandatory arguments:
    if len(sys.argv) < 6:
        sys.exit("Error: expected arguments: NIST_HOME nistms.INI spectrum_file.msp "
                 "nist_output_file final_output_file [htmlReportFile htmlReportFile.files_path]")
    if len(sys.argv) == 7:
        sys.exit("Error: htmlReportFile was given without htmlReportFile.files_path")

    seconds_start = int(round(time.time()))

    nist_home_dir = sys.argv[1]
    nist_ini_file = sys.argv[2]
    spectrum_file = sys.argv[3]
    nist_output_file = sys.argv[4]
    final_output_file = sys.argv[5]

    # html report pars:
    create_html_report = len(sys.argv) > 7
    output_html_report = None
    output_html_report_files_path = None
    if create_html_report:
        output_html_report = sys.argv[6]
        output_html_report_files_path = sys.argv[7]

    is_wine = "wine" in nist_home_dir

    uuid_value = str(uuid.uuid4())

    # prepare NIST environment for running:
    new_nist_home = _prepare_NIST(uuid_value, nist_home_dir, nist_ini_file, spectrum_file, is_wine)

    # run NIST search command:
    _run_NIST(new_nist_home, nist_output_file, is_wine)

    # write output tabular:
    hits_dict = utils.get_nist_out_as_dict(nist_output_file)
    utils.save_dict_as_tsv(hits_dict, final_output_file)

    # create report:
    if create_html_report:
        spectra_dict = utils.get_spectra_file_as_dict(spectrum_file)
        _create_html_report(output_html_report, output_html_report_files_path, hits_dict, spectra_dict)

    seconds_end = int(round(time.time()))
    print("Took " + str(seconds_end - seconds_start) + " seconds")


if __name__ == '__main__':
    main()
|