| 0 | 1 #!/usr/bin/env python | 
|  | 2 # encoding: utf-8 | 
|  | 3 ''' | 
|  | 4 Module wrapping the NIST MSSEARCH application for matching | 
|  | 5 spectra against one or more spectra libraries. | 
|  | 6 ''' | 
|  | 7 import csv | 
|  | 8 import sys | 
|  | 9 import fileinput | 
|  | 10 import urllib2 | 
|  | 11 import time | 
|  | 12 import utils | 
|  | 13 import uuid | 
|  | 14 import os | 
|  | 15 import subprocess | 
|  | 16 from report_generator import ReportGenerator | 
|  | 17 | 
|  | 18 __author__ = "Pieter Lukasse" | 
|  | 19 __contact__ = "pieterlukasse@gmail.com,pieter.lukasse@wur.nl" | 
|  | 20 __copyright__ = "Copyright, 2015" | 
|  | 21 __license__ = "Apache v2" | 
|  | 22 | 
|  | 23 | 
|  | 24 | 
def _prepare_NIST(uuid_value, nist_home_dir, nist_ini_file, spectrum_file, is_wine, log_file):
    r'''
    Creates a private, per-run copy of the NIST installation and configures it
    for an automated search of the given spectrum file.

    Executes the following steps:
     - copy the nist_home_dir folder to nist_home_dir + uuid_value
     - copy spectrum_file to <copy>/spectrum_file.msp
     - write <copy>/MSSEARCH/AUTOIMP.MSD, containing the path of temp.msd:
       C:\NIST<uuid>\MSSEARCH\temp.msd (wine) or <copy>\MSSEARCH\temp.msd
     - write <copy>/MSSEARCH/temp.msd, containing the path of the spectrum
       file, C:\NIST<uuid>\spectrum_file.msp (wine) or
       <copy>\spectrum_file.msp, and the text "10 724" in the second row
     - write <copy>/MSSEARCH/nistms.INI based on nist_ini_file, rewriting the
       "Library Directory=" and "Dir=" lines so they point into the copy

    @param uuid_value: unique suffix appended to the NIST home dir name
    @param nist_home_dir: NIST installation folder to copy (a trailing
                          slash or backslash is ignored)
    @param nist_ini_file: template nistms.INI file
    @param spectrum_file: query spectra (.msp) file
    @param is_wine: True when NIST is run through wine
    @param log_file: log file handed to utils.log_message
    @return: the path of the newly created NIST home copy
    @raise Exception: if is_wine is set and nist_home_dir does not end
                      with "drive_c/NIST"
    '''
    if nist_home_dir.endswith(("/", "\\")):
        nist_home_dir = nist_home_dir[:-1]

    # small validation for wine scenario
    if is_wine and not nist_home_dir.endswith("drive_c/NIST"):
        raise Exception('Error: invalid NIST home. For wine usage NIST home dir must be in the .wine folder and then in drive_c/NIST')

    new_nist_home = nist_home_dir + uuid_value
    utils.copy_dir(nist_home_dir, new_nist_home)
    utils.copy_file(spectrum_file, new_nist_home + "/spectrum_file.msp")

    # Location of the copy as it is written into the NIST control files:
    # a C:\ style path under wine, the plain folder path otherwise.
    if is_wine:
        replacement_text = "C:\\NIST" + uuid_value
    else:
        replacement_text = new_nist_home

    mssearch_dir = new_nist_home + "/MSSEARCH"

    utils.log_message(log_file, "configuring NIST input...")
    # Mode "w" replaces whatever was copied over from the template folder,
    # so stale content can never end up in front of the new paths.
    with open(mssearch_dir + "/AUTOIMP.MSD", "w") as autoimp_file:
        autoimp_file.write(replacement_text + "\\MSSEARCH\\temp.msd")

    with open(mssearch_dir + "/temp.msd", "w") as msd_file:
        msd_file.write(replacement_text + "\\spectrum_file.msp\n")
        msd_file.write("10 724")

    utils.log_message(log_file, "configuring NIST ini...")
    # TODO : this loop/replace below is a bit limited to specific variables...either test different NIST versions
    # or make more generic (harder in case of wine, or we need extra "home in .INI file" parameter):
    with open(nist_ini_file) as ini_template:
        with open(mssearch_dir + "/nistms.INI", "w") as ini_out:
            for line in ini_template:
                if "Library Directory=" in line:
                    # NOTE(review): this uses the host path even under wine,
                    # unlike the "Dir=" line below - confirm this is intended.
                    line = "Library Directory=" + new_nist_home + "\\MSSEARCH\\\n"
                if "Dir=" in line:
                    line = "Dir=" + replacement_text + "\\MSSEARCH\\\n"
                ini_out.write(line)

    return new_nist_home
|  | 87 | 
def _clean_up_NIST(new_nist_home):
    '''
    Deletes the per-run NIST folder by delegating to utils.remove_dir.

    @param new_nist_home: path of the folder to remove
    '''
    utils.remove_dir(new_nist_home)
|  | 93 | 
def _run_NIST(new_nist_home, output_file, is_wine, log_file, job_size):
    '''
    Runs the NIST search and copies its result file to output_file.

      - run : (wine) new_nist_home/MSSEARCH/nistms$.exe /INSTRUMENT /PAR=2
      - monitor : wait until new_nist_home/MSSEARCH/SRCREADY.TXT exists
         - when ready:
           > kill nist process
           > copy SRCRESLT.TXT to output_file

    @param new_nist_home: NIST home folder to run from
    @param output_file: destination for the SRCRESLT.TXT results
    @param is_wine: True to start the executable through wine
    @param log_file: log file handed to utils.log_message
    @param job_size: number used to scale the timeout; the wait is aborted
                     once more than 60 + 2 * job_size seconds have passed
    @raise Exception: on timeout waiting for SRCREADY.TXT
    '''
    # to avoid conflicts in the orphan process killing (see end of this method), we will
    # only run NIST again after previous nistms.exe process has been killed:
    # TODO : solution is currently only for wine (in the windows context the solution is not there yet,
    # but parallel calls are not expected as in windows we only run tests one by one for now)

    mssearch_dir = new_nist_home + "/MSSEARCH"

    # remove old state file, if it is there, so a stale one is not mistaken
    # for the result of this run:
    file_to_monitor = mssearch_dir + "/SRCREADY.TXT"
    if os.path.exists(file_to_monitor):
        os.remove(file_to_monitor)

    exec_path = mssearch_dir + "/nistms$.exe"
    nist_args = [exec_path, "/INSTRUMENT", "/PAR=2"]

    pro = None
    if is_wine:
        utils.log_message(log_file, "calling wine with " + exec_path)
        # os.setsid is run in the child after fork() and before exec(), so
        # wine and everything it spawns share one process group that
        # _kill_NIST can terminate as a whole.
        # The command is passed as an argument list (no shell), so paths
        # containing spaces or shell metacharacters are handled correctly.
        # stdout is discarded rather than piped: nobody reads it, and an
        # undrained pipe could block the child once its buffer fills up.
        with open(os.devnull, "w") as devnull:
            pro = subprocess.Popen(["wine"] + nist_args, stdout=devnull,
                                   preexec_fn=os.setsid)
    else:
        # blocks until the launched executable returns
        subprocess.call(nist_args)

    poll_interval = 2
    start_up_time = 60
    max_wait = start_up_time + job_size * 2
    time_sleeping = 0

    # monitor process by checking state file:
    utils.log_message(log_file, "monitoring SRCREADY.TXT...")
    while not os.path.exists(file_to_monitor):
        time.sleep(poll_interval)
        time_sleeping += poll_interval
        if time_sleeping > max_wait:
            # abort with timeout:
            utils.log_message(log_file, "No results after " + str(time_sleeping) + " seconds...")
            _kill_NIST(pro, is_wine)
            # ugly workaround: also kill by name in case something survived
            utils.kill_process_by_name("nistms.exe")
            utils.kill_process_by_name("nistms$.exe")
            raise Exception('Error: timeout waiting for NIST results.')

    utils.log_message(log_file, "done...")
    # nistms$ terminates by itself; nistms.exe is the one that stays open
    # as an orphan, so kill it:
    _kill_NIST(pro, is_wine)

    # copy SRCRESLT.TXT to output_file
    utils.copy_file(mssearch_dir + "/SRCRESLT.TXT", output_file)
| 0 | 160 | 
| 20 | 161 | 
|  | 162 | 
def _kill_NIST(process_group, is_wine):
    '''
    nistm$ will terminate...nistms.exe is the one that
    stays open...and orphan. This method kills it.

    @param process_group: object whose .pid is the id of the process group
                          to kill; only used when is_wine is True
    @param is_wine: True to kill the process group with signal 9,
                    False to use the Windows "taskkill" command instead
    '''
    if is_wine:
        # local import: errno is only needed on this code path
        import errno
        try:
            # 9 == SIGKILL
            os.killpg(process_group.pid, 9)
        except OSError as err:
            # ESRCH means the group is already gone, which is exactly the
            # state we want; anything else is a real problem.
            if err.errno != errno.ESRCH:
                raise
    else:
        # windows case:
        proc_name = "nistms.exe"
        os.system("taskkill /f /im " + proc_name)
|  | 176 | 
| 20 | 177 | 
| 0 | 178 | 
|  | 179 | 
def _create_html_report(output_html_report, output_html_report_files_path, hits_dict, spectra_dict):
    '''
    Renders the HTML report and copies the files it depends on.

    The report is intended to display essentially the same list as found in the
    tabular output file (rendered with datatables jquery plugin), with some extra features:
     - when user clicks on an entry, it should display the query spectrum and the hit spectrum
       in "head to tail" and "difference" mode (see galaxy/report_example.png)
        -> the query spectrum can be generated from the data in the input MSP file
        -> the library "online representative" spectrum can be generated from data returned by
           http://webbook.nist.gov/cgi/cbook.cgi?JCAMP=C537268&Index=0&Type=Mass ,
           where C537268 in this case is the CAS ID without the '-' separators

    @param output_html_report: path of the HTML file to write
    @param output_html_report_files_path: folder the templates folder
                                          (including the .js files) is copied to
    @param hits_dict: hits, passed on to ReportGenerator
    @param spectra_dict: query spectra, passed on to ReportGenerator
    @return: None
    '''
    # Resolve the module folder via abspath: a bare dirname(__file__) is ''
    # when the script is started from its own folder, which would turn the
    # templates path into '/templates/'.
    module_dir = os.path.dirname(os.path.abspath(__file__))

    # copy necessary .js files as well:
    utils.copy_dir(module_dir + '/templates/', output_html_report_files_path)

    # generate HTML via the template engine
    html_render = ReportGenerator(module_dir, 'templates/main_template.html', hits_dict, spectra_dict)
    # "with" guarantees the report file is closed even if rendering fails
    with open(output_html_report, 'w') as html_file:
        html_render.render(html_file)

    return None
|  | 208 | 
def _get_extra_info_and_link_cols(data_found, data_type_found, query_link):
    '''
    Returns the extra columns for a result row as a two-item list:
    - the data type found (data_type_found, unchanged)
    - a spreadsheet formula linking to query_link, i.e. a link that
      executes the same query

    @param data_found: the data found; currently not used for the result,
                       kept so existing callers keep working
    @param data_type_found: value placed in the first column
    @param query_link: URL embedded in the HYPERLINK formula
    @return: [data_type_found, hyperlink formula string]
    '''
    # To let Excel interpret as link, use e.g. =HYPERLINK("http://stackoverflow.com", "friendly name"):
    hyperlink_formula = "=HYPERLINK(\"" + query_link + "\", \"Link to entries found in DB \")"
    return [data_type_found, hyperlink_formula]
|  | 245 | 
|  | 246 | 
|  | 247 | 
|  | 248 | 
|  | 249 | 
|  | 250 # alternative: ? | 
|  | 251 #     s = requests.Session() | 
|  | 252 #     s.verify = False | 
|  | 253 #     #s.auth = (token01, token02) | 
|  | 254 #     resp = s.get(url, params={'name': 'anonymous'}, stream=True) | 
|  | 255 #     content = resp.content | 
|  | 256 #     # transform to dictionary: | 
|  | 257 | 
|  | 258 | 
|  | 259 | 
def _save_data(data_rows, headers, out_csv):
    '''
    Writes tab-separated data to file
    @param data_rows: iterable of rows (each row a sequence of values)
                      containing the merged/enriched dataset
    @param headers: sequence of column names, written as the first row
    @param out_csv: output csv file
    '''
    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline='' on Python 3; both produce the same bytes on disk.
    if sys.version_info[0] < 3:
        outfile_handle = open(out_csv, 'wb')
    else:
        outfile_handle = open(out_csv, 'w', newline='')

    # "with" makes sure the data is flushed and the file closed
    with outfile_handle:
        tsv_writer = csv.writer(outfile_handle, delimiter="\t")
        # Write headers
        tsv_writer.writerow(headers)
        # Write one line for each row
        tsv_writer.writerows(data_rows)
|  | 277 | 
def _get_metexp_URL(metexp_dblink_file):
    '''
    Returns the first line of the given file that does not start with '#'.

    The line is returned exactly as read (including its line ending).
    If every line is a comment line, or the file is empty, None is returned.
    '''
    link_lines = fileinput.input(metexp_dblink_file)
    try:
        for candidate in link_lines:
            if candidate.startswith('#'):
                # comment line, keep looking
                continue
            return candidate
        return None
    finally:
        link_lines.close()
|  | 290 | 
|  | 291 | 
def main():
    '''
    Wrapper main function

    The input expected (command line arguments, in this order) is:
     NIST_HOME dir
     nistms.INI
     spectrum_file.msp
     NIST output file name (raw NIST results are copied here)
     final output file name (tabular version of the hits)
     log file name
     (optional) htmlReportFile
     (optional) htmlReportFile.files_path

    The two optional arguments must be given together.
    '''
    seconds_start = int(round(time.time()))

    nist_home_dir = sys.argv[1]
    nist_ini_file = sys.argv[2]
    spectrum_file = sys.argv[3]
    nist_output_file = sys.argv[4]
    final_output_file = sys.argv[5]
    output_log_file = sys.argv[6]

    # html report pars:
    output_html_report = None
    output_html_report_files_path = None
    report_args = sys.argv[7:]
    create_report = len(report_args) > 0
    if create_report:
        if len(report_args) < 2:
            # fail with a clear message instead of an IndexError
            raise Exception('Error: htmlReportFile given without htmlReportFile.files_path')
        output_html_report = report_args[0]
        output_html_report_files_path = report_args[1]

    is_wine = "wine" in nist_home_dir

    uuid_value = str(uuid.uuid4())

    # prepare NIST environment for running:
    new_nist_home = _prepare_NIST(uuid_value, nist_home_dir, nist_ini_file, spectrum_file, is_wine, output_log_file)

    try:
        # run NIST search command:
        spectra_dict = utils.get_spectra_file_as_dict(spectrum_file)
        job_size = len(spectra_dict)
        _run_NIST(new_nist_home, nist_output_file, is_wine, output_log_file, job_size)
    finally:
        # clean-up NIST environment, also when the search failed or timed
        # out, so the per-run copy of the NIST folder is never left behind:
        _clean_up_NIST(new_nist_home)

    # write output tabular:
    hits_dict = utils.get_nist_out_as_dict(nist_output_file)
    utils.save_dict_as_tsv(hits_dict, final_output_file)

    # create report:
    if create_report:
        _create_html_report(output_html_report, output_html_report_files_path, hits_dict, spectra_dict)

    seconds_end = int(round(time.time()))
    # single-argument call form: valid on both Python 2 and Python 3
    print("Took " + str(seconds_end - seconds_start) + " seconds")
|  | 349 | 
|  | 350 | 
|  | 351 | 
# Run the wrapper only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()