diff nist_wrapper.py @ 0:cce6989ed423

new NIST wrapper demo tools
author pieter.lukasse@wur.nl
date Thu, 22 Jan 2015 16:14:57 +0100
parents
children c3dc158717fc
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nist_wrapper.py	Thu Jan 22 16:14:57 2015 +0100
@@ -0,0 +1,325 @@
+#!/usr/bin/env python
+# encoding: utf-8
+'''
+Module wrapping the NIST MSSEARCH application for matching
+spectra against one or more spectra libraries.
+'''
+import csv
+import sys
+import fileinput
+import urllib2
+import time
+import utils
+import uuid
+import os
+import subprocess
+from report_generator import ReportGenerator
+
+# Module metadata: author, contact addresses, copyright and license.
+__author__ = "Pieter Lukasse"
+__contact__ = "pieterlukasse@gmail.com,pieter.lukasse@wur.nl"
+__copyright__ = "Copyright, 2015"
+__license__ = "Apache v2"
+
+
+
+def _prepare_NIST(uuid_value, nist_home_dir, nist_ini_file, spectrum_file, is_wine):
+    r'''
+    Creates a private copy of the NIST installation for this run and
+    configures it to search the given spectrum file.
+
+    Executes the following steps:
+     - copy the nist_home_dir folder to nist_home_dir + uuid_value
+     - copy spectrum_file to <new folder>/spectrum_file.msp
+     - (re)create <new folder>/MSSEARCH/AUTOIMP.MSD, containing the path of
+       temp.msd: C:\NIST<uuid>\MSSEARCH\temp.msd in case of is_wine, or
+       <new folder>\MSSEARCH\temp.msd otherwise
+     - (re)create <new folder>/MSSEARCH/temp.msd, containing the path of the
+       spectrum file (C:\NIST<uuid>\spectrum_file.msp in case of is_wine, or
+       <new folder>\spectrum_file.msp otherwise) on the first row and the
+       text "10 724" on the second row
+     - rewrite <new folder>/MSSEARCH/nistms.INI from nist_ini_file, where:
+        * each line containing "Library Directory=" is replaced by
+          "Library Directory=<new folder>\MSSEARCH\"
+        * each line containing "Dir=" is replaced by
+          "Dir=<C:\NIST<uuid> or new folder>\MSSEARCH\"
+
+    @param uuid_value: unique suffix appended to nist_home_dir to name the copy
+    @param nist_home_dir: NIST home folder (a trailing "/" or "\" is ignored)
+    @param nist_ini_file: nistms.INI file to use as template
+    @param spectrum_file: file with the query spectra
+    @param is_wine: True when NIST is run via wine
+    @return: path of the new NIST home folder
+    @raise Exception: if is_wine and nist_home_dir does not end with drive_c/NIST
+    '''
+    if nist_home_dir.endswith("/") or nist_home_dir.endswith("\\"):
+        nist_home_dir = nist_home_dir[:-1]
+
+    # small validation for wine scenario
+    if is_wine and not nist_home_dir.endswith("drive_c/NIST"):
+        raise Exception('Error: invalid NIST home. For wine usage NIST home dir must be in the .wine folder and then in drive_c/NIST')
+
+    new_nist_home = nist_home_dir+uuid_value
+    utils.copy_dir(nist_home_dir, new_nist_home)
+
+    utils.copy_file(spectrum_file, new_nist_home+"/spectrum_file.msp")
+
+    # location of the new home as written into the NIST configuration files:
+    replacement_text = new_nist_home
+    if is_wine:
+        replacement_text = "C:\\NIST" + uuid_value
+
+    # remove old file and write the new one:
+    autoimp_file = new_nist_home + "/MSSEARCH/AUTOIMP.MSD"
+    os.remove(autoimp_file)
+    with open(autoimp_file, "w") as text_file:
+        text_file.write(replacement_text + "\\MSSEARCH\\temp.msd")
+
+    # a temp.msd left behind in the copied installation must not be appended
+    # to, otherwise its old first row would still point to a previous spectrum file:
+    temp_msd_file = new_nist_home + "/MSSEARCH/temp.msd"
+    if os.path.exists(temp_msd_file):
+        os.remove(temp_msd_file)
+    with open(temp_msd_file, "w") as text_file:
+        text_file.write(replacement_text + "\\spectrum_file.msp\n")
+        text_file.write("10 724")
+
+    # remove old file and make new one (both handles are closed by the with blocks,
+    # also when an error occurs half way):
+    new_ini_file = new_nist_home+"/MSSEARCH/nistms.INI"
+    os.remove(new_ini_file)
+    with open(new_ini_file, "w") as ini_out:
+        with open(nist_ini_file) as ini_in:
+            # TODO : this loop/replace below is a bit limited to specific variables...either test different NIST versions or make more generic (harder in case of wine, or we need extra "home in .INI file"  parameter):
+            # NOTE(review): a matching line is replaced as a whole, so a key such as
+            # "Some Dir=" would be written back as plain "Dir=" - confirm this is intended.
+            for line in ini_in:
+                if "Library Directory=" in line:
+                    line = "Library Directory="+ new_nist_home + "\\MSSEARCH\\\n"
+                if "Dir=" in line:
+                    line = "Dir="+ replacement_text + "\\MSSEARCH\\\n"
+
+                ini_out.write(line)
+
+    return new_nist_home
+
+def _run_NIST(new_nist_home, output_file, is_wine):
+    '''
+    Runs the NIST search program found in new_nist_home and collects its result:
+      - start : (wine) new_nist_home/MSSEARCH/nistms$.exe /INSTRUMENT /PAR=2
+      - poll every 2 seconds until new_nist_home/MSSEARCH/SRCREADY.TXT exists
+      - kill the NIST process that stays behind
+      - copy new_nist_home/MSSEARCH/SRCRESLT.TXT to output_file
+
+    @param new_nist_home: NIST home folder prepared for this run
+    @param output_file: file to which SRCRESLT.TXT is copied
+    @param is_wine: True when NIST has to be started via wine
+    '''
+    # NOTE: waiting for a previous nistms.exe process to be gone before starting
+    # a new run (to avoid conflicts in the orphan process killing below) is
+    # currently disabled.
+    # TODO : in the windows context there is no solution for this yet, but parallel
+    # calls are not expected there as tests are only run one by one for now.
+    mssearch_dir = new_nist_home + "/MSSEARCH"
+
+    # a ready-flag of a previous run would end the polling loop immediately:
+    ready_flag_file = mssearch_dir + "/SRCREADY.TXT"
+    if os.path.exists(ready_flag_file):
+        os.remove(ready_flag_file)
+
+    nist_executable = mssearch_dir + "/nistms$.exe"
+
+    wine_process = ""
+    if not is_wine:
+        subprocess.call([nist_executable, "/INSTRUMENT", "/PAR=2"])
+    else:
+        print("calling wine with " + nist_executable)
+        wine_command = ["wine " + nist_executable + " /INSTRUMENT /PAR=2"]
+        # os.setsid is given as preexec_fn, so it runs after the fork() and
+        # before the exec() of the shell.
+        wine_process = subprocess.Popen(wine_command,
+                                        stdout=subprocess.PIPE,
+                                        shell=True,
+                                        preexec_fn=os.setsid)
+
+    # monitor the run by checking for the state file:
+    while not os.path.exists(ready_flag_file):
+        time.sleep(2)
+
+    # nistms$ terminates by itself, but nistms.exe stays open as an orphan,
+    # so it is killed here:
+    if is_wine:
+        os.killpg(wine_process.pid, 9)
+    else:
+        # windows case:
+        orphan_name = "nistms.exe"
+        os.system("taskkill /f /im " + orphan_name)
+
+    # hand over the result: SRCRESLT.TXT -> output_file
+    utils.copy_file(mssearch_dir + "/SRCRESLT.TXT", output_file)
+    
+
+def _create_html_report(output_html_report, output_html_report_files_path, hits_dict, spectra_dict):
+    '''
+    This report will contain a page that displays essentially the same list as found in the 
+    tabular output file (rendered with datatables jquery plugin), with some extra features:
+     - when user clicks on an entry, it should display the query spectrum and the hit spectrum
+       in "head to tail" and "difference" mode (see galaxy/report_example.png)
+        -> the query spectrum can be generated from the data in the input MSP file
+        -> the library "online representative" spectrum can be generated from data returned by http://webbook.nist.gov/cgi/cbook.cgi?JCAMP=C537268&Index=0&Type=Mass ,
+           where C537268 in this case is the CAS ID without the '-' separators
+
+    @param output_html_report: path of the HTML file to write
+    @param output_html_report_files_path: folder that receives spectrum_gen.js and
+                                          the lib and images folders of the templates
+    @param hits_dict: hits data, passed on to ReportGenerator
+    @param spectra_dict: query spectra data, passed on to ReportGenerator
+    @return: None
+    '''
+    # Resolve the folder of this module as an absolute path: when the script is
+    # started from its own folder, os.path.dirname(__file__) is '' and the
+    # templates would wrongly be looked up in '/templates/'.
+    module_folder = os.path.dirname(os.path.abspath(__file__))
+
+    # step 1 : generate HTML via the jinja template engine
+    # step 1.1: make sure to link the query spectrum data to the corresponding html object for quick rendering when needed
+    html_render = ReportGenerator(module_folder, 'templates/main_template.html',hits_dict, spectra_dict)
+    # the with block also closes the report file when rendering fails:
+    with open(output_html_report,'w') as html_file:
+        html_render.render(html_file)
+
+    # copy necessary .js files as well:
+    templates_folder = os.path.join(module_folder, 'templates')
+    utils.copy_file(os.path.join(templates_folder, 'spectrum_gen.js'), output_html_report_files_path+"/spectrum_gen.js")
+    utils.copy_dir(os.path.join(templates_folder, 'lib'), output_html_report_files_path+'/lib' )
+    utils.copy_dir(os.path.join(templates_folder, 'images'), output_html_report_files_path+'/images' )
+
+    return None
+
+def _get_extra_info_and_link_cols(data_found, data_type_found, query_link):
+    '''
+    Returns the extra columns for a result row, as a list with the following items:
+    - the given data_type_found
+    - an Excel HYPERLINK formula pointing to query_link (link that executes the same query)
+
+    @param data_found: the data found for the query. It is currently not used for
+                       the result: experiment details such as 'organism', 'tissue',
+                       'experiment_name', 'user_name', 'column_type' are not (yet)
+                       added to the returned list.
+    @param data_type_found: value for the first column
+    @param query_link: URL to place in the HYPERLINK formula
+    @return: list [data_type_found, hyperlink formula]
+    '''
+    # To let Excel interpret as link, use e.g. =HYPERLINK("http://stackoverflow.com", "friendly name"):
+    hyperlink_formula = "=HYPERLINK(\""+ query_link + "\", \"Link to entries found in DB \")"
+    return [data_type_found, hyperlink_formula]
+
+
+
+    
+    
+# alternative: ?    
+#     s = requests.Session()
+#     s.verify = False
+#     #s.auth = (token01, token02)
+#     resp = s.get(url, params={'name': 'anonymous'}, stream=True)
+#     content = resp.content
+#     # transform to dictionary:
+    
+
+
+def _save_data(data_rows, headers, out_csv):
+    '''
+    Writes tab-separated data to file
+    @param data_rows: iterable with the rows of the merged/enriched dataset,
+                      each row being a sequence of column values
+    @param headers: sequence of column names, written as the first line
+    @param out_csv: output csv file
+    '''
+
+    # Open output file for writing; the with block makes sure the file is
+    # flushed and closed, also when writing a row fails
+    with open(out_csv, 'wb') as outfile_single_handle:
+        output_single_handle = csv.writer(outfile_single_handle, delimiter="\t")
+
+        # Write headers
+        output_single_handle.writerow(headers)
+
+        # Write one line for each row
+        output_single_handle.writerows(data_rows)
+
+def _get_metexp_URL(metexp_dblink_file):
+    '''
+    Returns the first line of the given file that does not start with '#'.
+    The line is returned as read (including its line ending, if any);
+    None is returned when the file has no such line.
+    '''
+    link_lines = fileinput.input(metexp_dblink_file)
+    try:
+        # comment lines are skipped, the first remaining line is the result:
+        return next((candidate for candidate in link_lines if candidate[0] != '#'), None)
+    finally:
+        link_lines.close()
+    
+
+def main():
+    '''
+    Wrapper main function
+
+    The input expected is (command line arguments, in this order):
+     NIST_HOME dir (NIST is run via wine when this path contains "wine")
+     nistms.INI
+     spectrum_file.msp
+     NIST output file name (receives the copy of SRCRESLT.TXT)
+     final output file name (tabular version of the hits)
+     (optional) htmlReportFile
+     (optional) htmlReportFile.files_path
+    The two optional arguments have to be given together.
+
+    @raise Exception: if the number of arguments is not 5 or at least 7
+    '''
+    seconds_start = int(round(time.time()))
+
+    # Validate the arguments before the (expensive) NIST run: passing only
+    # htmlReportFile without htmlReportFile.files_path is an error as well.
+    nr_of_args = len(sys.argv) - 1
+    if nr_of_args < 5 or nr_of_args == 6:
+        raise Exception('Error: expected the arguments NIST_HOME, nistms.INI, spectrum_file.msp, '
+                        'NIST output file and final output file, optionally followed by '
+                        'htmlReportFile and htmlReportFile.files_path, but got '
+                        + str(nr_of_args) + ' argument(s)')
+
+    nist_home_dir = sys.argv[1]
+    nist_ini_file = sys.argv[2]
+    spectrum_file = sys.argv[3]
+    nist_output_file = sys.argv[4]
+    final_output_file = sys.argv[5]
+    # html report pars:
+    output_html_report = None
+    output_html_report_files_path = None
+    create_report = nr_of_args > 6
+    if create_report:
+        output_html_report = sys.argv[6]
+        output_html_report_files_path = sys.argv[7]
+
+    is_wine = "wine" in nist_home_dir
+
+    uuid_value = str(uuid.uuid4())
+
+    # prepare NIST environment for running:
+    new_nist_home = _prepare_NIST(uuid_value, nist_home_dir, nist_ini_file, spectrum_file, is_wine)
+
+    # run NIST search command:
+    _run_NIST(new_nist_home, nist_output_file, is_wine)
+
+    # write output tabular:
+    hits_dict = utils.get_nist_out_as_dict(nist_output_file)
+    utils.save_dict_as_tsv(hits_dict, final_output_file)
+
+    # create report:
+    if create_report:
+        spectra_dict = utils.get_spectra_file_as_dict(spectrum_file)
+        _create_html_report(output_html_report, output_html_report_files_path, hits_dict, spectra_dict)
+
+    seconds_end = int(round(time.time()))
+    print("Took " + str(seconds_end - seconds_start) + " seconds")
+                      
+                      
+
+# Run the wrapper only when this file is executed as a script, not when it is imported.
+if __name__ == '__main__':
+    main()