utils.py @ 0:cce6989ed423
new NIST wrapper demo tools

author    | pieter.lukasse@wur.nl
date      | Thu, 22 Jan 2015 16:14:57 +0100
parents   | (none)
children  | 8c20185752da
'''
Created on 31 dec. 2014

@author: lukas007
'''
import shutil
import subprocess
import csv
from collections import OrderedDict

def copy_dir(src, dst):
    shutil.copytree(src, dst)


def copy_file(src, dst):
    shutil.copy(src, dst)

def get_process_list():
    # return the output lines of 'ps -A' (one entry per running process)
    p = subprocess.Popen(['ps', '-A'], stdout=subprocess.PIPE)
    out, err = p.communicate()
    return out.splitlines()

def get_process_pid(process_name):
    # return the pid of the last process whose 'ps' line contains
    # process_name, or -1 if no matching process is found
    pid = -1
    for line in get_process_list():
        if process_name in line:
            pid = int(line.split(None, 1)[0])
    return pid

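# Usage sketch (not part of the original module; the process name below is an
# assumption based on the nistms$.exe engine mentioned further down): poll
# until the NIST search process has exited.
def _example_wait_for_process(process_name='nistms$'):
    import time
    while get_process_pid(process_name) != -1:
        time.sleep(1)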

def get_as_dict(in_tsv):
    '''
    Generic method to parse a tab-separated file returning a dictionary with named columns
    @param in_tsv: input filename to be parsed
    '''
    data = list(csv.reader(open(in_tsv, 'rU'), delimiter='\t'))
    header = data.pop(0)
    # Create dictionary with column name as key
    output = {}
    for index in xrange(len(header)):
        output[header[index]] = [row[index] for row in data]
    return output

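# Usage sketch (not part of the original module; the file name and column
# names are hypothetical): for a TSV file 'compounds.txt' with the header row
# "name<TAB>mass" and two data rows, get_as_dict maps each column header to
# the list of string values in that column.
def _example_get_as_dict_usage():
    columns = get_as_dict('compounds.txt')
    return columns['name'], columns['mass']  # e.g. (['alanine', 'glycine'], ['89.09', '75.07'])
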
def save_dict_as_tsv(dict, out_tsv):
    '''
    Writes tab-separated data to file
    @param dict: dictionary containing the dataset, one list of values per column name
    @param out_tsv: output tsv file
    '''

    # Open output file for writing
    out_file = open(out_tsv, 'wb')
    output_writer = csv.writer(out_file, delimiter="\t")

    # Write headers
    output_writer.writerow(list(dict.keys()))

    # Write one row per record, preserving the column order of the keys
    for record_index in xrange(len(dict[dict.keys()[0]])):
        row = [dict[k][record_index] for k in dict]
        output_writer.writerow(row)


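# Usage sketch (not part of the original module; values and file name are
# hypothetical): build a small column dictionary and write it out. An
# OrderedDict keeps the column order stable in the resulting file.
def _example_save_dict_as_tsv_usage():
    columns = OrderedDict()
    columns['id'] = ['1', '2']
    columns['compound_name'] = ['alanine', 'glycine']
    save_dict_as_tsv(columns, 'example_out.txt')
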
def get_nist_out_as_dict(nist_result_file):
    '''
    Method to parse NIST specific output into a dictionary.
    @param nist_result_file: result file as produced by NIST nistms$.exe
    '''
    # Create dictionary with column name as key
    output = OrderedDict()
    output['id'] = []
    output['compound_name'] = []
    output['formula'] = []
    output['lib_name'] = []
    output['id_in_lib'] = []
    output['mf'] = []
    output['rmf'] = []
    output['prob'] = []
    output['cas'] = []
    output['mw'] = []


    for line in open(nist_result_file):
        row = line.split('<<')
        if row[0].startswith('Unknown'):
            title_row = row[0]
            continue
        elif row[0].startswith('Hit'):
            hit = row

            output['id'].append(title_row.split(': ')[1].split(' ')[0])
            output['compound_name'].append((hit[1].split('>>')[0]).decode('utf-8', 'replace'))  # see http://blog.webforefront.com/archives/2011/02/python_ascii_co.html
            output['formula'].append(hit[2].split('>>')[0])
            output['lib_name'].append(hit[3].split('>>')[0])

            other_fields_list = (hit[2].split('>>')[1] + hit[3].split('>>')[1]).split(';')
            count = 0
            for field in other_fields_list:
                if field.startswith(' MF: '):
                    count += 1
                    output['mf'].append(field.split('MF: ')[1])
                elif field.startswith(' RMF: '):
                    count += 1
                    output['rmf'].append(field.split('RMF: ')[1])
                elif field.startswith(' Prob: '):
                    count += 1
                    output['prob'].append(field.split('Prob: ')[1])
                elif field.startswith(' CAS:'):
                    count += 1
                    output['cas'].append(field.split('CAS:')[1])
                elif field.startswith(' Mw: '):
                    count += 1
                    output['mw'].append(field.split('Mw: ')[1])
                elif field.startswith(' Id: '):
                    count += 1
                    output['id_in_lib'].append(field.split('Id: ')[1][0:-2])  # [0:-2] strips the trailing '.' and newline
                elif field != '' and field != ' Lib: ':
                    raise Exception('Error: unexpected field in NIST output: ' + field)

            if count != 6:
                raise Exception('Error: did not find all expected fields in NIST output')

    return output

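# Usage sketch (not part of the original module). The two sample lines below
# are inferred from the parsing logic above and only approximate what
# nistms$.exe actually writes; real result files may differ in detail.
def _example_get_nist_out_as_dict_usage():
    sample = ('Unknown: spec1 Compound in Library Factor = -313\n'
              'Hit 1  : <<Alanine>>; <<C3H7NO2>>; MF: 678; RMF: 701; '
              'Prob: 23.41; CAS:56-41-7; Mw: 89; Lib: <<mainlib>>; Id: 1234.\n')
    with open('example_nist_out.txt', 'w') as sample_file:
        sample_file.write(sample)
    result = get_nist_out_as_dict('example_nist_out.txt')
    # result['id'] == ['spec1'], result['compound_name'] == [u'Alanine'],
    # result['mf'] == ['678'], result['id_in_lib'] == ['1234'], ...
    return result
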
def get_spectra_file_as_dict(spectrum_file):
    '''
    Method to parse a spectra file in NIST MSP input format into a dictionary.
    The idea is to parse the following:

    Name: spectrum1
    DB#: 1
    Num Peaks: 87
    14 8; 15 15; 27 18; 28 15; 29 15;
    30 11; 32 19; 39 32; 40 12; 41 68;

    into:

    dict['spectrum1'] = "14 8; 15 15; 27 18; 28 15; 29 15; 30 11; 32 19; 39 32; 40 12; 41 68;"

    @param spectrum_file: spectra file in MSP format (e.g. also the format returned by MsClust)
    '''

    output = OrderedDict()
    name = ''
    spectrum = ''
    for line in open(spectrum_file):
        if line.startswith('Name: '):
            if name != '':
                # store the previous spectrum:
                output[name] = spectrum
            name = line.split('Name: ')[1].replace('\n', '')
            spectrum = ''
        elif line[0].isdigit():
            # append this line of peaks to the current spectrum:
            spectrum += line.replace('\n', '')

    # also store the last spectrum:
    output[name] = spectrum

    return output

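# Usage sketch (not part of the original module; file names are placeholders,
# and it is an assumption that the 'Unknown' ids in the NIST result match the
# 'Name:' entries of the MSP file): combine the helpers above into a small
# report that adds each unknown's peak list to its NIST hits and saves a TSV.
def _example_workflow():
    spectra = get_spectra_file_as_dict('input_spectra.msp')
    nist_hits = get_nist_out_as_dict('nist_result.txt')
    nist_hits['spectrum'] = [spectra.get(unknown_id, '') for unknown_id in nist_hits['id']]
    save_dict_as_tsv(nist_hits, 'nist_hits.txt')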