0
|
1 '''
|
|
2 Created on 31 dec. 2014
|
|
3
|
|
4 @author: lukas007
|
|
5 '''
|
|
6 import shutil
|
|
7 import subprocess
|
|
8 import csv
|
16
|
9 import os
|
|
10 import stat
|
0
|
11 from collections import OrderedDict
|
|
12
|
|
def copy_dir(src, dst):
    '''
    Recursively copy the directory tree rooted at src to dst.
    @param src: path of the existing source directory
    @param dst: path of the destination directory (must not exist yet)
    '''
    shutil.copytree(src, dst)
|
|
15
|
16
|
16 def _del_rw(action, name, exc):
|
|
17 '''
|
|
18 ensures the read only files are set to read/write
|
|
19 and then deletes them
|
|
20 '''
|
|
21 os.chmod(name, stat.S_IWRITE)
|
|
22 os.remove(name)
|
|
23
|
|
def remove_dir(src):
    '''
    Recursively delete the directory tree at src; read-only files are
    force-deleted via the _del_rw error handler.
    @param src: path of the directory to remove
    '''
    shutil.rmtree(src, onerror=_del_rw)
|
|
26
|
|
27
|
9
|
def log_message(log_file, log_message):
    '''
    Append one message line to a log file (created if it does not exist).
    @param log_file: path of the log file to append to
    @param log_message: text to write; a trailing newline is added
    '''
    with open(log_file, "a") as log_handle:
        log_handle.write(log_message + "\n")
|
|
31
|
0
|
def copy_file(src, dst):
    '''
    Copy a single file from src to dst (dst may be a directory).
    @param src: path of the source file
    @param dst: destination path
    '''
    shutil.copy(src, dst)
|
|
34
|
|
def get_process_list():
    '''
    Return the output of `ps -A` split into lines, one entry per
    running process (plus the ps header line).
    '''
    ps_proc = subprocess.Popen(['ps', '-A'], stdout=subprocess.PIPE)
    stdout_data, _ = ps_proc.communicate()
    return stdout_data.splitlines()
|
|
39
|
|
def get_process_pid(process_name):
    '''
    Scan the process table for lines containing process_name and return
    the pid of the LAST matching entry, or -1 when nothing matches.
    Matching is substring-based on the raw `ps` output line.
    @param process_name: substring to look for in each ps line
    '''
    found_pid = -1
    for ps_line in get_process_list():
        if process_name in ps_line:
            found_pid = int(ps_line.split(None, 1)[0])
    return found_pid
|
|
46
|
19
|
def kill_process_by_name(process_name):
    '''
    Send SIGKILL (signal 9) to every process whose `ps` line contains
    process_name (substring match).
    @param process_name: substring to look for in each ps line
    '''
    pid = -1
    for ps_line in get_process_list():
        if process_name in ps_line:
            pid = int(ps_line.split(None, 1)[0])
            os.kill(pid, 9)
|
|
53
|
0
|
54
|
|
def get_as_dict(in_tsv):
    '''
    Generic method to parse a tab-separated file returning a dictionary with named columns
    @param in_tsv: input filename to be parsed
    @return: dict mapping each header name to the list of that column's values
    '''
    # 'with' guarantees the handle is closed (the original leaked it), and
    # newline='' is the csv-module recommended open mode; the old 'rU' mode
    # was removed in Python 3.11.
    with open(in_tsv, 'r', newline='') as tsv_file:
        data = list(csv.reader(tsv_file, delimiter='\t'))
    header = data.pop(0)
    # Create dictionary with column name as key
    return {name: [row[index] for row in data]
            for index, name in enumerate(header)}
|
|
67
|
|
def save_dict_as_tsv(dict, out_tsv):
    '''
    Writes tab-separated data to file.
    @param dict: dictionary mapping column name -> list of column values
                 (all columns assumed equally long; parameter name kept for
                 backward compatibility although it shadows the builtin)
    @param out_tsv: output tsv file
    '''
    # Text mode with newline='' is what the csv module expects on Python 3
    # (the old 'wb' mode made csv.writer raise TypeError); the context
    # manager closes the file deterministically instead of leaking it.
    with open(out_tsv, 'w', newline='') as out_file:
        output_writer = csv.writer(out_file, delimiter="\t")

        # Write headers
        output_writer.writerow(list(dict.keys()))

        # Write data rows; guard against an empty dict, which previously crashed.
        if dict:
            first_column = next(iter(dict))
            for record_index in range(len(dict[first_column])):
                row = [dict[k][record_index] for k in dict]
                output_writer.writerow(row)
|
|
86
|
|
87
|
|
88
|
|
89
|
|
def get_nist_out_as_dict(nist_result_file):
    '''
    Method to parse NIST specific output into a dictionary.

    Expected input shape (an 'Unknown:' title line followed by 'Hit' lines
    whose name/formula/library fields are delimited by <<...>>):

        Unknown: 12 some title
        Hit 1: <<name>>; <<formula>>; MF: ...; RMF: ...; Prob: ...; CAS:...; Mw: ...; Lib: <<libname>>; Id: 1234.

    @param nist_result_file: result file as produced by NIST nistms$.exe
    @return: OrderedDict with one list per column ('id', 'compound_name', ...)
    @raise Exception: on an unexpected field, or when any of the six fields
                      MF/RMF/Prob/CAS/Mw/Id is missing from a hit line
    '''
    # Create dictionary with column name as key
    output = OrderedDict((key, []) for key in (
        'id', 'compound_name', 'formula', 'lib_name', 'id_in_lib',
        'mf', 'rmf', 'prob', 'cas', 'mw'))

    # utf-8 with errors='replace' mirrors the original byte-level
    # decode('utf-8', errors='replace') of the compound name; the 'with'
    # block closes the handle (the original leaked it).
    with open(nist_result_file, encoding='utf-8', errors='replace') as result_file:
        title_row = None
        for line in result_file:
            row = line.split('<<')
            if row[0].startswith('Unknown'):
                # Remember the title line; its second token is the spectrum id.
                title_row = row[0]
                continue
            elif row[0].startswith('Hit'):
                hit = row

                output['id'].append(title_row.split(': ')[1].split(' ')[0])
                output['compound_name'].append(hit[1].split('>>')[0])
                output['formula'].append(hit[2].split('>>')[0])
                output['lib_name'].append(hit[3].split('>>')[0])

                # Everything after the formula and library name is a
                # ';'-separated list of 'key: value' fields.
                other_fields_list = (hit[2].split('>>')[1] + hit[3].split('>>')[1]).split(';')
                count = 0
                for field in other_fields_list:
                    if field.startswith(' MF: '):
                        count += 1
                        output['mf'].append(field.split('MF: ')[1])
                    elif field.startswith(' RMF: '):
                        count += 1
                        output['rmf'].append(field.split('RMF: ')[1])
                    elif field.startswith(' Prob: '):
                        count += 1
                        output['prob'].append(field.split('Prob: ')[1])
                    elif field.startswith(' CAS:'):
                        count += 1
                        output['cas'].append(field.split('CAS:')[1])
                    elif field.startswith(' Mw: '):
                        count += 1
                        output['mw'].append(field.split('Mw: ')[1])
                    elif field.startswith(' Id: '):
                        count += 1
                        # the [0:-2] drops the last 2 characters, namely a '.' and a \n
                        output['id_in_lib'].append(field.split('Id: ')[1][0:-2])
                    elif field != '' and field != ' Lib: ':
                        raise Exception('Error: unexpected field in NIST output: ' + field)

                if count != 6:
                    raise Exception('Error: did not find all expected fields in NIST output')

    return output
|
|
150
|
|
def get_spectra_file_as_dict(spectrum_file):
    '''
    Method to parse spectra file in NIST MSP input format into a dictionary.
    The idea is to parse the following :

        Name: spectrum1
        DB#: 1
        Num Peaks: 87
        14 8; 15 15; 27 18; 28 15; 29 15;
        30 11; 32 19; 39 32; 40 12; 41 68;

    into:

        dict['spectrum1'] = "14 8; 15 15; 27 18; 28 15; 29 15;30 11; 32 19; 39 32; 40 12; 41 68;"

    @param spectrum_file: spectra file in MSP format (e.g. also the format returned by MsClust)
    @return: OrderedDict mapping spectrum name -> concatenated peak-list string
    '''
    output = OrderedDict()
    name = ''
    spectrum = ''
    # 'with' closes the handle (the original leaked it)
    with open(spectrum_file) as msp_file:
        for line in msp_file:
            if line.startswith('Name: '):
                if name != '':
                    # store the previously collected spectrum:
                    output[name] = spectrum
                name = line.split('Name: ')[1].replace('\n', '')
                spectrum = ''
            elif line[0].isdigit():
                # peak data lines start with a digit; concatenate them
                # (the newline is stripped, so lines join without a separator)
                spectrum += line.replace('\n', '')

    # store also last spectrum; the name check guards against empty or
    # name-less files, which previously produced a spurious '' entry
    if name != '':
        output[name] = spectrum

    return output
|
|
187 |