Mercurial > repos > pieterlukasse > prims_metabolomics
comparison query_metexp.py @ 22:cd4f13119afa
Small fix in filters part and improvement in query_metexp time logging
author | pieter.lukasse@wur.nl |
---|---|
date | Thu, 06 Mar 2014 14:29:55 +0100 |
parents | 19d8fd10248e |
children |
comparison
equal
deleted
inserted
replaced
21:19d8fd10248e | 22:cd4f13119afa |
---|---|
12 ''' | 12 ''' |
13 import csv | 13 import csv |
14 import sys | 14 import sys |
15 import fileinput | 15 import fileinput |
16 import urllib2 | 16 import urllib2 |
| | 17 import time |
17 from collections import OrderedDict | 18 from collections import OrderedDict |
18 | 19 |
19 __author__ = "Pieter Lukasse" | 20 __author__ = "Pieter Lukasse" |
20 __contact__ = "pieter.lukasse@wur.nl" | 21 __contact__ = "pieter.lukasse@wur.nl" |
21 __copyright__ = "Copyright, 2014, Plant Research International, WUR" | 22 __copyright__ = "Copyright, 2014, Plant Research International, WUR" |
42 def _query_and_add_data(input_data, casid_col, formula_col, molecular_mass_col, metexp_dblink, separation_method): | 43 def _query_and_add_data(input_data, casid_col, formula_col, molecular_mass_col, metexp_dblink, separation_method): |
43 ''' | 44 ''' |
44 This method will iterate over the record in the input_data and | 45 This method will iterate over the record in the input_data and |
45 will enrich them with the related information found (if any) in the | 46 will enrich them with the related information found (if any) in the |
46 MetExp Database. | 47 MetExp Database. |
| | 48 |
| | 49 # TODO : could optimize this with multi-threading, see also nice example at http://stackoverflow.com/questions/2846653/python-multithreading-for-dummies |
47 ''' | 50 ''' |
48 merged = [] | 51 merged = [] |
49 | 52 |
50 for i in xrange(len(input_data[input_data.keys()[0]])): | 53 for i in xrange(len(input_data[input_data.keys()[0]])): |
51 # Get the record in same dictionary format as input_data, but containing | 54 # Get the record in same dictionary format as input_data, but containing |
245 | 248 |
246 The input file can be any tabular file, as long as it contains a column for the molecular mass | 249 The input file can be any tabular file, as long as it contains a column for the molecular mass |
247 and one for the formula of the respective identification. These two columns are then | 250 and one for the formula of the respective identification. These two columns are then |
248 used to query against MetExp Database. | 251 used to query against MetExp Database. |
249 ''' | 252 ''' |
| | 253 seconds_start = int(round(time.time())) |
| | 254 |
250 input_file = sys.argv[1] | 255 input_file = sys.argv[1] |
251 casid_col = sys.argv[2] | 256 casid_col = sys.argv[2] |
252 formula_col = sys.argv[3] | 257 formula_col = sys.argv[3] |
253 molecular_mass_col = sys.argv[4] | 258 molecular_mass_col = sys.argv[4] |
254 metexp_dblink_file = sys.argv[5] | 259 metexp_dblink_file = sys.argv[5] |
265 enriched_data = _query_and_add_data(input_data, casid_col, formula_col, molecular_mass_col, metexp_dblink, separation_method) | 270 enriched_data = _query_and_add_data(input_data, casid_col, formula_col, molecular_mass_col, metexp_dblink, separation_method) |
266 headers = input_data.keys() + ['METEXP hits for ','METEXP hits: organisms', 'METEXP hits: tissues', | 271 headers = input_data.keys() + ['METEXP hits for ','METEXP hits: organisms', 'METEXP hits: tissues', |
267 'METEXP hits: experiments','METEXP hits: user names','METEXP hits: column types', 'METEXP hits: CAS nrs', 'Link to METEXP hits'] | 272 'METEXP hits: experiments','METEXP hits: user names','METEXP hits: column types', 'METEXP hits: CAS nrs', 'Link to METEXP hits'] |
268 | 273 |
269 _save_data(enriched_data, headers, output_result) | 274 _save_data(enriched_data, headers, output_result) |
270 | 275 |
| | 276 seconds_end = int(round(time.time())) |
| | 277 print "Took " + str(seconds_end - seconds_start) + " seconds" |
| | 278 |
| | 279 |
271 | 280 |
272 if __name__ == '__main__': | 281 if __name__ == '__main__': |
273 main() | 282 main() |