Mercurial > repos > pieterlukasse > prims_metabolomics2
diff test/test_library_lookup.py @ 0:dffc38727496
initial commit
author | pieter.lukasse@wur.nl |
---|---|
date | Sat, 07 Feb 2015 22:02:00 +0100 (2015-02-07) |
parents | |
children |
line wrap: on
line diff
# File: test/test_library_lookup.py (reconstructed from the '+' lines of the added-file diff hunk).
'''
Created on Mar 6, 2012

Unit tests for the GCMS 'library_lookup' tool and for the directory listing
helper of 'match_library'.

@author: marcelk
'''
from GCMS import library_lookup, match_library
from pkg_resources import resource_filename  # @UnresolvedImport # pylint: disable=E0611
import os
import shutil
import tempfile
import unittest


class Test(unittest.TestCase):
    '''
    Tests the 'library_lookup' Galaxy tool
    '''

    def setUp(self):
        '''
        Resolves the paths of the test data files (relative to this module) used by the tests
        '''
        self.ri_database = resource_filename(__name__, "data/RIDB_subset.txt")
        self.nist_output = resource_filename(__name__, "data/NIST_tabular.txt")
        self.ridb_poly_regress = resource_filename(__name__, "data/ridb_poly_regression.txt")
        self.ridb_linear_regress = resource_filename(__name__, "data/ridb_linear_regression.txt")

    def test_create_lookup_table(self):
        '''
        Tests the 'create_lookup_table' function
        '''
        column_type = 'Capillary'
        polarity = 'Semi-standard non-polar'
        lookup_dict = library_lookup.create_lookup_table(self.ri_database, column_type, polarity)
        # Every record stored for this key must carry the requested column type (field index 4)
        self.assertTrue(all(res[4] == column_type for res in lookup_dict['4177166']))
        self.assertEqual(['C51276336', '2,6-Dimethyl-octa-1,7-dien-3,6-diol', 'C10H18O2',
                          '1277', 'Capillary', 'Semi-standard non-polar', 'DB-5MS', '1',
                          'C51276336_DB-5MS', '', '', ''], lookup_dict['51276336'][1])

    def test_read_model(self):
        '''
        Tests reading the regression model data containing the parameters required for converting
        retention indices between GC-columns
        '''
        model, _ = library_lookup._read_model(self.ridb_poly_regress)
        # Order of values: coefficient 1 through 4, left limit, right limit
        # Polynomial model
        self.assertEqual([20.6155874639486, 0.945187096379008, 3.96480787567566e-05, -9.04377237159287e-09,
                          628.0, 2944.0, 405.0, 0, 0.998685262365514], model['HP-5']['SE-54'])
        self.assertEqual([-92.3963391356951, 1.26116176393346, -0.000191991657547972, 4.15387371263164e-08,
                          494.0, 2198.0, 407.0, 0, 0.996665023122993], model['Apiezon L']['Squalane'])
        # Linear model
        model, _ = library_lookup._read_model(self.ridb_linear_regress)
        self.assertEqual([2.81208738561543, 0.99482475526584, 628.0, 2944.0, 405.0, 0, 0.998643883946458],
                         model['HP-5']['SE-54'])
        self.assertEqual([19.979922768462, 0.993741869298272, 494.0, 2198.0, 407.0, 0, 0.99636062891041],
                         model['Apiezon L']['Squalane'])

    def test_apply_regression(self):
        '''
        Tests the regression model on some arbitrary retention indices
        '''
        poly_model, _ = library_lookup._read_model(self.ridb_poly_regress)
        linear_model, _ = library_lookup._read_model(self.ridb_linear_regress)
        retention_indices = [1000, 1010, 1020, 1030, 1040, 1050]
        converted_poly = [library_lookup._apply_poly_regression('HP-5', 'DB-5', ri, poly_model)
                          for ri in retention_indices]
        converted_linear = [library_lookup._apply_linear_regression('HP-5', 'DB-5', ri, linear_model)
                            for ri in retention_indices]

        self.assertEqual([1003.0566541860778, 1013.0979459524663, 1023.1358645806529, 1033.170466241159,
                          1043.2018071045052, 1053.2299433412131], converted_poly)
        self.assertEqual([1001.8127584915925, 1011.830140783027, 1021.8475230744615, 1031.864905365896,
                          1041.8822876573306, 1051.899669948765], converted_linear)

        # Test polynomial limit detection, the following RI falls outside of the possible limits
        ri = 3400
        converted_poly = library_lookup._apply_poly_regression('HP-5', 'DB-5', ri, poly_model)
        self.assertEqual(False, converted_poly)

    def test_preferred_hit(self):
        '''
        Tests the matching of the hits with the preferred column, including regression
        '''
        model, method = library_lookup._read_model(self.ridb_poly_regress)
        column_type = 'Capillary'
        polarity = 'Semi-standard non-polar'
        lookup_dict = library_lookup.create_lookup_table(self.ri_database, column_type, polarity)
        hits = lookup_dict['150867']
        # No regression, should however consider order of given preference
        match = library_lookup._preferred(hits, ['SE-52', 'DB-5', 'HP-5'], column_type, polarity, model, method)
        expected = (['C150867', '(E)-phytol', 'C20H40O', '2110', 'Capillary',
                     'Semi-standard non-polar', 'SE-52', '', 'C150867_SE-52', '', '', ''], False)
        self.assertEqual(expected, match)

        # Perform regression by looking for 'OV-101' which isn't there. 'SE-52' has the best regression model
        # of the available columns
        match = library_lookup._preferred(hits, ['OV-101'], column_type, polarity, model, method)
        expected = (['C150867', '(E)-phytol', 'C20H40O', 2158.5769891569125, 'Capillary',
                     'Semi-standard non-polar', 'SE-52', '', 'C150867_SE-52', '', '', ''], 'SE-52')
        self.assertEqual(expected, match)

    def test_format_result(self):
        '''
        Tests the 'format_result' function
        '''
        column_type = 'Capillary'
        polarity = 'Semi-standard non-polar'

        # Look for DB-5
        pref_column = ['DB-5']
        model, method = library_lookup._read_model(self.ridb_poly_regress)
        lookup_dict = library_lookup.create_lookup_table(self.ri_database, column_type, polarity)
        data = library_lookup.format_result(lookup_dict, self.nist_output, pref_column, column_type,
                                            polarity, model, method)

        # remove non-hits from set:
        data = _get_hits_only(data)
        self.assertEqual(['C544354', 'Ethyl linoleate', 'C20H36O2', '2155', 'Capillary', 'Semi-standard non-polar',
                          'DB-5', '1', 'C544354_DB-5', '1810', 'None', '', '', '0'], data[20])
        self.assertEqual(111, len(data))

        # Look for both DB-5 and HP-5
        pref_column = ['DB-5', 'HP-5']
        data = library_lookup.format_result(lookup_dict, self.nist_output, pref_column, column_type,
                                            polarity, False, None)
        # remove non-hits from set:
        data = _get_hits_only(data)
        self.assertEqual(['C502614', '.beta.-(E)-Farnesene', 'C15H24', '1508', 'Capillary', 'Semi-standard non-polar',
                          'DB-5', '1', 'C502614_DB-5', '942', 'None', '1482', '1522', '22'], data[50])
        self.assertEqual(106, len(data))

    def test_save_data(self):
        '''
        Tests the creation of the output tabular file
        '''
        temp_folder = tempfile.mkdtemp(prefix='gcms_combine_output_')
        try:
            saved_data = os.path.join(temp_folder, 'output.tsv')
            column_type = 'Capillary'
            polarity = 'Semi-standard non-polar'
            pref_column = ['DB-5']
            lookup_dict = library_lookup.create_lookup_table(self.ri_database, column_type, polarity)
            data = library_lookup.format_result(lookup_dict, self.nist_output, pref_column, column_type,
                                                polarity, False, None)
            library_lookup._save_data(data, saved_data)
            self.assertTrue(os.path.exists(saved_data))
        finally:
            # Always remove the temporary folder, also when one of the steps above fails
            shutil.rmtree(temp_folder)

    def test_match_library_get_lib_files(self):
        '''
        Tests the match_library.py functionality
        '''
        riqc_libs_dir = resource_filename(__name__, "../repositories/PRIMS-metabolomics/RI_DB_libraries")
        get_library_files_output = match_library.get_directory_files(riqc_libs_dir)
        self.assertEqual(2, len(get_library_files_output))
        self.assertEqual("Library_RI_DB_capillary_columns-noDuplicates", get_library_files_output[0][0])
        #TODO change assert below to assert that the result is a file, so the test can run on other dirs as well:
        #self.assertEqual("E:\\workspace\\PRIMS-metabolomics\\python-tools\\tools\\GCMS\\test\\data\\riqc_libs\\RI DB library (capillary columns) Dec.2012.txt", get_library_files_output[0][1])
        #self.assertEqual("RI DB library (capillary columns) Jan.2013", get_library_files_output[1][0])

        # A non-existing directory must make get_directory_files raise. The previous
        # try / bare-except construction also swallowed the AssertionError of its own
        # "should not come here" check, so it could never fail.
        self.assertRaises(Exception, match_library.get_directory_files, "/blah")


def _get_hits_only(data):
    '''
    Removes items that have RI == 0.0 and Name == '' (these are dummy lines just for the output)

    Keeps, in their original order, the items whose name (index 1) is not empty and whose
    RI (index 3) compares greater than 0.0.
    '''
    # NOTE(review): the hits asserted in test_format_result carry the RI as a string
    # (e.g. '2155'), so this comparison relies on Python 2 mixed-type ordering; under
    # Python 3 a str/float comparison raises TypeError -- confirm before porting.
    return [item for item in data if item[1] != '' and item[3] > 0.0]


if __name__ == "__main__":
    #import sys;sys.argv = ['', 'Test.testName']
    unittest.main()