diff test/integration_tests.py @ 0:9d5f4f5f764b

Initial commit to toolshed
author pieter.lukasse@wur.nl
date Thu, 16 Jan 2014 13:10:00 +0100
parents
children 53e1eee93430
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/integration_tests.py	Thu Jan 16 13:10:00 2014 +0100
@@ -0,0 +1,268 @@
+'''Integration tests for the GCMS project'''
+
+from pkg_resources import resource_filename  # @UnresolvedImport # pylint: disable=E0611
+from GCMS import library_lookup, combine_output
+from GCMS.rankfilter_GCMS import rankfilter
+import os.path
+import sys
+import unittest
+import re
+
+
+class IntegrationTest(unittest.TestCase):
+    def test_library_lookup(self):
+        '''
+        Run main for data/NIST_tabular and compare produced files with references determined earlier.
+        '''
+        # Create out folder
+        outdir = "output/" #tempfile.mkdtemp(prefix='test_library_lookup')
+        if not os.path.exists(outdir):
+            os.makedirs(outdir)
+        outfile_base = os.path.join(outdir, 'produced_library_lookup')
+        outfile_txt = outfile_base + '.txt'
+
+        #Build up arguments and run
+        input_txt = resource_filename(__name__, "data/NIST_tabular.txt")
+        library = resource_filename(__name__, "data/RIDB_subset.txt")
+        regress_model = resource_filename(__name__, "data/ridb_poly_regression.txt")
+        sys.argv = ['test',
+                    library,
+                    input_txt,
+                    'Capillary',
+                    'Semi-standard non-polar',
+                    outfile_txt,
+                    'HP-5',
+                    regress_model]
+        # Execute main function with arguments provided through sys.argv
+        library_lookup.main()
+        #Compare with reference files
+        reference_txt = resource_filename(__name__, 'reference/produced_library_lookup.txt')
+        
+        #read both the reference file  and actual output files
+        expected = _read_file(reference_txt)
+        actual = _read_file(outfile_txt)
+        
+        #convert the read in files to lists we can compare
+        expected = expected.split()
+        actual = actual.split()
+
+        for exp, act in zip(expected, actual):
+            if re.match('\\d+\\.\\d+', exp):
+                exp = float(exp)
+                act = float(act)
+                self.assertAlmostEqual(exp, act, places=5)
+            else:
+                # compare values
+                self.failUnlessEqual(expected, actual)
+
+
+    def test_combine_output_simple(self):
+        '''
+        Run main for data/NIST_tabular and compare produced files with references determined earlier.
+        '''
+        # Create out folder
+        outdir = "output/" #tempfile.mkdtemp(prefix='test_library_lookup')
+        if not os.path.exists(outdir):
+            os.makedirs(outdir)
+        outfile_base = os.path.join(outdir, 'produced_combine_output')
+        outfile_single_txt = outfile_base + '_single.txt'
+        outfile_multi_txt = outfile_base + '_multi.txt'
+
+        #Build up arguments and run
+        input_rankfilter = resource_filename(__name__, "data/Rankfilter.txt")
+        input_caslookup = resource_filename(__name__, "data/Caslookup.txt")
+        sys.argv = ['test',
+                    input_rankfilter,
+                    input_caslookup,
+                    outfile_single_txt,
+                    outfile_multi_txt]
+        # Execute main function with arguments provided through sys.argv
+        combine_output.main()
+        #Compare with reference files
+        # reference_single_txt = resource_filename(__name__, 'reference/produced_combine_output_single.txt')
+        # reference_multi_txt = resource_filename(__name__, 'reference/produced_combine_output_multi.txt')
+        # self.failUnlessEqual(_read_file(reference_single_txt), _read_file(outfile_single_txt))
+        # self.failUnlessEqual(_read_file(reference_multi_txt), _read_file(outfile_multi_txt))
+
+        #Clean up
+        #shutil.rmtree(tempdir)
+
+
+        
+    def def_test_rank_filter_advanced(self):
+        '''
+        Run main of RankFilter
+        '''
+        # Create out folder
+        outdir = "output/integration/"
+        if not os.path.exists(outdir):
+            os.makedirs(outdir)
+
+        #Build up arguments and run
+        input_txt = resource_filename(__name__, "data/integration/RankFilterInput_conf.txt")
+        sys.argv = ['test', 
+                    input_txt]
+        # Execute main function with arguments provided through sys.argv
+        rankfilter.main()
+        #Compare with reference files
+               
+    def def_test_library_lookup_advanced(self):
+        '''
+        Run main for data/NIST_tabular and compare produced files with references determined earlier.
+        '''
+        # Create out folder
+        outdir = "output/integration/" 
+        if not os.path.exists(outdir):
+            os.makedirs(outdir)
+        outfile_base = os.path.join(outdir, 'produced_library_lookup_ADVANCED')
+        outfile_txt = outfile_base + '.txt'
+
+        #Build up arguments and run
+        input_txt = resource_filename(__name__, "data/integration/NIST_identification_results_tabular.txt")
+        library = resource_filename(__name__, "data/integration/Library_RI_DB_capillary_columns-noDuplicates.txt")
+        regress_model = resource_filename(__name__, "data/integration/regression_MODEL_for_columns.txt")
+        sys.argv = ['test',
+                    library,
+                    input_txt,
+                    'Capillary',
+                    'Semi-standard non-polar',
+                    outfile_txt,
+                    'DB-5',
+                    regress_model]
+        # Execute main function with arguments provided through sys.argv
+        library_lookup.main()
+
+
+        
+    def test_combine_output_advanced(self):
+        '''
+        Variant on test_combine_output_simple, but a bit more complex as some of the
+        centrotypes have different NIST hits which should give them different RI values.
+        This test runs not only combine_output, but the two preceding steps (RankFilter
+        and the library lookup) as well, so it ensures the integration also works on
+        the current code of all three tools.
+        '''
+
+        # Run RankFilter
+        self.def_test_rank_filter_advanced()
+
+        # Run library CAS RI lookup
+        self.def_test_library_lookup_advanced()
+
+        outdir = "output/integration/"    
+        outfile_base = os.path.join(outdir, 'produced_combine_output')
+        outfile_single_txt = outfile_base + '_single.txt'
+        outfile_multi_txt = outfile_base + '_multi.txt'
+
+        #Build up arguments and run
+        # NOTE(review): the two inputs below are located with resource_filename (i.e.
+        # relative to this module), while the output paths above are relative to the
+        # working directory - presumably the tests are expected to be run from the
+        # directory of this module; confirm.
+        input_rankfilter = resource_filename(__name__, "output/integration/produced_rank_filter_out.txt")
+        input_caslookup = resource_filename(__name__, "output/integration/produced_library_lookup_ADVANCED.txt")
+        sys.argv = ['test',
+                    input_rankfilter,
+                    input_caslookup,
+                    outfile_single_txt,
+                    outfile_multi_txt]
+        # Execute main function with arguments provided through sys.argv
+        combine_output.main()
+        # The comparison with the reference files is currently disabled:
+#        reference_single_txt = resource_filename(__name__, 'reference/produced_combine_output_single.txt')
+#        reference_multi_txt = resource_filename(__name__, 'reference/produced_combine_output_multi.txt')
+#        self.failUnlessEqual(_read_file(reference_single_txt), _read_file(outfile_single_txt))
+#        self.failUnlessEqual(_read_file(reference_multi_txt), _read_file(outfile_multi_txt))
+
+        # Check 1: output single should have one record per centrotype
+        # (no assertion is implemented for this check)
+
+
+        # Check 2: the assertion below requires the 'single' output to have more
+        # 'Centrotype' records than the 'multi' output.
+        # NOTE(review): the original comment read "output single has more records
+        # than output single" - confirm that this is the intended direction.
+        combine_result_single_items =  combine_output._process_data(outfile_single_txt)
+        combine_result_multi_items =  combine_output._process_data(outfile_multi_txt)
+        self.assertGreater(len(combine_result_single_items['Centrotype']), 
+                           len(combine_result_multi_items['Centrotype']))
+
+
+        # Check 3: library_lookup RI column, centrotype column, ri_svr column are correct:
+        caslookup_items = combine_output._process_data(input_caslookup)
+        rankfilter_items = combine_output._process_data(input_rankfilter)
+
+        # check that the caslookup RI column is correctly maintained in its original order in
+        # the combined file:
+        ri_caslookup = caslookup_items['RI']
+        ri_combine_single = combine_result_single_items['RI']
+        self.assertListEqual(ri_caslookup, ri_combine_single) 
+
+        # check the centrotype column's integrity: the cas lookup output, the combined
+        # output and the centrotype ids derived from the rank filter 'ID' column have
+        # to be the same lists
+        centrotype_caslookup = caslookup_items['Centrotype']
+        centrotype_combine_single = combine_result_single_items['Centrotype']
+        centrotype_rankfilter = _get_centrotype_rankfilter(rankfilter_items['ID'])
+        self.assertListEqual(centrotype_caslookup, centrotype_combine_single)
+        self.assertListEqual(centrotype_caslookup, centrotype_rankfilter)
+
+        # integration and integrity checks:
+        file_NIST = resource_filename(__name__, "data/integration/NIST_identification_results_tabular.txt")
+        file_NIST_items = combine_output._process_data(file_NIST)
+        # check that rank filter output has exactly the same ID items as the original NIST input file:
+        self.assertListEqual(file_NIST_items['ID'], rankfilter_items['ID']) 
+        # check the same for the CAS column (compared after stripping surrounding
+        # white space from the NIST values):
+        self.assertListEqual(_get_strippedcas(file_NIST_items['CAS']), rankfilter_items['CAS'])
+        # now check the NIST CAS column (converted to the "C64175" form) against the
+        # cas lookup results:
+        cas_NIST = _get_processedcas(file_NIST_items['CAS'])
+        self.assertListEqual(cas_NIST, caslookup_items['CAS'])
+        # now check the CAS of the combined result. If all checks are OK, it means the CAS column's order
+        # and values remained stable throughout all steps: 
+        self.assertListEqual(rankfilter_items['CAS'], combine_result_single_items['CAS']) 
+
+        # check that the rankfilter RIsvr column is correctly maintained in its original order in
+        # the combined file:
+        risvr_rankfilter = rankfilter_items['RIsvr']
+        risvr_combine_single = combine_result_single_items['RIsvr']
+        self.assertListEqual(risvr_rankfilter, risvr_combine_single) 
+
+        
+   
+
+def _get_centrotype_rankfilter(id_list):
+    '''
+    returns the list of centrotype ids given a list of ID in the
+    form e.g. 74-1.0-564-1905200-7, where the numbers before the 
+    first "-" are the centrotype id
+    '''
+    result = []
+    for compound_id_idx in xrange(len(id_list)):
+        compound_id = id_list[compound_id_idx]
+        centrotype = compound_id.split('-')[0]
+        result.append(centrotype) 
+
+    return result
+
+
+def _get_processedcas(cas_list):
+    '''
+    returns the list cas numbers in the form C64175 instead of 64-17-5
+    '''
+    result = []
+    for cas_id_idx in xrange(len(cas_list)):
+        cas = cas_list[cas_id_idx]
+        processed_cas = 'C' + str(cas.replace('-', '').strip())
+        result.append(processed_cas) 
+
+    return result
+
+def _get_strippedcas(cas_list):
+    '''
+    removes the leading white space from e.g. " 64-17-5"
+    '''
+    result = []
+    for cas_id_idx in xrange(len(cas_list)):
+        cas = cas_list[cas_id_idx]
+        processed_cas = cas.strip()
+        result.append(processed_cas) 
+
+    return result
+
+
+def _read_file(filename):
+    '''
+    Helper method to quickly read a file
+    @param filename:
+    '''
+    with open(filename) as handle:
+        return handle.read()