view test/test_combine_output.py @ 23:85fd05d0d16c

New tool to Query multiple public repositories for elemental compositions from accurate mass values detected by high-resolution mass spectrometers
author pieter.lukasse@wur.nl
date Thu, 03 Apr 2014 16:44:11 +0200
parents 9d5f4f5f764b
children
line wrap: on
line source

'''
Created on Mar 27, 2012

@author: marcelk
'''
from GCMS import combine_output
from pkg_resources import resource_filename  # @UnresolvedImport # pylint: disable=E0611
import os
import shutil
import tempfile
import unittest


class Test(unittest.TestCase):
    '''
    Tests for the 'combine_output' Galaxy tool
    '''

    def setUp(self):
        '''
        Resolves the paths of the RankFilter and CasLookup fixture files
        that the individual tests feed into combine_output
        '''
        self.rf_output = resource_filename(__name__, "data/RankFilter.txt")
        self.cl_output = resource_filename(__name__, "data/CasLookup.txt")

    def test_process_data(self):
        '''
        Tests the processing of the RankFilter and CasLookup files into dictionaries
        '''
        rfdata = combine_output._process_data(self.rf_output)
        cldata = combine_output._process_data(self.cl_output)
        # Compare as sets: only the collection of CAS values matters here,
        # not the order in which they appear in the fixture files.
        self.assertEqual(set([' 18457-04-0', ' 55133-95-4', ' 58-08-2', ' 112-34-5']), set(rfdata['CAS']))
        self.assertEqual(set(['C58082', 'C18457040', 'C55133954', 'C112345']), set(cldata['CAS']))

    def test_add_hit(self):
        '''
        Tests the combination of two records from both the RankFilter- and CasLookup-tools
        '''
        rfdata = combine_output._process_data(self.rf_output)
        cldata = combine_output._process_data(self.cl_output)
        # Build one record per tool from the first entry of every column.
        index = 0
        rf_record = dict((key, rfdata[key][index]) for key in rfdata.keys())
        cl_record = dict((key, cldata[key][index]) for key in cldata.keys())

        hit = combine_output._add_hit(rf_record, cl_record)
        # The combined hit is expected to hold 27 entries for these fixtures.
        self.assertEqual(len(hit), 27)

        # Pass empty record, should fail combination
        self.assertRaises(KeyError, combine_output._add_hit, rf_record, {})

    def test_merge_data(self):
        '''
        Tests the merging of the RankFilter and CasLookup data
        '''
        rfdata = combine_output._process_data(self.rf_output)
        cldata = combine_output._process_data(self.cl_output)
        merged, _ = combine_output._merge_data(rfdata, cldata)
        centrotypes = _get_centrotypes(merged)
        # Check each expected code separately so a failure names the missing one.
        for centrotype in ('2716', '12723', '3403', '12710'):
            self.assertTrue(centrotype in centrotypes,
                            'centrotype {0} not found in merged data'.format(centrotype))

    def test_remove_formula(self):
        '''
        Tests the removal of the Formula from the 'Name' field (RankFilter output)
        '''
        name = "Caffeine C8H10N4O2"
        compound_name, compound_formula = combine_output._remove_formula(name)
        self.assertEqual(compound_name, 'Caffeine')
        self.assertEqual(compound_formula, 'C8H10N4O2')
        name = "Ethanol C2H6O"
        compound_name, compound_formula = combine_output._remove_formula(name)
        self.assertEqual(compound_name, 'Ethanol')
        self.assertEqual(compound_formula, 'C2H6O')
        # No formula to remove
        name = "Butanoic acid, 4-[(trimethylsilyl)oxy]-, trimethylsilyl ester"
        compound_name, compound_formula = combine_output._remove_formula(name)
        self.assertEqual(compound_name, name)
        self.assertEqual(compound_formula, False)

    def test_save_data(self):
        '''
        Tests the creation of the output tabular files (no content testing)
        '''
        temp_folder = tempfile.mkdtemp(prefix='gcms_combine_output_')
        try:
            saved_single_data = os.path.join(temp_folder, 'output_single.tsv')
            saved_multi_data = os.path.join(temp_folder, 'output_multi.tsv')
            rfdata = combine_output._process_data(self.rf_output)
            cldata = combine_output._process_data(self.cl_output)
            merged, nhits = combine_output._merge_data(rfdata, cldata)
            combine_output._save_data(merged, nhits, saved_single_data, saved_multi_data)
            self.assertTrue(os.path.exists(saved_single_data))
            self.assertTrue(os.path.exists(saved_multi_data))
        finally:
            # Always remove the scratch folder, also when an assertion fails.
            shutil.rmtree(temp_folder)


def _get_centrotypes(merged):
    '''
    returns centrotype codes found in merged set

    For every item in merged, the value at item[0][0] is collected, in order.
    NOTE(review): presumably item[0] is the first hit of the item and its
    first field the centrotype code -- confirm against combine_output._merge_data.
    '''
    # Index-based access is kept on purpose: it only relies on len() and
    # integer indexing of merged. range() works on both Python 2 and 3.
    return [merged[item_idx][0][0] for item_idx in range(len(merged))]


if __name__ == "__main__":
    # Run the tests of this module when the file is executed as a script.
    # To run a single test, set sys.argv to e.g. ['', 'Test.testName'] first.
    unittest.main()