1 '''Integration tests for the GCMS project'''
3 from pkg_resources import resource_filename # @UnresolvedImport # pylint: disable=E0611
4 from GCMS import library_lookup, combine_output
5 from GCMS.rankfilter_GCMS import rankfilter
6 import os.path
7 import sys
8 import unittest
9 import re
class IntegrationTest(unittest.TestCase):
    '''
    Integration tests that drive the ``main()`` entry points of library_lookup,
    combine_output and rankfilter by passing their arguments through sys.argv.
    '''

    @staticmethod
    def _ensure_dir(path):
        '''
        Create the directory (including parents) when it does not exist yet.
        @param path: directory to create
        '''
        if not os.path.exists(path):
            os.makedirs(path)

    @staticmethod
    def _run_main(main, arguments):
        '''
        Call a tool's main() with its arguments supplied through sys.argv.
        The previous sys.argv is restored afterwards (also when main() raises),
        so one test cannot leak its command line into another.
        @param main: zero-argument callable that reads its input from sys.argv
        @param arguments: list of command line arguments (without program name)
        '''
        saved_argv = sys.argv
        sys.argv = ['test'] + list(arguments)
        try:
            main()
        finally:
            sys.argv = saved_argv

    def test_library_lookup(self):
        '''
        Run library_lookup.main() for data/NIST_tabular.txt and compare the
        produced file with the reference determined earlier.
        '''
        # Create out folder
        outdir = "output/"  # tempfile.mkdtemp(prefix='test_library_lookup')
        self._ensure_dir(outdir)
        outfile_base = os.path.join(outdir, 'produced_library_lookup')
        outfile_txt = outfile_base + '.txt'

        # Build up arguments and run
        input_txt = resource_filename(__name__, "data/NIST_tabular.txt")
        library = resource_filename(__name__, "data/RIDB_subset.txt")
        regress_model = resource_filename(__name__, "data/ridb_poly_regression.txt")
        self._run_main(library_lookup.main,
                       [library,
                        input_txt,
                        'Capillary',
                        'Semi-standard non-polar',
                        outfile_txt,
                        'HP-5',
                        regress_model])

        # Compare with reference file: both files are split into
        # whitespace-separated tokens that are compared pairwise
        reference_txt = resource_filename(__name__, 'reference/produced_library_lookup.txt')
        expected = _read_file(reference_txt).split()
        actual = _read_file(outfile_txt).split()

        # zip() stops at the shortest list, so a truncated or extended output
        # has to be caught explicitly
        self.assertEqual(len(expected), len(actual))

        float_token = re.compile(r'\d+\.\d+')
        for exp, act in zip(expected, actual):
            if exp == act:
                # identical tokens need no numeric interpretation
                continue
            if float_token.match(exp) and float_token.match(act):
                # numeric tokens may differ in formatting/rounding
                self.assertAlmostEqual(float(exp), float(act), places=5)
            else:
                # compare the individual values (not the complete lists)
                self.assertEqual(exp, act)

    def test_combine_output_simple(self):
        '''
        Run combine_output.main() for data/Rankfilter.txt and data/Caslookup.txt.
        The comparison with the reference files is currently disabled, so this
        test only verifies that the run completes without raising.
        '''
        # Create out folder
        outdir = "output/"  # tempfile.mkdtemp(prefix='test_library_lookup')
        self._ensure_dir(outdir)
        outfile_base = os.path.join(outdir, 'produced_combine_output')
        outfile_single_txt = outfile_base + '_single.txt'
        outfile_multi_txt = outfile_base + '_multi.txt'

        # Build up arguments and run
        input_rankfilter = resource_filename(__name__, "data/Rankfilter.txt")
        input_caslookup = resource_filename(__name__, "data/Caslookup.txt")
        self._run_main(combine_output.main,
                       [input_rankfilter,
                        input_caslookup,
                        outfile_single_txt,
                        outfile_multi_txt])

        # Compare with reference files (disabled):
        # reference_single_txt = resource_filename(__name__, 'reference/produced_combine_output_single.txt')
        # reference_multi_txt = resource_filename(__name__, 'reference/produced_combine_output_multi.txt')
        # self.failUnlessEqual(_read_file(reference_single_txt), _read_file(outfile_single_txt))
        # self.failUnlessEqual(_read_file(reference_multi_txt), _read_file(outfile_multi_txt))

        # Clean up
        # shutil.rmtree(tempdir)

    def def_test_rank_filter_advanced(self):
        '''
        Run main of RankFilter with the integration configuration file.
        The name deliberately does not start with "test": this step is only
        executed as part of test_combine_output_advanced.
        '''
        # Create out folder
        outdir = "output/integration/"
        self._ensure_dir(outdir)

        # Build up arguments and run
        input_txt = resource_filename(__name__, "data/integration/RankFilterInput_conf.txt")
        self._run_main(rankfilter.main, [input_txt])
        # No comparison with reference files is done here

    def def_test_library_lookup_advanced(self):
        '''
        Run library_lookup.main() for data/integration/NIST_identification_results_tabular.txt.
        The name deliberately does not start with "test": this step is only
        executed as part of test_combine_output_advanced.
        '''
        # Create out folder
        outdir = "output/integration/"
        self._ensure_dir(outdir)
        outfile_base = os.path.join(outdir, 'produced_library_lookup_ADVANCED')
        outfile_txt = outfile_base + '.txt'

        # Build up arguments and run
        input_txt = resource_filename(__name__, "data/integration/NIST_identification_results_tabular.txt")
        library = resource_filename(__name__, "../repositories/PRIMS-metabolomics/RI_DB_libraries/Library_RI_DB_capillary_columns-noDuplicates.txt")
        regress_model = resource_filename(__name__, "data/integration/regression_MODEL_for_columns.txt")
        self._run_main(library_lookup.main,
                       [library,
                        input_txt,
                        'Capillary',
                        'Semi-standard non-polar',
                        outfile_txt,
                        'DB-5',
                        regress_model])

    def test_combine_output_advanced(self):
        '''
        Variant on test case above, but a bit more complex as some of the centrotypes have
        different NIST hits which should give them different RI values. This test also
        runs not only the combine output, but the other two preceding steps as well,
        so it ensures the integration also works on the current code of all three tools.
        '''
        # Run RankFilter
        self.def_test_rank_filter_advanced()

        # Run library CAS RI lookup
        self.def_test_library_lookup_advanced()

        outdir = "output/integration/"
        outfile_base = os.path.join(outdir, 'produced_combine_output')
        outfile_single_txt = outfile_base + '_single.txt'
        outfile_multi_txt = outfile_base + '_multi.txt'

        # Build up arguments and run
        # NOTE(review): these inputs are resolved relative to the package, while
        # the preceding steps write to "output/integration/" relative to the
        # current working directory - presumably both are the same location
        # when the tests are run; confirm.
        input_rankfilter = resource_filename(__name__, "output/integration/produced_rank_filter_out.txt")
        input_caslookup = resource_filename(__name__, "output/integration/produced_library_lookup_ADVANCED.txt")
        self._run_main(combine_output.main,
                       [input_rankfilter,
                        input_caslookup,
                        outfile_single_txt,
                        outfile_multi_txt])

        # Compare with reference files (disabled):
        # reference_single_txt = resource_filename(__name__, 'reference/produced_combine_output_single.txt')
        # reference_multi_txt = resource_filename(__name__, 'reference/produced_combine_output_multi.txt')
        # self.failUnlessEqual(_read_file(reference_single_txt), _read_file(outfile_single_txt))
        # self.failUnlessEqual(_read_file(reference_multi_txt), _read_file(outfile_multi_txt))

        # Check 1: output single should have one record per centrotype:
        # (not implemented)

        # Check 2: output single has more records than output multi:
        combine_result_single_items = combine_output._process_data(outfile_single_txt)
        combine_result_multi_items = combine_output._process_data(outfile_multi_txt)
        self.assertGreater(len(combine_result_single_items['Centrotype']),
                           len(combine_result_multi_items['Centrotype']))

        # Check 3: library_lookup RI column, centrotype column, ri_svr column are correct:
        caslookup_items = combine_output._process_data(input_caslookup)
        rankfilter_items = combine_output._process_data(input_rankfilter)

        # check that the caslookup RI column is correctly maintained in its original order in
        # the combined file:
        ri_caslookup = caslookup_items['RI']
        ri_combine_single = combine_result_single_items['RI']
        self.assertListEqual(ri_caslookup, ri_combine_single)

        # check the centrotype column's integrity:
        centrotype_caslookup = caslookup_items['Centrotype']
        centrotype_combine_single = combine_result_single_items['Centrotype']
        centrotype_rankfilter = _get_centrotype_rankfilter(rankfilter_items['ID'])
        self.assertListEqual(centrotype_caslookup, centrotype_combine_single)
        self.assertListEqual(centrotype_caslookup, centrotype_rankfilter)

        # integration and integrity checks:
        file_NIST = resource_filename(__name__, "data/integration/NIST_identification_results_tabular.txt")
        file_NIST_items = combine_output._process_data(file_NIST)
        # check that rank filter output has exactly the same ID items as the original NIST input file:
        self.assertListEqual(file_NIST_items['ID'], rankfilter_items['ID'])
        # check the same for the CAS column:
        self.assertListEqual(_get_strippedcas(file_NIST_items['CAS']), rankfilter_items['CAS'])
        # now check the NIST CAS column against the cas lookup results:
        cas_NIST = _get_processedcas(file_NIST_items['CAS'])
        self.assertListEqual(cas_NIST, caslookup_items['CAS'])
        # now check the CAS of the combined result. If all checks are OK, it means the CAS column's order
        # and values remained stable throughout all steps:
        self.assertListEqual(rankfilter_items['CAS'], combine_result_single_items['CAS'])

        # check that the rankfilter RIsvr column is correctly maintained in its original order in
        # the combined file:
        risvr_rankfilter = rankfilter_items['RIsvr']
        risvr_combine_single = combine_result_single_items['RIsvr']
        self.assertListEqual(risvr_rankfilter, risvr_combine_single)
222 def _get_centrotype_rankfilter(id_list):
223 '''
224 returns the list of centrotype ids given a list of ID in the
225 form e.g. 74-1.0-564-1905200-7, where the numbers before the
226 first "-" are the centrotype id
227 '''
228 result = []
229 for compound_id_idx in xrange(len(id_list)):
230 compound_id = id_list[compound_id_idx]
231 centrotype = compound_id.split('-')[0]
232 result.append(centrotype)
234 return result
237 def _get_processedcas(cas_list):
238 '''
239 returns the list cas numbers in the form C64175 instead of 64-17-5
240 '''
241 result = []
242 for cas_id_idx in xrange(len(cas_list)):
243 cas = cas_list[cas_id_idx]
244 processed_cas = 'C' + str(cas.replace('-', '').strip())
245 result.append(processed_cas)
247 return result
249 def _get_strippedcas(cas_list):
250 '''
251 removes the leading white space from e.g. " 64-17-5"
252 '''
253 result = []
254 for cas_id_idx in xrange(len(cas_list)):
255 cas = cas_list[cas_id_idx]
256 processed_cas = cas.strip()
257 result.append(processed_cas)
259 return result
262 def _read_file(filename):
263 '''
264 Helper method to quickly read a file
265 @param filename:
266 '''
267 with open(filename) as handle:
268 return handle.read()