annotate test/integration_tests.py @ 49:f772a5caa86a

Added more options and better documentation. Added MsClust support for parsing XCMS alignment results. Improved output reports for XCMS wrappers. New tools.
author pieter.lukasse@wur.nl
date Wed, 10 Dec 2014 22:03:27 +0100
parents 53e1eee93430
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
1 '''Integration tests for the GCMS project'''
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
2
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
3 from pkg_resources import resource_filename # @UnresolvedImport # pylint: disable=E0611
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
4 from GCMS import library_lookup, combine_output
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
5 from GCMS.rankfilter_GCMS import rankfilter
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
6 import os.path
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
7 import sys
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
8 import unittest
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
9 import re
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
10
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
11
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
class IntegrationTest(unittest.TestCase):
    '''
    Integration tests that drive the command line entry points (main functions)
    of library_lookup, rankfilter and combine_output through sys.argv and
    inspect the files they produce.
    '''

    @staticmethod
    def _ensure_dir(path):
        '''
        Create the directory (including missing parents) when it does not exist yet.
        @param path: directory to create
        '''
        if not os.path.exists(path):
            os.makedirs(path)

    @staticmethod
    def _run_main(main_func, argv):
        '''
        Call main_func() while sys.argv is temporarily replaced by argv.
        The previous sys.argv is put back afterwards, also when main_func raises,
        so one test does not leak its arguments into the next one.
        @param main_func: callable without arguments that reads sys.argv
        @param argv: complete argument list, including the program name at index 0
        '''
        saved_argv = sys.argv
        sys.argv = argv
        try:
            main_func()
        finally:
            sys.argv = saved_argv

    def test_library_lookup(self):
        '''
        Run library_lookup.main for data/NIST_tabular.txt and compare the produced
        file, token by token, with the reference file determined earlier.
        '''
        # Create out folder
        outdir = "output/"  # tempfile.mkdtemp(prefix='test_library_lookup')
        self._ensure_dir(outdir)
        outfile_base = os.path.join(outdir, 'produced_library_lookup')
        outfile_txt = outfile_base + '.txt'

        # Build up arguments and run
        input_txt = resource_filename(__name__, "data/NIST_tabular.txt")
        library = resource_filename(__name__, "data/RIDB_subset.txt")
        regress_model = resource_filename(__name__, "data/ridb_poly_regression.txt")
        # Execute main function with arguments provided through sys.argv
        self._run_main(library_lookup.main,
                       ['test',
                        library,
                        input_txt,
                        'Capillary',
                        'Semi-standard non-polar',
                        outfile_txt,
                        'HP-5',
                        regress_model])

        # Compare with reference files
        reference_txt = resource_filename(__name__, 'reference/produced_library_lookup.txt')

        # Read both the reference file and the actual output file and split them
        # on whitespace into token lists we can compare
        expected = _read_file(reference_txt).split()
        actual = _read_file(outfile_txt).split()

        # zip() silently stops at the shorter list, so a truncated (or extended)
        # output file has to be caught explicitly
        self.assertEqual(len(expected), len(actual))

        float_pattern = re.compile(r'\d+\.\d+')
        for exp, act in zip(expected, actual):
            if float_pattern.match(exp):
                # Floating point tokens only have to agree up to 5 decimal places
                self.assertAlmostEqual(float(exp), float(act), places=5)
            else:
                # Every other token has to be identical. Compare the current pair
                # only: comparing the complete lists here would make the float
                # tolerance above meaningless.
                self.assertEqual(exp, act)

    def test_combine_output_simple(self):
        '''
        Run combine_output.main for data/Rankfilter.txt and data/Caslookup.txt.
        The comparison with the reference files is currently disabled, so this
        test only verifies that the run completes without raising.
        '''
        # Create out folder
        outdir = "output/"  # tempfile.mkdtemp(prefix='test_library_lookup')
        self._ensure_dir(outdir)
        outfile_base = os.path.join(outdir, 'produced_combine_output')
        outfile_single_txt = outfile_base + '_single.txt'
        outfile_multi_txt = outfile_base + '_multi.txt'

        # Build up arguments and run
        input_rankfilter = resource_filename(__name__, "data/Rankfilter.txt")
        input_caslookup = resource_filename(__name__, "data/Caslookup.txt")
        # Execute main function with arguments provided through sys.argv
        self._run_main(combine_output.main,
                       ['test',
                        input_rankfilter,
                        input_caslookup,
                        outfile_single_txt,
                        outfile_multi_txt])

        # Compare with reference files (disabled):
        # reference_single_txt = resource_filename(__name__, 'reference/produced_combine_output_single.txt')
        # reference_multi_txt = resource_filename(__name__, 'reference/produced_combine_output_multi.txt')
        # self.assertEqual(_read_file(reference_single_txt), _read_file(outfile_single_txt))
        # self.assertEqual(_read_file(reference_multi_txt), _read_file(outfile_multi_txt))

        # Clean up (disabled):
        # shutil.rmtree(tempdir)

    def def_test_rank_filter_advanced(self):
        '''
        Run main of RankFilter for data/integration/RankFilterInput_conf.txt.
        The name does not start with "test", so it is not collected as a test of
        its own; test_combine_output_advanced calls it as a preparation step.
        '''
        # Create out folder
        outdir = "output/integration/"
        self._ensure_dir(outdir)

        # Build up arguments and run
        input_txt = resource_filename(__name__, "data/integration/RankFilterInput_conf.txt")
        # Execute main function with arguments provided through sys.argv
        self._run_main(rankfilter.main,
                       ['test',
                        input_txt])
        # No comparison with reference files is done here

    def def_test_library_lookup_advanced(self):
        '''
        Run library_lookup.main for data/integration/NIST_identification_results_tabular.txt
        and write the result to output/integration/produced_library_lookup_ADVANCED.txt.
        The name does not start with "test", so it is not collected as a test of
        its own; test_combine_output_advanced calls it as a preparation step.
        '''
        # Create out folder
        outdir = "output/integration/"
        self._ensure_dir(outdir)
        outfile_base = os.path.join(outdir, 'produced_library_lookup_ADVANCED')
        outfile_txt = outfile_base + '.txt'

        # Build up arguments and run
        input_txt = resource_filename(__name__, "data/integration/NIST_identification_results_tabular.txt")
        library = resource_filename(__name__, "../repositories/PRIMS-metabolomics/RI_DB_libraries/Library_RI_DB_capillary_columns-noDuplicates.txt")
        regress_model = resource_filename(__name__, "data/integration/regression_MODEL_for_columns.txt")
        # Execute main function with arguments provided through sys.argv
        self._run_main(library_lookup.main,
                       ['test',
                        library,
                        input_txt,
                        'Capillary',
                        'Semi-standard non-polar',
                        outfile_txt,
                        'DB-5',
                        regress_model])

    def test_combine_output_advanced(self):
        '''
        Variant on test_combine_output_simple, but a bit more complex as some of the
        centrotypes have different NIST hits which should give them different RI
        values. This test also runs not only the combine output, but the other two
        preceding steps as well, so it ensures the integration also works on the
        current code of all three tools.
        '''

        # Run RankFilter
        self.def_test_rank_filter_advanced()

        # Run library CAS RI lookup
        self.def_test_library_lookup_advanced()

        outdir = "output/integration/"
        outfile_base = os.path.join(outdir, 'produced_combine_output')
        outfile_single_txt = outfile_base + '_single.txt'
        outfile_multi_txt = outfile_base + '_multi.txt'

        # Build up arguments and run.
        # NOTE(review): the preceding lookup step writes its output relative to the
        # current working directory, while the inputs below are resolved relative
        # to this package - presumably the tests are expected to be started from
        # the directory of this module; confirm.
        input_rankfilter = resource_filename(__name__, "output/integration/produced_rank_filter_out.txt")
        input_caslookup = resource_filename(__name__, "output/integration/produced_library_lookup_ADVANCED.txt")
        # Execute main function with arguments provided through sys.argv
        self._run_main(combine_output.main,
                       ['test',
                        input_rankfilter,
                        input_caslookup,
                        outfile_single_txt,
                        outfile_multi_txt])

        # Compare with reference files (disabled):
        # reference_single_txt = resource_filename(__name__, 'reference/produced_combine_output_single.txt')
        # reference_multi_txt = resource_filename(__name__, 'reference/produced_combine_output_multi.txt')
        # self.assertEqual(_read_file(reference_single_txt), _read_file(outfile_single_txt))
        # self.assertEqual(_read_file(reference_multi_txt), _read_file(outfile_multi_txt))

        # Check 1: output single should have one record per centrotype:
        # (not implemented)

        # Check 2: output single has more records than output multi.
        # _process_data presumably returns a mapping of column name to the list
        # of values in that column - verify against combine_output.
        combine_result_single_items = combine_output._process_data(outfile_single_txt)
        combine_result_multi_items = combine_output._process_data(outfile_multi_txt)
        self.assertGreater(len(combine_result_single_items['Centrotype']),
                           len(combine_result_multi_items['Centrotype']))

        # Check 3: library_lookup RI column, centrotype column, ri_svr column are correct:
        caslookup_items = combine_output._process_data(input_caslookup)
        rankfilter_items = combine_output._process_data(input_rankfilter)

        # check that the caslookup RI column is correctly maintained in its original order in
        # the combined file:
        ri_caslookup = caslookup_items['RI']
        ri_combine_single = combine_result_single_items['RI']
        self.assertListEqual(ri_caslookup, ri_combine_single)

        # check the centrotype column's integrity:
        centrotype_caslookup = caslookup_items['Centrotype']
        centrotype_combine_single = combine_result_single_items['Centrotype']
        centrotype_rankfilter = _get_centrotype_rankfilter(rankfilter_items['ID'])
        self.assertListEqual(centrotype_caslookup, centrotype_combine_single)
        self.assertListEqual(centrotype_caslookup, centrotype_rankfilter)

        # integration and integrity checks:
        file_NIST = resource_filename(__name__, "data/integration/NIST_identification_results_tabular.txt")
        file_NIST_items = combine_output._process_data(file_NIST)
        # check that rank filter output has exactly the same ID items as the original NIST input file:
        self.assertListEqual(file_NIST_items['ID'], rankfilter_items['ID'])
        # check the same for the CAS column:
        self.assertListEqual(_get_strippedcas(file_NIST_items['CAS']), rankfilter_items['CAS'])
        # now check the NIST CAS column against the cas lookup results:
        cas_NIST = _get_processedcas(file_NIST_items['CAS'])
        self.assertListEqual(cas_NIST, caslookup_items['CAS'])
        # now check the CAS of the combined result. If all checks are OK, it means the CAS column's order
        # and values remained stable throughout all steps:
        self.assertListEqual(rankfilter_items['CAS'], combine_result_single_items['CAS'])

        # check that the rankfilter RIsvr column is correctly maintained in its original order in
        # the combined file:
        risvr_rankfilter = rankfilter_items['RIsvr']
        risvr_combine_single = combine_result_single_items['RIsvr']
        self.assertListEqual(risvr_rankfilter, risvr_combine_single)
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
218
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
219
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
220
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
221
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
def _get_centrotype_rankfilter(id_list):
    '''
    Returns the list of centrotype ids given a list of IDs in the
    form e.g. 74-1.0-564-1905200-7, where the characters before the
    first "-" are the centrotype id. An ID without any "-" is returned
    unchanged. The order of id_list is preserved.
    @param id_list: iterable of ID strings
    '''
    # Iterate over the items directly instead of over xrange(len(...)):
    # shorter, and it also runs on Python 3 where xrange no longer exists.
    return [compound_id.split('-')[0] for compound_id in id_list]
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
235
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
236
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
def _get_processedcas(cas_list):
    '''
    Returns the list of cas numbers in the form C64175 instead of 64-17-5:
    all "-" characters are removed, surrounding white space is stripped and
    a "C" is put in front. The order of cas_list is preserved.
    @param cas_list: iterable of CAS number strings
    '''
    # Iterate over the items directly instead of over xrange(len(...)):
    # shorter, and it also runs on Python 3 where xrange no longer exists.
    return ['C' + cas.replace('-', '').strip() for cas in cas_list]
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
248
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
def _get_strippedcas(cas_list):
    '''
    Removes the surrounding (leading and trailing) white space from each
    cas number, e.g. " 64-17-5" becomes "64-17-5". The order of cas_list
    is preserved.
    @param cas_list: iterable of CAS number strings
    '''
    # Iterate over the items directly instead of over xrange(len(...)):
    # shorter, and it also runs on Python 3 where xrange no longer exists.
    return [cas.strip() for cas in cas_list]
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
260
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
261
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
def _read_file(filename):
    '''
    Return the complete contents of a file as one string.
    The file is opened with the default (text, read-only) mode and is
    closed again before returning, also when reading fails.
    @param filename: path of the file to read
    '''
    source = open(filename)
    try:
        contents = source.read()
    finally:
        source.close()
    return contents