comparison clipkit_repo/tests/unit/test_helpers.py @ 0:49b058e85902 draft

"planemo upload for repository https://github.com/jlsteenwyk/clipkit commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
author padge
date Fri, 25 Mar 2022 13:04:31 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:49b058e85902
1 import pytest
2 import pytest_mock
3 from pathlib import Path
4
5
6 import numpy as np
7 from Bio import AlignIO
8 from Bio import SeqIO
9 from Bio.Seq import Seq
10 from Bio.SeqRecord import SeqRecord
11 from Bio.Align import MultipleSeqAlignment
12
13 from clipkit.helpers import count_characters_at_position
14 from clipkit.helpers import get_sequence_at_position_and_report_features
15 from clipkit.helpers import parsimony_informative_or_constant
16 from clipkit.helpers import populate_empty_keepD_and_trimD
17 from clipkit.helpers import join_keepD_and_trimD
18 from clipkit.helpers import write_trimD
19 from clipkit.helpers import write_keepD
20 from clipkit.files import FileFormat
21
# Path of this test module; the tests below load example alignments from
# the "examples" directory next to it via ``here.parent``.
here = Path(__file__)
23
24
@pytest.fixture
def sample_msa():
    """Return a five-record MultipleSeqAlignment used as a mock return value.

    Each record has the literal sequence text "['A']", an id from "1" to
    "5", the name "<unknown name>", an empty description and no dbxrefs.
    """
    record_ids = ("1", "2", "3", "4", "5")
    records = [
        SeqRecord(
            seq=Seq("['A']"),
            id=record_id,
            name="<unknown name>",
            description="",
            dbxrefs=[],
        )
        for record_id in record_ids
    ]
    return MultipleSeqAlignment(records)
66
67
class TestCountCharactersAtPosition(object):
    """Checks on the mapping returned by count_characters_at_position."""

    def test_gives_count_for_each_char(self):
        """Every input character is a key and there is one key per unique character."""
        ## setup
        column = "ACTTTGGG"
        distinct_chars = set(column)

        ## execution
        counts = count_characters_at_position(column)

        ## check results
        # collect any input character that has no associated key
        absent = [char for char in column if char not in counts.keys()]
        assert not absent

        # the result must hold exactly one entry per unique input character
        assert len(counts) == len(distinct_chars)
84
85
class TestGetSequenceAtPositionAndReportFeatures(object):
    """Type checks on the pair returned by get_sequence_at_position_and_report_features."""

    def test_gets_sequence_and_gappyness(self):
        """The helper returns a (str, float) pair for position 5 of simple.fa."""
        ## setup
        position = 5
        msa = AlignIO.read(f"{here.parent}/examples/simple.fa", "fasta")

        ## execution
        column_seq, column_gappyness = get_sequence_at_position_and_report_features(
            msa, position
        )

        ## check results
        # only the output types are verified here, not the values
        assert isinstance(column_seq, str)
        assert isinstance(column_gappyness, float)
99
100
class TestParsimonyInformativeOrConstant(object):
    """Checks the two flags returned by parsimony_informative_or_constant."""

    def test_parsimony_informative_or_constant(self):
        """Each character-count dict yields the expected (informative, constant) pair."""
        ## set up
        # pi = parsimony informative
        counts_pi = {"A": 5, "T": 10, "G": 2, "C": 4}
        # npi = not parsimony informative
        counts_npi = {"A": 1, "T": 10, "G": 1}
        # const = constant
        counts_const = {"A": 10}
        # nconst = not constant
        counts_nconst = {"A": 1}

        ## execution
        # all four calls run before any assertion is evaluated
        pi_informative, pi_constant = parsimony_informative_or_constant(counts_pi)
        npi_informative, npi_constant = parsimony_informative_or_constant(counts_npi)
        const_informative, const_constant = parsimony_informative_or_constant(
            counts_const
        )
        nconst_informative, nconst_constant = parsimony_informative_or_constant(
            counts_nconst
        )

        ## check results
        # each pair reads (is parsimony informative, is constant site)
        assert (pi_informative, pi_constant) == (True, False)
        assert (npi_informative, npi_constant) == (False, False)
        assert (const_informative, const_constant) == (False, True)
        assert (nconst_informative, nconst_constant) == (False, False)
141
142
class TestPopulateEmptyKeepDAndTrimD(object):
    """Checks the empty keepD/trimD containers built by populate_empty_keepD_and_trimD."""

    def test_populate_empty_keepD_and_trimD(self):
        """Both dicts are keyed "1".."5" and hold length-6 zeroed bytes arrays."""
        ## set up
        alignment = AlignIO.read(f"{here.parent}/examples/simple.fa", "fasta")
        record_ids = ("1", "2", "3", "4", "5")

        ## execution
        keepD, trimD = populate_empty_keepD_and_trimD(alignment)

        ## check results
        # a fresh zeroed array per key, so no array is shared between entries
        expected_keepD = {rid: np.zeros([6], dtype=bytes) for rid in record_ids}
        expected_trimD = {rid: np.zeros([6], dtype=bytes) for rid in record_ids}

        # keepD is checked first, then trimD: keys, then per-key array contents
        for expected, actual in ((expected_keepD, keepD), (expected_trimD, trimD)):
            assert expected.keys() == actual.keys()
            assert all(
                np.array_equal(expected[key], actual[key]) for key in expected
            )
175
176
class TestJoinKeepDAndTrimD(object):
    """Checks that join_keepD_and_trimD turns per-record byte arrays into strings."""

    def test_join_keepD_and_trimD(self):
        """Arrays of single bytes come back as plain strings; empty arrays as ""."""
        ## set up
        kept_rows = {
            "1": "A-GTAT",
            "2": "A-G-AT",
            "3": "A-G-TA",
            "4": "AGA-TA",
            "5": "ACa-T-",
        }
        # one single-byte element per alignment column
        keepD = {
            rid: np.array([char.encode() for char in row], dtype="|S1")
            for rid, row in kept_rows.items()
        }
        # nothing trimmed: six empty byte strings per record
        trimD = {rid: np.array([b""] * 6, dtype="|S1") for rid in kept_rows}

        ## execution
        keepD, trimD = join_keepD_and_trimD(keepD, trimD)

        ## check results
        expected_keepD = dict(kept_rows)
        expected_trimD = {rid: "" for rid in kept_rows}

        assert expected_keepD == keepD
        assert expected_trimD == trimD
212
213
class TestWriteKeepD(object):
    """Checks that write_keepD hands the alignment to SeqIO.write."""

    def test_write_keepD_writes_file(self, mocker, sample_msa):
        """SeqIO.write is called once with the alignment, file name and format value."""
        ## set up
        keepD = {rid: ["A"] for rid in ("1", "2", "3", "4", "5")}
        out_file = "output_file_name.fa"
        out_file_format = FileFormat.fasta
        # the alignment constructor seen by clipkit.helpers returns the fixture,
        # so the object passed to the writer can be compared directly
        mocker.patch(
            "clipkit.helpers.MultipleSeqAlignment", return_value=sample_msa
        )
        mock_write = mocker.patch("clipkit.helpers.SeqIO.write")

        ## execution
        write_keepD(keepD, out_file, out_file_format)

        ## check results
        mock_write.assert_called_once_with(
            sample_msa, out_file, out_file_format.value
        )
229
230
class TestWriteTrimD(object):
    """Checks that write_trimD hands the alignment to SeqIO.write."""

    def test_write_trimD_calls_seqio_write(self, mocker, sample_msa):
        """SeqIO.write is called once, targeting "<out_file>.complement"."""
        ## set up
        trimD = {"1": ["A"], "2": ["A"], "3": ["A"], "4": ["A"], "5": ["A"]}
        out_file = "output_file_name.fa"
        out_file_format = FileFormat.fasta
        # the alignment constructor seen by clipkit.helpers returns the fixture,
        # so the object passed to the writer can be compared directly
        mock_msa = mocker.patch("clipkit.helpers.MultipleSeqAlignment")
        mock_msa.return_value = sample_msa
        # patch the writer through the module under test (the same target
        # TestWriteKeepD uses) rather than through the global Bio package path
        mock_write = mocker.patch("clipkit.helpers.SeqIO.write")

        ## execution
        write_trimD(trimD, out_file, out_file_format)

        ## check results
        expected_complement_out = f"{out_file}.complement"
        mock_write.assert_called_once_with(
            sample_msa, expected_complement_out, out_file_format.value
        )