Mercurial > repos > padge > clipkit
comparison clipkit_repo/tests/unit/test_helpers.py @ 0:49b058e85902 draft
"planemo upload for repository https://github.com/jlsteenwyk/clipkit commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
| author | padge |
|---|---|
| date | Fri, 25 Mar 2022 13:04:31 +0000 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:49b058e85902 |
|---|---|
| 1 import pytest | |
| 2 import pytest_mock | |
| 3 from pathlib import Path | |
| 4 | |
| 5 | |
| 6 import numpy as np | |
| 7 from Bio import AlignIO | |
| 8 from Bio import SeqIO | |
| 9 from Bio.Seq import Seq | |
| 10 from Bio.SeqRecord import SeqRecord | |
| 11 from Bio.Align import MultipleSeqAlignment | |
| 12 | |
| 13 from clipkit.helpers import count_characters_at_position | |
| 14 from clipkit.helpers import get_sequence_at_position_and_report_features | |
| 15 from clipkit.helpers import parsimony_informative_or_constant | |
| 16 from clipkit.helpers import populate_empty_keepD_and_trimD | |
| 17 from clipkit.helpers import join_keepD_and_trimD | |
| 18 from clipkit.helpers import write_trimD | |
| 19 from clipkit.helpers import write_keepD | |
| 20 from clipkit.files import FileFormat | |
| 21 | |
# Path of this test module; tests build example-file paths from here.parent.
here = Path(__file__)
| 23 | |
| 24 | |
@pytest.fixture
def sample_msa():
    """Provide a five-record alignment with ids "1" through "5".

    Every record carries the same literal sequence text ``['A']``, the
    placeholder name ``<unknown name>``, an empty description and no
    database cross-references. The write tests hand this object back from
    the patched ``MultipleSeqAlignment`` constructor.
    """
    records = [
        SeqRecord(
            seq=Seq("['A']"),
            id=str(record_number),
            name="<unknown name>",
            description="",
            dbxrefs=[],
        )
        for record_number in range(1, 6)
    ]
    return MultipleSeqAlignment(records)
| 66 | |
| 67 | |
class TestCountCharactersAtPosition(object):
    """Checks for ``count_characters_at_position``."""

    def test_gives_count_for_each_char(self):
        """Each input character is a key, and there is one key per unique character."""
        ## setup
        column = "ACTTTGGG"

        ## execution
        counts = count_characters_at_position(column)

        ## check results
        # every character of the input must show up as a key
        assert all(letter in counts.keys() for letter in column)

        # the number of entries must match the number of distinct characters
        assert len(counts) == len(set(column))
| 84 | |
| 85 | |
class TestGetSequenceAtPositionAndReportFeatures(object):
    """Checks for ``get_sequence_at_position_and_report_features``."""

    def test_gets_sequence_and_gappyness(self):
        """The helper returns a ``str`` followed by a ``float`` for column 5."""
        ## setup
        fasta_path = f"{here.parent}/examples/simple.fa"
        msa = AlignIO.read(fasta_path, "fasta")
        column_index = 5

        ## execution
        outcome = get_sequence_at_position_and_report_features(msa, column_index)
        column_text, gappyness_value = outcome

        ## check results
        # only the output types are verified here
        assert isinstance(column_text, str)
        assert isinstance(gappyness_value, float)
| 99 | |
| 100 | |
class TestParsimonyInformativeOrConstant(object):
    """Checks for ``parsimony_informative_or_constant``."""

    def test_parsimony_informative_or_constant(self):
        """Four character-count tables yield the expected pair of flags.

        Each scenario is (character counts, expected parsimony-informative
        flag, expected constant-site flag).
        """
        ## set up
        scenarios = (
            # pi = parsimony informative -> True and False
            ({"A": 5, "T": 10, "G": 2, "C": 4}, True, False),
            # npi = not parsimony informative -> False and False
            ({"A": 1, "T": 10, "G": 1}, False, False),
            # Const = constant -> False and True
            ({"A": 10}, False, True),
            # nConst = not constant -> False and False
            ({"A": 1}, False, False),
        )

        ## execution
        # run every scenario before checking anything
        outcomes = [
            parsimony_informative_or_constant(char_counts)
            for char_counts, _, _ in scenarios
        ]

        ## check results
        for (got_pi, got_constant), (_, want_pi, want_constant) in zip(
            outcomes, scenarios
        ):
            assert got_pi == want_pi and got_constant == want_constant
| 141 | |
| 142 | |
class TestPopulateEmptyKeepDAndTrimD(object):
    """Checks for ``populate_empty_keepD_and_trimD``."""

    def test_populate_empty_keepD_and_trimD(self):
        """Both returned dicts hold a zeroed 6-element bytes array per record id."""
        ## set up
        fasta_path = f"{here.parent}/examples/simple.fa"
        msa = AlignIO.read(fasta_path, "fasta")

        ## execution
        keepD, trimD = populate_empty_keepD_and_trimD(msa)

        ## check results
        # keepD and trimD are expected to have the same empty layout
        for produced in (keepD, trimD):
            blank = {
                str(record_number): np.zeros([6], dtype=bytes)
                for record_number in range(1, 6)
            }
            assert blank.keys() == produced.keys()
            assert all(
                np.array_equal(blank[record_id], produced[record_id])
                for record_id in blank
            )
| 175 | |
| 176 | |
class TestJoinKeepDAndTrimD(object):
    """Checks for ``join_keepD_and_trimD``."""

    def test_join_keepD_and_trimD(self):
        """Per-record single-byte arrays are joined into one string per record."""
        ## set up
        # one row of text per record id; the input arrays are derived from these
        rows = {
            "1": "A-GTAT",
            "2": "A-G-AT",
            "3": "A-G-TA",
            "4": "AGA-TA",
            "5": "ACa-T-",
        }
        keepD = {
            record_id: np.array(
                [symbol.encode() for symbol in text], dtype="|S1"
            )
            for record_id, text in rows.items()
        }
        # trimD starts as six empty byte strings per record
        trimD = {
            record_id: np.array([b""] * 6, dtype="|S1") for record_id in rows
        }

        ## execution
        keepD, trimD = join_keepD_and_trimD(keepD, trimD)

        ## check results
        wanted_keepD = dict(rows)
        wanted_trimD = {record_id: "" for record_id in rows}

        assert wanted_keepD == keepD
        assert wanted_trimD == trimD
| 212 | |
| 213 | |
class TestWriteKeepD(object):
    """Checks for ``write_keepD``."""

    def test_write_keepD_writes_file(self, mocker, sample_msa):
        """``SeqIO.write`` is called once with the alignment, file name and format value."""
        ## set up
        keepD = {str(record_number): ["A"] for record_number in range(1, 6)}
        destination = "output_file_name.fa"
        fmt = FileFormat.fasta
        # make the alignment constructor inside clipkit.helpers return the fixture
        mocker.patch(
            "clipkit.helpers.MultipleSeqAlignment", return_value=sample_msa
        )
        write_spy = mocker.patch("clipkit.helpers.SeqIO.write")

        ## execution
        write_keepD(keepD, destination, fmt)

        ## check results
        write_spy.assert_called_once_with(sample_msa, destination, fmt.value)
| 229 | |
| 230 | |
class TestWriteTrimD(object):
    """Checks for ``write_trimD``."""

    def test_write_trimD_calls_seqio_write(self, mocker, sample_msa):
        """``SeqIO.write`` is called once, targeting ``<out_file>.complement``."""
        ## set up
        trimD = {str(record_number): ["A"] for record_number in range(1, 6)}
        destination = "output_file_name.fa"
        fmt = FileFormat.fasta
        # make the alignment constructor inside clipkit.helpers return the fixture
        mocker.patch(
            "clipkit.helpers.MultipleSeqAlignment", return_value=sample_msa
        )
        write_spy = mocker.patch("Bio.SeqIO.write")

        ## execution
        write_trimD(trimD, destination, fmt)

        ## check results
        # the trimmed sites are expected under the output name plus ".complement"
        complement_path = f"{destination}.complement"
        write_spy.assert_called_once_with(sample_msa, complement_path, fmt.value)
