Mercurial > repos > padge > clipkit
diff clipkit_repo/tests/unit/test_helpers.py @ 0:49b058e85902 draft
"planemo upload for repository https://github.com/jlsteenwyk/clipkit commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
| author | padge |
|---|---|
| date | Fri, 25 Mar 2022 13:04:31 +0000 |
| parents | |
| children | |
line wrap: on
line diff
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/clipkit_repo/tests/unit/test_helpers.py Fri Mar 25 13:04:31 2022 +0000
# @@ -0,0 +1,248 @@
"""Unit tests for the functions imported from ``clipkit.helpers``."""
import pytest
import pytest_mock
from pathlib import Path


import numpy as np
from Bio import AlignIO
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Align import MultipleSeqAlignment

from clipkit.helpers import count_characters_at_position
from clipkit.helpers import get_sequence_at_position_and_report_features
from clipkit.helpers import parsimony_informative_or_constant
from clipkit.helpers import populate_empty_keepD_and_trimD
from clipkit.helpers import join_keepD_and_trimD
from clipkit.helpers import write_trimD
from clipkit.helpers import write_keepD
from clipkit.files import FileFormat

here = Path(__file__)


def _load_simple_alignment():
    """Read the ``examples/simple.fa`` FASTA file located next to this module."""
    return AlignIO.read(f"{here.parent}/examples/simple.fa", "fasta")


@pytest.fixture
def sample_msa():
    """Build a five-record alignment (ids "1" to "5").

    Every record wraps the literal text ``['A']``; the write tests below use
    this object as the return value of the patched alignment constructor.
    """
    records = [
        SeqRecord(
            seq=Seq("['A']"),
            id=str(number),
            name="<unknown name>",
            description="",
            dbxrefs=[],
        )
        for number in range(1, 6)
    ]
    return MultipleSeqAlignment(records)


class TestCountCharactersAtPosition:
    """Checks on ``count_characters_at_position``."""

    def test_gives_count_for_each_char(self):
        """The result has exactly one entry per distinct input character."""
        ## setup
        sequence = "ACTTTGGG"

        ## execution
        counts = count_characters_at_position(sequence)

        ## check results
        # every character of the input must show up as a key
        absent = [letter for letter in sequence if letter not in counts.keys()]
        assert not absent

        # and there must be no keys beyond the unique characters
        assert len(counts) == len(set(sequence))


class TestGetSequenceAtPositionAndReportFeatures:
    """Checks on ``get_sequence_at_position_and_report_features``."""

    def test_gets_sequence_and_gappyness(self):
        """The call yields a ``str`` and a ``float``, in that order."""
        ## setup
        alignment = _load_simple_alignment()
        position = 5

        ## execution
        column, gap_fraction = get_sequence_at_position_and_report_features(
            alignment, position
        )

        ## check results
        # only the output types are verified here
        assert isinstance(column, str)
        assert isinstance(gap_fraction, float)


class TestParsimonyInformativeOrConstant:
    """Checks on ``parsimony_informative_or_constant``."""

    def test_parsimony_informative_or_constant(self):
        """Each occurrence table maps to the expected pair of flags."""
        ## set up
        # (occurrence table, (parsimony informative?, constant site?))
        scenarios = [
            # parsimony informative
            ({"A": 5, "T": 10, "G": 2, "C": 4}, (True, False)),
            # not parsimony informative
            ({"A": 1, "T": 10, "G": 1}, (False, False)),
            # constant
            ({"A": 10}, (False, True)),
            # not constant
            ({"A": 1}, (False, False)),
        ]

        ## execution
        # run every scenario before checking any of them
        outcomes = [
            parsimony_informative_or_constant(occurrences)
            for occurrences, _ in scenarios
        ]

        ## check results
        for (_, expected_flags), outcome in zip(scenarios, outcomes):
            informative_flag, constant_flag = outcome
            assert (informative_flag, constant_flag) == expected_flags


class TestPopulateEmptyKeepDAndTrimD:
    """Checks on ``populate_empty_keepD_and_trimD``."""

    def test_populate_empty_keepD_and_trimD(self):
        """Both dictionaries hold a zeroed six-slot bytes array per record id."""
        ## set up
        alignment = _load_simple_alignment()
        record_ids = ("1", "2", "3", "4", "5")

        ## execution
        keepD, trimD = populate_empty_keepD_and_trimD(alignment)

        ## check results
        # keepD is verified first, then trimD, against the same blank layout
        for produced in (keepD, trimD):
            blank = {
                record_id: np.zeros([6], dtype=bytes) for record_id in record_ids
            }
            assert blank.keys() == produced.keys()
            assert all(np.array_equal(blank[key], produced[key]) for key in blank)


class TestJoinKeepDAndTrimD:
    """Checks on ``join_keepD_and_trimD``."""

    def test_join_keepD_and_trimD(self):
        """Per-record single-byte arrays are joined into plain strings."""
        ## set up
        # one byte string per record; each is split into a '|S1' array below
        kept_rows = {
            "1": b"A-GTAT",
            "2": b"A-G-AT",
            "3": b"A-G-TA",
            "4": b"AGA-TA",
            "5": b"ACa-T-",
        }
        keepD = {
            record_id: np.array([bytes([code]) for code in row], dtype="|S1")
            for record_id, row in kept_rows.items()
        }
        # nothing has been trimmed: six empty slots per record
        trimD = {
            record_id: np.array([b""] * 6, dtype="|S1") for record_id in kept_rows
        }

        ## execution
        keepD, trimD = join_keepD_and_trimD(keepD, trimD)

        ## check results
        expected_keepD = {
            "1": "A-GTAT",
            "2": "A-G-AT",
            "3": "A-G-TA",
            "4": "AGA-TA",
            "5": "ACa-T-",
        }
        expected_trimD = dict.fromkeys("12345", "")

        assert expected_keepD == keepD
        assert expected_trimD == trimD


class TestWriteKeepD:
    """Checks on ``write_keepD``."""

    def test_write_keepD_writes_file(self, mocker, sample_msa):
        """``SeqIO.write`` is called once with the alignment, path and format value."""
        ## set up
        keepD = {str(number): ["A"] for number in range(1, 6)}
        out_file = "output_file_name.fa"
        out_file_format = FileFormat.fasta
        # the alignment constructor used by the helpers returns the fixture
        mocker.patch("clipkit.helpers.MultipleSeqAlignment", return_value=sample_msa)
        write_spy = mocker.patch("clipkit.helpers.SeqIO.write")

        ## execution
        write_keepD(keepD, out_file, out_file_format)

        ## check results
        write_spy.assert_called_once_with(sample_msa, out_file, out_file_format.value)


class TestWriteTrimD:
    """Checks on ``write_trimD``."""

    def test_write_trimD_calls_seqio_write(self, mocker, sample_msa):
        """``SeqIO.write`` is called once, targeting ``<out_file>.complement``."""
        ## set up
        trimD = {str(number): ["A"] for number in range(1, 6)}
        out_file = "output_file_name.fa"
        out_file_format = FileFormat.fasta
        # the alignment constructor used by the helpers returns the fixture
        mocker.patch("clipkit.helpers.MultipleSeqAlignment", return_value=sample_msa)
        write_spy = mocker.patch("Bio.SeqIO.write")

        ## execution
        write_trimD(trimD, out_file, out_file_format)

        ## check results
        complement_path = f"{out_file}.complement"
        write_spy.assert_called_once_with(
            sample_msa, complement_path, out_file_format.value
        )
