comparison clipkit_repo/tests/integration/test_kpic_smart_gap_mode.py @ 0:49b058e85902 draft

"planemo upload for repository https://github.com/jlsteenwyk/clipkit commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
author padge
date Fri, 25 Mar 2022 13:04:31 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:49b058e85902
1 import pytest
2 from pathlib import Path
3
4 from clipkit.clipkit import execute
5 from clipkit.files import FileFormat
6 from clipkit.modes import TrimmingMode
7
# Path of this test module; the tests below build their sample and expected
# file paths from ``here.parent``.
here = Path(__file__)
9
10
@pytest.mark.integration
class TestKPICSmartGapsMode(object):
    """Integration tests for the ``kpic-smart-gap`` trimming mode.

    Every test trims one sample alignment by calling ``execute`` and then
    requires the written output file to be identical to the stored
    expected file.
    """

    @staticmethod
    def _run_and_compare(sample_name, output_file, expected_name, gaps):
        """Trim one sample in kpic-smart-gap mode and compare with the expected file.

        Args:
            sample_name: File name inside the ``samples`` directory next to
                this module.
            output_file: Path the trimmed alignment is written to.
            expected_name: File name inside the ``expected`` directory next
                to this module.
            gaps: Value passed to ``execute`` as its ``gaps`` argument.

        Raises:
            AssertionError: If the written output differs from the expected
                file content.
        """
        kwargs = dict(
            input_file=f"{here.parent}/samples/{sample_name}",
            output_file=output_file,
            input_file_format='fasta',
            output_file_format='fasta',
            complement=False,
            gaps=gaps,
            mode=TrimmingMode.kpic_smart_gap,
            use_log=False,
        )
        execute(**kwargs)

        with open(f"{here.parent}/expected/{expected_name}", "r") as expected:
            expected_content = expected.read()

        with open(output_file, "r") as out_file:
            output_content = out_file.read()

        assert expected_content == output_content

    def test_simple_no_change(self):
        """
        usage: clipkit simple.fa -m kpic-smart-gap
        """
        self._run_and_compare(
            sample_name="simple.fa",
            output_file="output/simple.fa_smart_gaps",
            expected_name="simple.fa_kpic_smart_gaps",
            gaps=0.8,
        )

    # This test used to be a second ``test_simple_no_change``; the duplicate
    # name replaced the test above in the class namespace, so only one of the
    # two was ever collected. It is now named after the sample it trims.
    def test_simple_long_description(self):
        """
        usage: clipkit simple_long_description.fa -m kpic-smart-gap
        """
        self._run_and_compare(
            sample_name="simple_long_description.fa",
            output_file="output/simple_long_description.fa_kpic_smart_gaps",
            expected_name="simple_long_description.fa_kpic_smart_gaps",
            gaps=0.8,
        )

    def test_12_YIL115C_Anc_2_253_codon_aln(self):
        """
        test kpic-smart-gap with codon alignment of yeast sequences
        usage: clipkit 12_YIL115C_Anc_2.253_codon_aln.fasta -m kpic-smart-gap
        """
        self._run_and_compare(
            sample_name="12_YIL115C_Anc_2.253_codon_aln.fasta",
            output_file="output/12_YIL115C_Anc_2.253_codon_aln.fasta.clipkit_kpic_smart_gaps",
            expected_name="12_YIL115C_Anc_2.253_codon_aln.clipkit_kpic_smart_gaps",
            gaps=0.9167,
        )

    def test_12_YIL115C_Anc_2_253_aa_aln(self):
        """
        test kpic-smart-gap with amino acid alignment of yeast sequences
        usage: clipkit 12_YIL115C_Anc_2.253_aa_aln.fasta -m kpic-smart-gap
        """
        self._run_and_compare(
            sample_name="12_YIL115C_Anc_2.253_aa_aln.fasta",
            output_file="output/12_YIL115C_Anc_2.253_aa_aln.fasta.clipkit_smart_gaps",
            expected_name="12_YIL115C_Anc_2.253_aa_aln.clipkit_kpic_smart_gaps",
            gaps=0.9167,
        )

    def test_24_ENSG00000163519_aa_aln(self):
        """
        test kpic-smart-gap with amino acid alignment of mammalian sequences
        usage: clipkit 24_ENSG00000163519_aa_aln.fasta -m kpic-smart-gap
        """
        self._run_and_compare(
            sample_name="24_ENSG00000163519_aa_aln.fasta",
            output_file="output/24_ENSG00000163519_aa_aln.fasta.clipkit",
            expected_name="24_ENSG00000163519_aa_aln.clipkit_kpic_smart_gaps",
            gaps=0.9583,
        )

    def test_24_ENSG00000163519_codon_aln(self):
        """
        test kpic-smart-gap with codon alignment of mammalian sequences
        usage: clipkit 24_ENSG00000163519_codon_aln.fasta -m kpic-smart-gap
        """
        self._run_and_compare(
            sample_name="24_ENSG00000163519_codon_aln.fasta",
            output_file="output/24_ENSG00000163519_codon_aln.fasta.clipkit",
            expected_name="24_ENSG00000163519_codon_aln.clipkit_kpic_smart_gaps",
            gaps=0.9583,
        )

    def test_EOG091N44M8_aa(self):
        """
        test kpic-smart-gap with amino acid alignment of Penicillium sequences
        usage: clipkit EOG091N44M8_aa.fa -m kpic-smart-gap
        """
        self._run_and_compare(
            sample_name="EOG091N44M8_aa.fa",
            output_file="output/EOG091N44M8_aa.fa.clipkit",
            expected_name="EOG091N44M8_aa.clipkit_kpic_smart_gaps",
            gaps=0.8803,
        )

    def test_EOG091N44M8_nt(self):
        """
        test kpic-smart-gap with nucleotide alignment of Penicillium sequences
        usage: clipkit EOG091N44M8_nt.fa -m kpic-smart-gap
        """
        self._run_and_compare(
            sample_name="EOG091N44M8_nt.fa",
            output_file="output/EOG091N44M8_nt.fa.clipkit",
            expected_name="EOG091N44M8_nt.clipkit_kpic_smart_gaps",
            gaps=0.8803,
        )

    @pytest.mark.slow
    def test_EOG092C4VOX_aa(self):
        """
        test kpic-smart-gap with amino acid alignment of fungal sequences
        usage: clipkit EOG092C4VOX_aa_aln.fasta -m kpic-smart-gap
        """
        self._run_and_compare(
            sample_name="EOG092C4VOX_aa_aln.fasta",
            output_file="output/EOG092C4VOX_aa_aln.fasta.clipkit",
            expected_name="EOG092C4VOX_aa_aln.clipkit_kpic_smart_gaps",
            gaps=0.9993,
        )