comparison clipkit_repo/tests/integration/test_kpi_gappy_mode.py @ 0:49b058e85902 draft

"planemo upload for repository https://github.com/jlsteenwyk/clipkit commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
author padge
date Fri, 25 Mar 2022 13:04:31 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:49b058e85902
1 import pytest
2 from pathlib import Path
3
4 from clipkit.clipkit import execute
5 from clipkit.files import FileFormat
6 from clipkit.modes import TrimmingMode
7
# Path of this test module; the tests below build their samples/ and expected/ fixture paths from here.parent.
8 here = Path(__file__)
9
10
@pytest.mark.integration
class TestKpiGappyMode(object):
    """Integration tests for the kpi-gappy trimming mode with gaps=0.9.

    Every test trims one sample alignment through ``execute`` and checks
    that the written output is identical to a stored expected file.
    """

    @staticmethod
    def _assert_trimmed_output_matches(sample_name, expected_name, output_file):
        """Trim a sample in kpi-gappy mode and compare it to the expected file.

        Args:
            sample_name: file name inside the ``samples`` directory next to
                this module.
            expected_name: file name inside the ``expected`` directory next
                to this module.
            output_file: path the trimmed alignment is written to; the
                paths used here are relative to the current working
                directory.

        Raises:
            AssertionError: if the output differs from the expected content.
        """
        execute(
            input_file=f"{here.parent}/samples/{sample_name}",
            output_file=output_file,
            input_file_format='fasta',
            output_file_format='fasta',
            complement=False,
            gaps=0.9,
            mode=TrimmingMode.kpi_gappy,
            use_log=False,
        )

        with open(f"{here.parent}/expected/{expected_name}", "r") as expected:
            expected_content = expected.read()

        with open(output_file, "r") as out_file:
            output_content = out_file.read()

        assert expected_content == output_content

    def test_simple(self):
        """
        test kpi_gappy with the simple sample alignment
        usage: clipkit simple.fa -m kpi-gappy
        """
        self._assert_trimmed_output_matches(
            "simple.fa",
            "simple.fa_kpi_gappy",
            "output/simpla.fa.TestKpiGappyMode_test_simple.clipkit",
        )

    def test_simple_long_description(self):
        """
        test kpi_gappy with the simple_long_description sample alignment
        usage: clipkit simple.fa -m kpi-gappy
        """
        self._assert_trimmed_output_matches(
            "simple_long_description.fa",
            "simple_long_description.fa_kpi_gappy",
            "output/simple_long_description.fa.clipkit_kpi_gappy",
        )

    def test_12_YIL115C_Anc_2_253_codon_aln(self):
        """
        test kpi_gappy with codon alignment of yeast sequences
        usage: clipkit 12_YIL115C_Anc_2.253_codon_aln.fasta -m kpi-gappy
        """
        self._assert_trimmed_output_matches(
            "12_YIL115C_Anc_2.253_codon_aln.fasta",
            "12_YIL115C_Anc_2.253_codon_aln.fasta_kpi_gappy",
            "output/12_YIL115C_Anc_2.253_codon_aln.fasta.clipkit",
        )

    def test_12_YIL115C_Anc_2_253_aa_aln(self):
        """
        test kpi_gappy with amino acid alignment of yeast sequences
        usage: clipkit 12_YIL115C_Anc_2.253_aa_aln.fasta -m kpi-gappy
        """
        self._assert_trimmed_output_matches(
            "12_YIL115C_Anc_2.253_aa_aln.fasta",
            "12_YIL115C_Anc_2.253_aa_aln.fasta_kpi_gappy",
            "output/12_YIL115C_Anc_2.253_aa_aln.fasta.clipkit",
        )

    def test_24_ENSG00000163519_aa_aln(self):
        """
        test kpi_gappy with amino acid alignment of mammalian sequences
        usage: clipkit 24_ENSG00000163519_aa_aln.fasta -m kpi-gappy
        """
        self._assert_trimmed_output_matches(
            "24_ENSG00000163519_aa_aln.fasta",
            "24_ENSG00000163519_aa_aln.fasta_kpi_gappy",
            "output/24_ENSG00000163519_aa_aln.fasta.clipkit",
        )

    def test_24_ENSG00000163519_codon_aln(self):
        """
        test kpi_gappy with codon alignment of mammalian sequences
        usage: clipkit 24_ENSG00000163519_codon_aln.fasta -m kpi-gappy
        """
        self._assert_trimmed_output_matches(
            "24_ENSG00000163519_codon_aln.fasta",
            "24_ENSG00000163519_codon_aln.fasta_kpi_gappy",
            "output/24_ENSG00000163519_codon_aln.fasta.clipkit",
        )

    def test_EOG091N44M8_aa(self):
        """
        test kpi_gappy with amino acid alignment of Penicillium sequences
        usage: clipkit EOG091N44M8_aa.fa -m kpi-gappy
        """
        self._assert_trimmed_output_matches(
            "EOG091N44M8_aa.fa",
            "EOG091N44M8_aa.fa_kpi_gappy",
            "output/EOG091N44M8_aa.fa.clipkit",
        )

    def test_EOG091N44M8_nt(self):
        """
        test kpi_gappy with nucleotide alignment of Penicillium sequences
        usage: clipkit EOG091N44M8_nt.fa -m kpi-gappy
        """
        self._assert_trimmed_output_matches(
            "EOG091N44M8_nt.fa",
            "EOG091N44M8_nt.fa_kpi_gappy",
            "output/EOG091N44M8_nt.fa.clipkit",
        )

    @pytest.mark.slow
    def test_EOG092C4VOX_aa(self):
        """
        test kpi_gappy with amino alignment of fungal sequences
        usage: clipkit EOG092C4VOX_aa_aln.fasta -m kpi-gappy
        """
        self._assert_trimmed_output_matches(
            "EOG092C4VOX_aa_aln.fasta",
            "EOG092C4VOX_aa_aln.fasta_kpi_gappy",
            "output/EOG092C4VOX_aa_aln.fasta.clipkit",
        )
278
279
@pytest.mark.integration
class TestKPIGappyModeCustomGapsParameter(object):
    """Integration tests for the kpi-gappy trimming mode with custom gaps.

    Every test trims one sample alignment through ``execute`` with its own
    ``gaps`` value and checks that the written output is identical to a
    stored expected file.
    """

    @staticmethod
    def _assert_trimmed_output_matches(sample_name, expected_name, output_file, gaps):
        """Trim a sample in kpi-gappy mode and compare it to the expected file.

        Args:
            sample_name: file name inside the ``samples`` directory next to
                this module.
            expected_name: file name inside the ``expected`` directory next
                to this module.
            output_file: path the trimmed alignment is written to; the
                paths used here are relative to the current working
                directory.
            gaps: value forwarded to ``execute`` as its ``gaps`` argument
                (the ``-g`` option in the usage lines).

        Raises:
            AssertionError: if the output differs from the expected content.
        """
        execute(
            input_file=f"{here.parent}/samples/{sample_name}",
            output_file=output_file,
            input_file_format='fasta',
            output_file_format='fasta',
            complement=False,
            gaps=gaps,
            mode=TrimmingMode.kpi_gappy,
            use_log=False,
        )

        with open(f"{here.parent}/expected/{expected_name}", "r") as expected:
            expected_content = expected.read()

        with open(output_file, "r") as out_file:
            output_content = out_file.read()

        assert expected_content == output_content

    def test_simple(self):
        """
        test kpi_gappy with a custom gaps parameter
        usage: clipkit simple.fa -g 0.2 -m kpi-gappy
        """
        self._assert_trimmed_output_matches(
            "simple.fa",
            "simple.fa_kpi_gappy_gaps_set_to_0.2",
            "output/simpla.fa.clipkit",
            gaps=0.2,
        )

    def test_12_YIL115C_Anc_2_253_codon_aln(self):
        """
        test kpi_gappy with codon alignment of yeast sequences
        usage: clipkit 12_YIL115C_Anc_2.253_codon_aln.fasta -g 0.3 -m kpi-gappy
        """
        self._assert_trimmed_output_matches(
            "12_YIL115C_Anc_2.253_codon_aln.fasta",
            "12_YIL115C_Anc_2.253_codon_aln.fasta_kpi_gappy_custom_gaps",
            "output/12_YIL115C_Anc_2.253_codon_aln.fasta.clipkit",
            gaps=0.3,
        )

    def test_24_ENSG00000163519_codon_aln(self):
        """
        test kpi_gappy with codon alignment of mammalian sequences
        usage: clipkit 24_ENSG00000163519_codon_aln.fasta -g .4 -m kpi-gappy
        """
        self._assert_trimmed_output_matches(
            "24_ENSG00000163519_codon_aln.fasta",
            "24_ENSG00000163519_codon_aln.fasta_kpi_gappy_custom_gaps",
            "output/24_ENSG00000163519_codon_aln.fasta.clipkit",
            gaps=0.4,
        )

    def test_EOG091N44M8_nt(self):
        """
        test kpi_gappy with nucleotide alignment of Penicillium sequences
        usage: clipkit EOG091N44M8_nt.fa -g .1 -m kpi-gappy
        """
        self._assert_trimmed_output_matches(
            "EOG091N44M8_nt.fa",
            "EOG091N44M8_nt.fa_kpi_gappy_custom_gaps",
            "output/EOG091N44M8_nt.fa.clipkit",
            gaps=0.1,
        )

    @pytest.mark.slow
    def test_EOG092C0CZK_aa(self):
        """
        test kpi_gappy with amino alignment of fungal sequences
        usage: clipkit EOG092C0CZK_aa_aln.fasta -g .5 -m kpi-gappy
        """
        self._assert_trimmed_output_matches(
            "EOG092C0CZK_aa_aln.fasta",
            "EOG092C0CZK_aa_aln.fasta_kpi_gappy_custom_gaps",
            "output/EOG092C0CZK_aa_aln.fasta.clipkit",
            gaps=0.5,
        )

    @pytest.mark.slow
    def test_EOG092C4VOX_aa(self):
        """
        test kpi_gappy with amino alignment of fungal sequences
        usage: clipkit EOG092C4VOX_aa_aln.fasta -g .25 -m kpi-gappy
        """
        self._assert_trimmed_output_matches(
            "EOG092C4VOX_aa_aln.fasta",
            "EOG092C4VOX_aa_aln.fasta_kpi_gappy_custom_gaps",
            "output/EOG092C4VOX_aa_aln.fasta.clipkit",
            gaps=0.25,
        )