comparison clipkit_repo/tests/integration/test_gappy_mode.py @ 0:49b058e85902 draft

"planemo upload for repository https://github.com/jlsteenwyk/clipkit commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
author padge
date Fri, 25 Mar 2022 13:04:31 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:49b058e85902
1 import pytest
2 from pathlib import Path
3
4 from clipkit.clipkit import execute
5 from clipkit.files import FileFormat
6 from clipkit.modes import TrimmingMode
7
# Path of this test module; the tests use ``here.parent`` to locate the
# ``samples/`` and ``expected/`` fixture directories that sit next to it.
here = Path(__file__)
9
10
@pytest.mark.integration
class TestGappyMode(object):
    """
    integration tests for gappy trimming run with gaps=0.9.

    each test trims one sample alignment with ``execute``, writes the result
    under ``output/`` and compares its text with a reference file.
    """

    @staticmethod
    def _assert_gappy_output(input_file, output_file, expected_file, gaps=0.9):
        """
        trim input_file in gappy mode and assert that the written output
        has the same text as expected_file.

        input_file: path of the fasta alignment handed to execute
        output_file: path execute is asked to write the trimmed alignment to
        expected_file: path of the file holding the expected output text
        gaps: gaps threshold forwarded to execute
        """
        # output paths are relative to the current working directory; create
        # the directory so the test does not depend on it already existing.
        Path(output_file).parent.mkdir(parents=True, exist_ok=True)

        execute(
            input_file=input_file,
            output_file=output_file,
            input_file_format='fasta',
            output_file_format='fasta',
            complement=False,
            gaps=gaps,
            mode=TrimmingMode.gappy,
            use_log=False,
        )

        with open(expected_file, "r") as expected:
            expected_content = expected.read()

        with open(output_file, "r") as out_file:
            output_content = out_file.read()

        assert expected_content == output_content

    def test_simple_no_change(self):
        """
        test gappy where no changes are expected in the resulting
        output alignment.
        usage: clipkit simple.fa
        """
        input_file = f"{here.parent}/samples/simple.fa"
        # the input doubles as the expected output: nothing should be trimmed.
        # NOTE(review): "simpla" looks like a typo for "simple"; the existing
        # output file name is kept unchanged.
        self._assert_gappy_output(
            input_file=input_file,
            output_file="output/simpla.fa.clipkit",
            expected_file=input_file,
        )

    def test_simple_no_change_long_description(self):
        """
        test gappy where no changes are expected in the resulting
        output alignment.
        usage: clipkit simple_long_description.fa
        """
        input_file = f"{here.parent}/samples/simple_long_description.fa"
        # the input doubles as the expected output: nothing should be trimmed.
        self._assert_gappy_output(
            input_file=input_file,
            output_file="output/simple_long_description.fa.clipkit",
            expected_file=input_file,
        )

    def test_12_YIL115C_Anc_2_253_codon_aln(self):
        """
        test gappy with codon alignment of yeast sequences
        usage: clipkit 12_YIL115C_Anc_2.253_codon_aln.fasta
        """
        self._assert_gappy_output(
            input_file=f"{here.parent}/samples/12_YIL115C_Anc_2.253_codon_aln.fasta",
            output_file="output/12_YIL115C_Anc_2.253_codon_aln.fasta.clipkit",
            expected_file=f"{here.parent}/expected/12_YIL115C_Anc_2.253_codon_aln.fasta_gappy",
        )

    def test_12_YIL115C_Anc_2_253_aa_aln(self):
        """
        test gappy with amino acid alignment of yeast sequences
        usage: clipkit 12_YIL115C_Anc_2.253_aa_aln.fasta
        """
        self._assert_gappy_output(
            input_file=f"{here.parent}/samples/12_YIL115C_Anc_2.253_aa_aln.fasta",
            output_file="output/12_YIL115C_Anc_2.253_aa_aln.fasta.clipkit",
            expected_file=f"{here.parent}/expected/12_YIL115C_Anc_2.253_aa_aln.fasta_gappy",
        )

    def test_24_ENSG00000163519_aa_aln(self):
        """
        test gappy with amino acid alignment of mammalian sequences
        usage: clipkit 24_ENSG00000163519_aa_aln.fasta
        """
        self._assert_gappy_output(
            input_file=f"{here.parent}/samples/24_ENSG00000163519_aa_aln.fasta",
            output_file="output/24_ENSG00000163519_aa_aln.fasta.clipkit",
            expected_file=f"{here.parent}/expected/24_ENSG00000163519_aa_aln.fasta_gappy",
        )

    def test_24_ENSG00000163519_codon_aln(self):
        """
        test gappy with codon alignment of mammalian sequences
        usage: clipkit 24_ENSG00000163519_codon_aln.fasta
        """
        self._assert_gappy_output(
            input_file=f"{here.parent}/samples/24_ENSG00000163519_codon_aln.fasta",
            output_file="output/24_ENSG00000163519_codon_aln.fasta.clipkit",
            expected_file=f"{here.parent}/expected/24_ENSG00000163519_codon_aln.fasta_gappy",
        )

    def test_EOG091N44M8_aa(self):
        """
        test gappy with amino acid alignment of Penicillium sequences
        usage: clipkit EOG091N44M8_aa.fa
        """
        self._assert_gappy_output(
            input_file=f"{here.parent}/samples/EOG091N44M8_aa.fa",
            output_file="output/EOG091N44M8_aa.fa.clipkit",
            expected_file=f"{here.parent}/expected/EOG091N44M8_aa.fa_gappy",
        )

    def test_EOG091N44M8_nt(self):
        """
        test gappy with nucleotide alignment of Penicillium sequences
        usage: clipkit EOG091N44M8_nt.fa
        """
        self._assert_gappy_output(
            input_file=f"{here.parent}/samples/EOG091N44M8_nt.fa",
            output_file="output/EOG091N44M8_nt.fa.clipkit",
            expected_file=f"{here.parent}/expected/EOG091N44M8_nt.fa_gappy",
        )

    @pytest.mark.slow
    def test_EOG092C4VOX_aa(self):
        """
        test gappy with amino acid alignment of fungal sequences
        usage: clipkit EOG092C4VOX_aa_aln.fasta
        """
        self._assert_gappy_output(
            input_file=f"{here.parent}/samples/EOG092C4VOX_aa_aln.fasta",
            output_file="output/EOG092C4VOX_aa_aln.fasta.clipkit",
            expected_file=f"{here.parent}/expected/EOG092C4VOX_aa_aln.fasta_gappy",
        )
275
276
@pytest.mark.integration
class TestGappyModeCustomGapsParameter(object):
    """
    integration tests for gappy trimming run with a custom gaps value
    (the ``-g`` option shown in each test's usage line).

    each test trims one sample alignment with ``execute``, writes the result
    under ``output/`` and compares its text with a reference file.
    """

    @staticmethod
    def _assert_gappy_output(input_file, output_file, expected_file, gaps):
        """
        trim input_file in gappy mode and assert that the written output
        has the same text as expected_file.

        input_file: path of the fasta alignment handed to execute
        output_file: path execute is asked to write the trimmed alignment to
        expected_file: path of the file holding the expected output text
        gaps: gaps threshold forwarded to execute
        """
        # output paths are relative to the current working directory; create
        # the directory so the test does not depend on it already existing.
        Path(output_file).parent.mkdir(parents=True, exist_ok=True)

        execute(
            input_file=input_file,
            output_file=output_file,
            input_file_format='fasta',
            output_file_format='fasta',
            complement=False,
            gaps=gaps,
            mode=TrimmingMode.gappy,
            use_log=False,
        )

        with open(expected_file, "r") as expected:
            expected_content = expected.read()

        with open(output_file, "r") as out_file:
            output_content = out_file.read()

        assert expected_content == output_content

    def test_simple(self):
        """
        test gappy with a custom gaps parameter
        usage: clipkit simple.fa -g 0.2
        """
        # NOTE(review): "simpla" looks like a typo for "simple"; the existing
        # output file name is kept unchanged.
        self._assert_gappy_output(
            input_file=f"{here.parent}/samples/simple.fa",
            output_file="output/simpla.fa.clipkit",
            expected_file=f"{here.parent}/expected/simple.fa_gappy_gaps_set_to_0.2",
            gaps=0.2,
        )

    def test_12_YIL115C_Anc_2_253_codon_aln(self):
        """
        test gappy with codon alignment of yeast sequences
        usage: clipkit 12_YIL115C_Anc_2.253_codon_aln.fasta -g 0.3
        """
        self._assert_gappy_output(
            input_file=f"{here.parent}/samples/12_YIL115C_Anc_2.253_codon_aln.fasta",
            output_file="output/12_YIL115C_Anc_2.253_codon_aln.fasta.clipkit",
            expected_file=f"{here.parent}/expected/12_YIL115C_Anc_2.253_codon_aln.fasta_gappy_custom_gaps",
            gaps=0.3,
        )

    def test_24_ENSG00000163519_codon_aln(self):
        """
        test gappy with codon alignment of mammalian sequences
        usage: clipkit 24_ENSG00000163519_codon_aln.fasta -g .4
        """
        self._assert_gappy_output(
            input_file=f"{here.parent}/samples/24_ENSG00000163519_codon_aln.fasta",
            output_file="output/24_ENSG00000163519_codon_aln.fasta.clipkit",
            expected_file=f"{here.parent}/expected/24_ENSG00000163519_codon_aln.fasta_gappy_custom_gaps",
            gaps=0.4,
        )

    def test_EOG091N44M8_nt(self):
        """
        test gappy with nucleotide alignment of Penicillium sequences
        usage: clipkit EOG091N44M8_nt.fa -g .1
        """
        self._assert_gappy_output(
            input_file=f"{here.parent}/samples/EOG091N44M8_nt.fa",
            output_file="output/EOG091N44M8_nt.fa.clipkit",
            expected_file=f"{here.parent}/expected/EOG091N44M8_nt.fa_gappy_custom_gaps",
            gaps=0.1,
        )

    @pytest.mark.slow
    def test_EOG092C0CZK_aa(self):
        """
        test gappy with amino acid alignment of fungal sequences
        usage: clipkit EOG092C0CZK_aa_aln.fasta -g .5
        """
        self._assert_gappy_output(
            input_file=f"{here.parent}/samples/EOG092C0CZK_aa_aln.fasta",
            output_file="output/EOG092C0CZK_aa_aln.fasta.clipkit",
            expected_file=f"{here.parent}/expected/EOG092C0CZK_aa_aln.fasta_gappy_custom_gaps",
            gaps=0.5,
        )

    @pytest.mark.slow
    def test_EOG092C4VOX_aa(self):
        """
        test gappy with amino acid alignment of fungal sequences
        usage: clipkit EOG092C4VOX_aa_aln.fasta -g .25
        """
        self._assert_gappy_output(
            input_file=f"{here.parent}/samples/EOG092C4VOX_aa_aln.fasta",
            output_file="output/EOG092C4VOX_aa_aln.fasta.clipkit",
            expected_file=f"{here.parent}/expected/EOG092C4VOX_aa_aln.fasta_gappy_custom_gaps",
            gaps=0.25,
        )