Mercurial > repos > padge > clipkit
comparison clipkit_repo/tests/integration/test_gappy_mode.py @ 0:49b058e85902 draft
"planemo upload for repository https://github.com/jlsteenwyk/clipkit commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
| author | padge |
|---|---|
| date | Fri, 25 Mar 2022 13:04:31 +0000 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:49b058e85902 |
|---|---|
| 1 import pytest | |
| 2 from pathlib import Path | |
| 3 | |
| 4 from clipkit.clipkit import execute | |
| 5 from clipkit.files import FileFormat | |
| 6 from clipkit.modes import TrimmingMode | |
| 7 | |
| 8 here = Path(__file__) | |
| 9 | |
| 10 | |
@pytest.mark.integration
class TestGappyMode(object):
    """Integration tests for gappy trimming with ``gaps=0.9``.

    Every test runs ``execute`` on a FASTA file from the ``samples``
    directory next to this module and compares the text of the file it
    writes with a reference file.
    """

    @staticmethod
    def _assert_gappy_output(sample_name, output_name, expected_name=None):
        """Trim one sample in gappy mode and compare it with its reference.

        Args:
            sample_name: file name inside the ``samples`` directory that sits
                next to this test module.
            output_name: file name to write inside the ``output`` directory,
                which is resolved relative to the current working directory.
            expected_name: file name inside the ``expected`` directory next
                to this module. When ``None`` the input file itself is the
                reference, i.e. trimming is expected to change nothing.

        Raises:
            AssertionError: if the written file differs from the reference.
        """
        input_file = f"{here.parent}/samples/{sample_name}"
        output_file = f"output/{output_name}"

        # The output path is relative to the working directory; create the
        # directory up front so the run does not depend on it already being
        # there. This is a no-op when it exists.
        Path(output_file).parent.mkdir(parents=True, exist_ok=True)

        execute(
            input_file=input_file,
            output_file=output_file,
            input_file_format='fasta',
            output_file_format='fasta',
            complement=False,
            gaps=0.9,
            mode=TrimmingMode.gappy,
            use_log=False,
        )

        if expected_name is None:
            expected_file = input_file
        else:
            expected_file = f"{here.parent}/expected/{expected_name}"

        with open(expected_file, "r") as expected:
            expected_content = expected.read()

        with open(output_file, "r") as out_file:
            output_content = out_file.read()

        assert expected_content == output_content

    def test_simple_no_change(self):
        """
        test gappy where no changes are expected in the resulting
        output alignment.
        usage: clipkit simple.fa
        """
        self._assert_gappy_output("simple.fa", "simpla.fa.clipkit")

    def test_simple_no_change_long_description(self):
        """
        test gappy where no changes are expected in the resulting
        output alignment.
        usage: clipkit simple.fa
        """
        self._assert_gappy_output(
            "simple_long_description.fa",
            "simple_long_description.fa.clipkit",
        )

    def test_12_YIL115C_Anc_2_253_codon_aln(self):
        """
        test gappy with codon alignment of yeast sequences
        usage: clipkit 12_YIL115C_Anc_2.253_codon_aln.fasta
        """
        self._assert_gappy_output(
            "12_YIL115C_Anc_2.253_codon_aln.fasta",
            "12_YIL115C_Anc_2.253_codon_aln.fasta.clipkit",
            "12_YIL115C_Anc_2.253_codon_aln.fasta_gappy",
        )

    def test_12_YIL115C_Anc_2_253_aa_aln(self):
        """
        test gappy with amino acid alignment of yeast sequences
        usage: clipkit 12_YIL115C_Anc_2.253_aa_aln.fasta
        """
        self._assert_gappy_output(
            "12_YIL115C_Anc_2.253_aa_aln.fasta",
            "12_YIL115C_Anc_2.253_aa_aln.fasta.clipkit",
            "12_YIL115C_Anc_2.253_aa_aln.fasta_gappy",
        )

    def test_24_ENSG00000163519_aa_aln(self):
        """
        test gappy with amino acid alignment of mammalian sequences
        usage: clipkit 24_ENSG00000163519_aa_aln.fasta
        """
        self._assert_gappy_output(
            "24_ENSG00000163519_aa_aln.fasta",
            "24_ENSG00000163519_aa_aln.fasta.clipkit",
            "24_ENSG00000163519_aa_aln.fasta_gappy",
        )

    def test_24_ENSG00000163519_codon_aln(self):
        """
        test gappy with codon alignment of mammalian sequences
        usage: clipkit 24_ENSG00000163519_codon_aln.fasta
        """
        self._assert_gappy_output(
            "24_ENSG00000163519_codon_aln.fasta",
            "24_ENSG00000163519_codon_aln.fasta.clipkit",
            "24_ENSG00000163519_codon_aln.fasta_gappy",
        )

    def test_EOG091N44M8_aa(self):
        """
        test gappy with amino acid alignment of Penicillium sequences
        usage: clipkit EOG091N44M8_aa.fa
        """
        self._assert_gappy_output(
            "EOG091N44M8_aa.fa",
            "EOG091N44M8_aa.fa.clipkit",
            "EOG091N44M8_aa.fa_gappy",
        )

    def test_EOG091N44M8_nt(self):
        """
        test gappy with nucleotide alignment of Penicillium sequences
        usage: clipkit EOG091N44M8_nt.fa
        """
        self._assert_gappy_output(
            "EOG091N44M8_nt.fa",
            "EOG091N44M8_nt.fa.clipkit",
            "EOG091N44M8_nt.fa_gappy",
        )

    @pytest.mark.slow
    def test_EOG092C4VOX_aa(self):
        """
        test gappy with amino acid alignment of fungal sequences
        usage: clipkit EOG092C4VOX_aa_aln.fasta
        """
        self._assert_gappy_output(
            "EOG092C4VOX_aa_aln.fasta",
            "EOG092C4VOX_aa_aln.fasta.clipkit",
            "EOG092C4VOX_aa_aln.fasta_gappy",
        )
| 275 | |
| 276 | |
@pytest.mark.integration
class TestGappyModeCustomGapsParameter(object):
    """Integration tests for gappy trimming with a non-default ``gaps`` value.

    Every test runs ``execute`` on a FASTA file from the ``samples``
    directory next to this module, passing its own ``gaps`` value, and
    compares the text of the file it writes with a reference file from the
    ``expected`` directory.
    """

    @staticmethod
    def _assert_gappy_output(sample_name, output_name, expected_name, gaps):
        """Trim one sample in gappy mode and compare it with its reference.

        Args:
            sample_name: file name inside the ``samples`` directory that sits
                next to this test module.
            output_name: file name to write inside the ``output`` directory,
                which is resolved relative to the current working directory.
            expected_name: file name inside the ``expected`` directory next
                to this module.
            gaps: value forwarded to ``execute`` as its ``gaps`` argument.

        Raises:
            AssertionError: if the written file differs from the reference.
        """
        input_file = f"{here.parent}/samples/{sample_name}"
        output_file = f"output/{output_name}"

        # The output path is relative to the working directory; create the
        # directory up front so the run does not depend on it already being
        # there. This is a no-op when it exists.
        Path(output_file).parent.mkdir(parents=True, exist_ok=True)

        execute(
            input_file=input_file,
            output_file=output_file,
            input_file_format='fasta',
            output_file_format='fasta',
            complement=False,
            gaps=gaps,
            mode=TrimmingMode.gappy,
            use_log=False,
        )

        with open(f"{here.parent}/expected/{expected_name}", "r") as expected:
            expected_content = expected.read()

        with open(output_file, "r") as out_file:
            output_content = out_file.read()

        assert expected_content == output_content

    def test_simple(self):
        """
        test gappy with a custom gaps parameter
        usage: clipkit simple.fa -g 0.2
        """
        self._assert_gappy_output(
            "simple.fa",
            "simpla.fa.clipkit",
            "simple.fa_gappy_gaps_set_to_0.2",
            gaps=0.2,
        )

    def test_12_YIL115C_Anc_2_253_codon_aln(self):
        """
        test gappy with codon alignment of yeast sequences
        usage: clipkit 12_YIL115C_Anc_2.253_codon_aln.fasta -g 0.3
        """
        self._assert_gappy_output(
            "12_YIL115C_Anc_2.253_codon_aln.fasta",
            "12_YIL115C_Anc_2.253_codon_aln.fasta.clipkit",
            "12_YIL115C_Anc_2.253_codon_aln.fasta_gappy_custom_gaps",
            gaps=0.3,
        )

    def test_24_ENSG00000163519_codon_aln(self):
        """
        test gappy with codon alignment of mammalian sequences
        usage: clipkit 24_ENSG00000163519_codon_aln.fasta -g .4
        """
        self._assert_gappy_output(
            "24_ENSG00000163519_codon_aln.fasta",
            "24_ENSG00000163519_codon_aln.fasta.clipkit",
            "24_ENSG00000163519_codon_aln.fasta_gappy_custom_gaps",
            gaps=0.4,
        )

    def test_EOG091N44M8_nt(self):
        """
        test gappy with nucleotide alignment of Penicillium sequences
        usage: clipkit EOG091N44M8_nt.fa -g .1
        """
        self._assert_gappy_output(
            "EOG091N44M8_nt.fa",
            "EOG091N44M8_nt.fa.clipkit",
            "EOG091N44M8_nt.fa_gappy_custom_gaps",
            gaps=0.1,
        )

    @pytest.mark.slow
    def test_EOG092C0CZK_aa(self):
        """
        test gappy with amino acid alignment of fungal sequences
        usage: clipkit EOG092C0CZK_aa_aln.fasta -g .5
        """
        self._assert_gappy_output(
            "EOG092C0CZK_aa_aln.fasta",
            "EOG092C0CZK_aa_aln.fasta.clipkit",
            "EOG092C0CZK_aa_aln.fasta_gappy_custom_gaps",
            gaps=0.5,
        )

    @pytest.mark.slow
    def test_EOG092C4VOX_aa(self):
        """
        test gappy with amino acid alignment of fungal sequences
        usage: clipkit EOG092C4VOX_aa_aln.fasta -g .25
        """
        self._assert_gappy_output(
            "EOG092C4VOX_aa_aln.fasta",
            "EOG092C4VOX_aa_aln.fasta.clipkit",
            "EOG092C4VOX_aa_aln.fasta_gappy_custom_gaps",
            gaps=0.25,
        )
