Mercurial > repos > padge > clipkit
comparison clipkit_repo/tests/integration/test_kpi_gappy_mode.py @ 0:49b058e85902 draft
"planemo upload for repository https://github.com/jlsteenwyk/clipkit commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
| author | padge |
|---|---|
| date | Fri, 25 Mar 2022 13:04:31 +0000 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:49b058e85902 |
|---|---|
| 1 import pytest | |
| 2 from pathlib import Path | |
| 3 | |
| 4 from clipkit.clipkit import execute | |
| 5 from clipkit.files import FileFormat | |
| 6 from clipkit.modes import TrimmingMode | |
| 7 | |
# Path of this test module itself (the file, not its directory); the tests
# use ``here.parent`` to locate the sibling ``samples`` and ``expected``
# folders.
here = Path(__file__)
| 9 | |
| 10 | |
@pytest.mark.integration
class TestKpiGappyMode(object):
    """Integration tests for the kpi-gappy trimming mode with ``gaps=0.9``.

    Every test trims one sample alignment through ``execute`` and requires
    the file it writes to be identical to a stored reference file.
    """

    @staticmethod
    def _assert_trimmed_output(sample_name, expected_name, output_file):
        """Run ``execute`` in kpi-gappy mode and compare with a reference.

        The leading underscore keeps pytest from collecting this helper as
        a test of its own.

        Args:
            sample_name: file name inside the ``samples`` directory next to
                this module; passed to ``execute`` as the input alignment.
            expected_name: file name inside the ``expected`` directory next
                to this module; holds the reference output.
            output_file: path passed to ``execute`` as ``output_file`` and
                read back afterwards. It is used exactly as given, so a
                relative path resolves against the current working
                directory rather than this module's directory.

        Raises:
            AssertionError: if the written file and the reference differ.
        """
        execute(
            input_file=f"{here.parent}/samples/{sample_name}",
            output_file=output_file,
            input_file_format='fasta',
            output_file_format='fasta',
            complement=False,
            gaps=0.9,
            mode=TrimmingMode.kpi_gappy,
            use_log=False,
        )

        with open(f"{here.parent}/expected/{expected_name}", "r") as expected:
            expected_content = expected.read()

        with open(output_file, "r") as out_file:
            output_content = out_file.read()

        assert expected_content == output_content

    def test_simple(self):
        """
        usage: clipkit simple.fa -m kpi-gappy
        """
        self._assert_trimmed_output(
            "simple.fa",
            "simple.fa_kpi_gappy",
            # NOTE: "simpla" is the spelling used by the original test; the
            # name is kept unchanged so the written file stays the same.
            "output/simpla.fa.TestKpiGappyMode_test_simple.clipkit",
        )

    def test_simple_long_description(self):
        """
        usage: clipkit simple_long_description.fa -m kpi-gappy
        """
        self._assert_trimmed_output(
            "simple_long_description.fa",
            "simple_long_description.fa_kpi_gappy",
            "output/simple_long_description.fa.clipkit_kpi_gappy",
        )

    def test_12_YIL115C_Anc_2_253_codon_aln(self):
        """
        test kpi_gappy with codon alignment of yeast sequences
        usage: clipkit 12_YIL115C_Anc_2.253_codon_aln.fasta -m kpi-gappy
        """
        self._assert_trimmed_output(
            "12_YIL115C_Anc_2.253_codon_aln.fasta",
            "12_YIL115C_Anc_2.253_codon_aln.fasta_kpi_gappy",
            "output/12_YIL115C_Anc_2.253_codon_aln.fasta.clipkit",
        )

    def test_12_YIL115C_Anc_2_253_aa_aln(self):
        """
        test kpi_gappy with amino acid alignment of yeast sequences
        usage: clipkit 12_YIL115C_Anc_2.253_aa_aln.fasta -m kpi-gappy
        """
        self._assert_trimmed_output(
            "12_YIL115C_Anc_2.253_aa_aln.fasta",
            "12_YIL115C_Anc_2.253_aa_aln.fasta_kpi_gappy",
            "output/12_YIL115C_Anc_2.253_aa_aln.fasta.clipkit",
        )

    def test_24_ENSG00000163519_aa_aln(self):
        """
        test kpi_gappy with amino acid alignment of mammalian sequences
        usage: clipkit 24_ENSG00000163519_aa_aln.fasta -m kpi-gappy
        """
        self._assert_trimmed_output(
            "24_ENSG00000163519_aa_aln.fasta",
            "24_ENSG00000163519_aa_aln.fasta_kpi_gappy",
            "output/24_ENSG00000163519_aa_aln.fasta.clipkit",
        )

    def test_24_ENSG00000163519_codon_aln(self):
        """
        test kpi_gappy with codon alignment of mammalian sequences
        usage: clipkit 24_ENSG00000163519_codon_aln.fasta -m kpi-gappy
        """
        self._assert_trimmed_output(
            "24_ENSG00000163519_codon_aln.fasta",
            "24_ENSG00000163519_codon_aln.fasta_kpi_gappy",
            "output/24_ENSG00000163519_codon_aln.fasta.clipkit",
        )

    def test_EOG091N44M8_aa(self):
        """
        test kpi_gappy with amino acid alignment of Penicillium sequences
        usage: clipkit EOG091N44M8_aa.fa -m kpi-gappy
        """
        self._assert_trimmed_output(
            "EOG091N44M8_aa.fa",
            "EOG091N44M8_aa.fa_kpi_gappy",
            "output/EOG091N44M8_aa.fa.clipkit",
        )

    def test_EOG091N44M8_nt(self):
        """
        test kpi_gappy with nucleotide alignment of Penicillium sequences
        usage: clipkit EOG091N44M8_nt.fa -m kpi-gappy
        """
        self._assert_trimmed_output(
            "EOG091N44M8_nt.fa",
            "EOG091N44M8_nt.fa_kpi_gappy",
            "output/EOG091N44M8_nt.fa.clipkit",
        )

    @pytest.mark.slow
    def test_EOG092C4VOX_aa(self):
        """
        test kpi_gappy with amino acid alignment of fungal sequences
        usage: clipkit EOG092C4VOX_aa_aln.fasta -m kpi-gappy
        """
        self._assert_trimmed_output(
            "EOG092C4VOX_aa_aln.fasta",
            "EOG092C4VOX_aa_aln.fasta_kpi_gappy",
            "output/EOG092C4VOX_aa_aln.fasta.clipkit",
        )
| 278 | |
| 279 | |
@pytest.mark.integration
class TestKPIGappyModeCustomGapsParameter(object):
    """Integration tests for the kpi-gappy mode with a non-0.9 ``gaps`` value.

    Every test trims one sample alignment through ``execute`` with its own
    ``gaps`` value and requires the written file to be identical to a
    stored reference file.
    """

    @staticmethod
    def _assert_trimmed_output_with_gaps(
        sample_name, expected_name, output_file, gaps
    ):
        """Run ``execute`` in kpi-gappy mode and compare with a reference.

        The leading underscore keeps pytest from collecting this helper as
        a test of its own.

        Args:
            sample_name: file name inside the ``samples`` directory next to
                this module; passed to ``execute`` as the input alignment.
            expected_name: file name inside the ``expected`` directory next
                to this module; holds the reference output.
            output_file: path passed to ``execute`` as ``output_file`` and
                read back afterwards. It is used exactly as given, so a
                relative path resolves against the current working
                directory rather than this module's directory.
            gaps: value forwarded to ``execute`` as the ``gaps`` keyword.

        Raises:
            AssertionError: if the written file and the reference differ.
        """
        execute(
            input_file=f"{here.parent}/samples/{sample_name}",
            output_file=output_file,
            input_file_format='fasta',
            output_file_format='fasta',
            complement=False,
            gaps=gaps,
            mode=TrimmingMode.kpi_gappy,
            use_log=False,
        )

        with open(f"{here.parent}/expected/{expected_name}", "r") as expected:
            expected_content = expected.read()

        with open(output_file, "r") as out_file:
            output_content = out_file.read()

        assert expected_content == output_content

    def test_simple(self):
        """
        test kpi_gappy with a custom gaps parameter
        usage: clipkit simple.fa -g 0.2 -m kpi-gappy
        """
        self._assert_trimmed_output_with_gaps(
            "simple.fa",
            "simple.fa_kpi_gappy_gaps_set_to_0.2",
            # NOTE: "simpla" is the spelling used by the original test; the
            # name is kept unchanged so the written file stays the same.
            "output/simpla.fa.clipkit",
            gaps=0.2,
        )

    def test_12_YIL115C_Anc_2_253_codon_aln(self):
        """
        test kpi_gappy with codon alignment of yeast sequences
        usage: clipkit 12_YIL115C_Anc_2.253_codon_aln.fasta -g 0.3 -m kpi-gappy
        """
        self._assert_trimmed_output_with_gaps(
            "12_YIL115C_Anc_2.253_codon_aln.fasta",
            "12_YIL115C_Anc_2.253_codon_aln.fasta_kpi_gappy_custom_gaps",
            "output/12_YIL115C_Anc_2.253_codon_aln.fasta.clipkit",
            gaps=0.3,
        )

    def test_24_ENSG00000163519_codon_aln(self):
        """
        test kpi_gappy with codon alignment of mammalian sequences
        usage: clipkit 24_ENSG00000163519_codon_aln.fasta -g .4 -m kpi-gappy
        """
        self._assert_trimmed_output_with_gaps(
            "24_ENSG00000163519_codon_aln.fasta",
            "24_ENSG00000163519_codon_aln.fasta_kpi_gappy_custom_gaps",
            "output/24_ENSG00000163519_codon_aln.fasta.clipkit",
            gaps=0.4,
        )

    def test_EOG091N44M8_nt(self):
        """
        test kpi_gappy with nucleotide alignment of Penicillium sequences
        usage: clipkit EOG091N44M8_nt.fa -g .1 -m kpi-gappy
        """
        self._assert_trimmed_output_with_gaps(
            "EOG091N44M8_nt.fa",
            "EOG091N44M8_nt.fa_kpi_gappy_custom_gaps",
            "output/EOG091N44M8_nt.fa.clipkit",
            gaps=0.1,
        )

    @pytest.mark.slow
    def test_EOG092C0CZK_aa(self):
        """
        test kpi_gappy with amino alignment of fungal sequences
        usage: clipkit EOG092C0CZK_aa_aln.fasta -g .5 -m kpi-gappy
        """
        self._assert_trimmed_output_with_gaps(
            "EOG092C0CZK_aa_aln.fasta",
            "EOG092C0CZK_aa_aln.fasta_kpi_gappy_custom_gaps",
            "output/EOG092C0CZK_aa_aln.fasta.clipkit",
            gaps=0.5,
        )

    @pytest.mark.slow
    def test_EOG092C4VOX_aa(self):
        """
        test kpi_gappy with amino alignment of fungal sequences
        usage: clipkit EOG092C4VOX_aa_aln.fasta -g .25 -m kpi-gappy
        """
        self._assert_trimmed_output_with_gaps(
            "EOG092C4VOX_aa_aln.fasta",
            "EOG092C4VOX_aa_aln.fasta_kpi_gappy_custom_gaps",
            "output/EOG092C4VOX_aa_aln.fasta.clipkit",
            gaps=0.25,
        )
