# HG changeset patch # User iuc # Date 1762685781 0 # Node ID a14d5c1e1fc4046978d656cede02e7f3cbaf86e7 planemo upload for repository https://github.com/georgehe23/tools-iuc/tree/main/tools/pal2nal commit aed49bdc26e503297e1fc394ada087042dc23386 diff -r 000000000000 -r a14d5c1e1fc4 info.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/info.xml Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,53 @@ + + 14.1 + 0 + + inputs/for_paml/test.codon``` + +**Ka/Ks calculation** + +To compute Ka and Ks values, run the resulting codon alignment through PAML's `codeml`, as illustrated in the PAL2NAL distribution (`inputs/for_paml/test.cnt`, `test.tree`, `test.codeml.ori`). + +**Warnings** + +PAL2NAL issues messages when protein residues and underlying codons disagree (for example, pseudogene cases). These warnings are harmless unless they indicate unintended mismatches; enable *Suppress STDERR messages* to hide them. + +**References and contacts** + +* PAL2NAL website: http://www.bork.embl.de/pal2nal +* Support: Mikita Suyama (mikita@bioreg.kyushu-u.ac.jp) +* Example data: `inputs/test.aln`, `inputs/test.nuc`, and PAML helpers inside the `inputs/for_paml/` directory. + ]]> + + + + 10.1093/nar/gkl315 + + + diff -r 000000000000 -r a14d5c1e1fc4 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,39 @@ + + + pal2nal.pl | head -n 1 + + + + pal2nal + + + + '$output_file'; +#if $html_output: +pal2nal.pl '$protein_alignment' +#for $nuc in $nucleotide_fastas: + '$nuc' +#end for + -output $output_format + $show_only_blocks + $remove_gaps + $remove_mismatches + -codontable $genetic_code + -html + $suppress_stderr + > '$html_output_file' +#end if + ]]> + + diff -r 000000000000 -r a14d5c1e1fc4 pal2nal.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pal2nal.xml Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,84 @@ + + Codon-based nucleotide alignment from protein and DNA sequences + + + info.xml + macros.xml + tests.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + html_output + + + + + + + + + diff -r 000000000000 -r a14d5c1e1fc4 test-data/inputs/for_paml/test.cnt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/for_paml/test.cnt Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,36 @@ + seqfile = test.codon + treefile = test.tree + outfile = test.codeml + + noisy = 0 * 0,1,2,3,9: how much rubbish on the screen + verbose = 0 * 1: detailed output, 0: concise output + runmode = -2 * 0: user tree; 1: semi-automatic; 2: automatic + * 3: StepwiseAddition; (4,5):PerturbationNNI; -2: pairwise + + cleandata = 1 * "I added on 07/07/2004" Mikita Suyama + + seqtype = 1 * 1:codons; 2:AAs; 3:codons-->AAs + CodonFreq = 2 * 0:1/61 each, 1:F1X4, 2:F3X4, 3:codon table + model = 2 + * models for codons: + * 0:one, 1:b, 2:2 or more dN/dS ratios for branches + + NSsites = 0 * dN/dS among sites. 0:no variation, 1:neutral, 2:positive + icode = 0 * 0:standard genetic code; 1:mammalian mt; 2-10:see below + Mgene = 0 * 0:rates, 1:separate; 2:pi, 3:kappa, 4:all + + fix_kappa = 0 * 1: kappa fixed, 0: kappa to be estimated + kappa = 2 * initial or fixed kappa + fix_omega = 0 * 1: omega or omega_1 fixed, 0: estimate + omega = 1 * initial or fixed omega, for codons or codon-transltd AAs + + fix_alpha = 1 * 0: estimate gamma shape parameter; 1: fix it at alpha + alpha = .0 * initial or fixed alpha, 0:infinity (constant rate) + Malpha = 0 * different alphas for genes + ncatG = 4 * # of categories in the dG or AdG models of rates + + clock = 0 * 0: no clock, unrooted tree, 1: clock, rooted tree + getSE = 0 * 0: don't want them, 1: want S.E.s of estimates + RateAncestor = 0 * (1/0): rates (alpha>0) or ancestral states (alpha=0) + method = 0 * 0: simultaneous; 1: one branch at a time + diff -r 000000000000 -r a14d5c1e1fc4 test-data/inputs/for_paml/test.codeml.ori --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/for_paml/test.codeml.ori Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,102 @@ +CODONML (in paml 3.14, March 2005) test.codon Model: several dN/dS ratios for branches +Codon frequencies: F3x4 + +ns = 2 ls = 177 +# site patterns = 105 + 2 1 1 1 1 1 1 1 1 1 4 3 2 1 2 + 1 1 4 1 1 2 1 1 1 7 2 3 1 2 1 + 2 1 3 1 1 2 2 8 1 1 1 1 1 1 7 + 1 1 1 3 1 4 1 1 4 1 1 3 2 1 6 + 1 1 1 2 1 1 1 6 2 1 3 1 1 4 1 + 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1 + 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 + + +1 +BC070280 CTA AAC TGC ATC GTC GCT GTG TCC CAG AAC ATG GGC ATC GGC AAG AAC GGG GAC CTG CCC CCA CCG CTC AGG AAT GAA TTC CAG AGA ACC ACA ACC TCT TCA GTA GAA GGT AAA CAG CTG GTG ATT ATG GGT AAG ACC TGG TCC ATT CCT GAG CGA CCT TTA GGT AGA GTT CTC AGC GAA CTC CCT CCA CAA GGA GCT CAT TTT CTT TCC AGT CTA GAT GAT GCC CTT ACT CCA GCA GTA ATG CTC TGG ATA GTT GGT TAT GCC GGC CAT CTT GTG AGG CAA GAA ACG ATT TTG CTG CCA TAC GTC CAG GTA GAG +pseudogene ... ... ... ..T ... AA. .AT ... ... ..G ... ... ... AT. .G. ..T ... ... ... ... ..T .A. ... .AA ... A.. ... ..A ... ... ... C.. ... ... .C. ..G ... ... G.A T.A ..A T.. T.A AT. ... .A. ... ..G ... A.. ... .A. ... ... TA. .T. ... G.. ..T ... TC. ..A ..G ... A.. C.. .C. ... ... GA. ... ..G .G. ... ... .G. .T. .T. ... CA. G.. T.T .TT .C. ..G ..A ... T.. .A. ... T.. ... T.. ..G C.. ... GG. ..A ..C ... ... ..G G.. ... ..A + +Codon usage in sequences +-------------------------------------------------------------- +Phe TTT 6 10 | Ser TCT 3 3 | Tyr TAT 3 4 | Cys TGT 0 0 + TTC 3 3 | TCC 3 3 | TAC 2 2 | TGC 1 1 +Leu TTA 4 7 | TCA 1 1 | *** TAA 0 0 | *** TGA 0 0 + TTG 1 0 | TCG 0 1 | TAG 0 0 | Trp TGG 2 2 +-------------------------------------------------------------- +Leu CTT 4 2 | Pro CCT 3 5 | His CAT 2 1 | Arg CGT 0 1 + CTC 5 2 | CCC 1 2 | CAC 0 0 | CGC 0 0 + CTA 3 3 | CCA 6 3 | Gln CAA 3 6 | CGA 1 0 + CTG 3 2 | CCG 1 1 | CAG 4 3 | CGG 0 0 +-------------------------------------------------------------- +Ile ATT 5 6 | Thr ACT 1 1 | Asn AAT 7 9 | Ser AGT 3 4 + ATC 3 3 | ACC 3 1 | AAC 3 2 | AGC 1 0 + ATA 1 1 | ACA 2 3 | Lys AAA 8 11 | Arg AGA 3 3 +Met ATG 6 4 | ACG 1 1 | AAG 9 8 | AGG 2 2 +-------------------------------------------------------------- +Val GTT 4 3 | Ala GCT 2 0 | Asp GAT 5 5 | Gly GGT 5 4 + GTC 2 3 | GCC 2 1 | GAC 4 6 | GGC 5 3 + GTA 3 2 | GCA 1 2 | Glu GAA 11 8 | GGA 1 1 + GTG 3 4 | GCG 0 0 | GAG 5 7 | GGG 1 1 +-------------------------------------------------------------- + +Codon position x base (3x4) table for each sequence. + +#1: BC070280 +position 1: T:0.16384 C:0.20339 A:0.32768 G:0.30508 +position 2: T:0.31638 C:0.16949 A:0.37288 G:0.14124 +position 3: T:0.29944 C:0.21469 A:0.27119 G:0.21469 + +#2: pseudogene +position 1: T:0.20904 C:0.17514 A:0.33333 G:0.28249 +position 2: T:0.31073 C:0.15819 A:0.40678 G:0.12429 +position 3: T:0.32768 C:0.18079 A:0.28814 G:0.20339 + +Sums of codon usage counts +------------------------------------------------------------------------------ +Phe F TTT 16 | Ser S TCT 6 | Tyr Y TAT 7 | Cys C TGT 0 + TTC 6 | TCC 6 | TAC 4 | TGC 2 +Leu L TTA 11 | TCA 2 | *** * TAA 0 | *** * TGA 0 + TTG 1 | TCG 1 | TAG 0 | Trp W TGG 4 +------------------------------------------------------------------------------ +Leu L CTT 6 | Pro P CCT 8 | His H CAT 3 | Arg R CGT 1 + CTC 7 | CCC 3 | CAC 0 | CGC 0 + CTA 6 | CCA 9 | Gln Q CAA 9 | CGA 1 + CTG 5 | CCG 2 | CAG 7 | CGG 0 +------------------------------------------------------------------------------ +Ile I ATT 11 | Thr T ACT 2 | Asn N AAT 16 | Ser S AGT 7 + ATC 6 | ACC 4 | AAC 5 | AGC 1 + ATA 2 | ACA 5 | Lys K AAA 19 | Arg R AGA 6 +Met M ATG 10 | ACG 2 | AAG 17 | AGG 4 +------------------------------------------------------------------------------ +Val V GTT 7 | Ala A GCT 2 | Asp D GAT 10 | Gly G GGT 9 + GTC 5 | GCC 3 | GAC 10 | GGC 8 + GTA 5 | GCA 3 | Glu E GAA 19 | GGA 2 + GTG 7 | GCG 0 | GAG 12 | GGG 2 +------------------------------------------------------------------------------ + + +Codon position x base (3x4) table, overall + +position 1: T:0.18644 C:0.18927 A:0.33051 G:0.29379 +position 2: T:0.31356 C:0.16384 A:0.38983 G:0.13277 +position 3: T:0.31356 C:0.19774 A:0.27966 G:0.20904 + + +Nei & Gojobori 1986. dN/dS (dN, dS) +(Note: This matrix is not used in later m.l. analysis. +Use runmode = -2 for ML pairwise comparison.) + +BC070280 +pseudogene 0.5224 (0.1421 0.2721) + +pairwise comparison, codon frequencies: F3x4. + + +2 (pseudogene) ... 1 (BC070280) +lnL =-1014.258355 + 0.52723 1.94064 0.59797 + +t= 0.5272 S= 129.6 N= 401.4 dN/dS= 0.5980 dN= 0.1510 dS= 0.2525 + + + diff -r 000000000000 -r a14d5c1e1fc4 test-data/inputs/for_paml/test.codon --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/for_paml/test.codon Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,23 @@ + 2 570 +BC070280 +ATGGTTGGTTCGCTAAACTGCATCGTCGCTGTGTCCCAGAACATGGGCATCGGCAAGAAC +GGGGACCTGCCCTGGCCACCGCTCAGGAATGAATTCAGATATTTCCAGAGAATGACCACA +ACCTCTTCAGTAGAAGGTAAACAGAATCTGGTGATTATGGGTAAGAAGACCTGGTTCTCC +ATTCCTGAGAAGAATCGACCTTTAAAGGGTAGAATTAATTTAGTTCTCAGCAGAGAACTC +AAGGAACCTCCACAAGGAGCTCATTTTCTTTCCAGAAGTCTAGATGATGCCTTAAAACTT +ACTGAACAACCAGAATTAGCAAATAAAGTAGACATGCTCTGGATAGTTGGTGGCAGTTCT +GTTTATAAGGAAGCCATGAATCACCCAGGCCATCTTAAACTATTTGTGACAAGGATCATG +CAAGACTTTGAAAGTGACACGTTTTTT---CCA---GAAATTGATTTGGAGAAATATAAA +CTTCTGCCAGAATACCCA---GGTGTTCTCTCTGATGTCCAGGAGGAGAAAGGCATTAAG +TACAAATTTGAAGTATATGAGAAGAATGAT +pseudogene +------------CTAAACTGCATTGTCAATGATTCCCAGAAGATGGGCATCATCAGGAAT +GGGGACCTGCCCTGACCTCAGCTCAAAAATAAATTCGA----TTCCAAAGAATGACCACA +CCCTCTTCAGCAGAGGGTAAAGAAAATTTAGTATTTTTAATTAGGAAGAACTGGTTCTCG +ATTACTGAGAAGAATCAACCTTTAAAGTATATAATTAATTTAGTTGTCAGTAGAGAATCC +AAGGAACCACCGCAAAGACCTCCTTTTCTTGACTAAAGTCTGGGTGATGCCTTAAAACGT +ATTGAGCAACTAAAATTAGCAAATAAACAAGACGTGTTTTTTACAGTGGGAGGCAGTTCT +GTTTATAAGGAATCCATGAATTGA---GACCATTTTAAACTATTTGTGACATGGATCATG +CAGGACTTTCAAAGTGACACGTTTTTTTCCCCTA--GAAGGTGATTTAGAGAAATATAAA +CTTCTCCCAGAATACCCACAAGGTGTTGTCTCTGATGTGGAGGAGGAGAAAGGCATTAAG +TACAAATTTGAAGTATATGAAAAGAATGAT diff -r 000000000000 -r a14d5c1e1fc4 test-data/inputs/for_paml/test.tree --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/for_paml/test.tree Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,1 @@ +(BC070280, pseudogene); diff -r 000000000000 -r a14d5c1e1fc4 test-data/inputs/test.aln --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/test.aln Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,19 @@ +CLUSTAL W (1.82) multiple sequence alignment + + +BC070280 MVGSLNCIVAVSQNMGIGKNGDLPWPPLRNEFRYFQRMTTTSSVEGKQNLVIMGKKTWFS +pseudogene ----LNCIVNVSQKMGIIRNGDLP*PQLKNKF2-FQRMTTPSSAEGKENLVFLIRKNWFS + ################# ########################## + + +BC070280 IPEKNRPLKGRINLVLSRELKEPPQGAHFLSRSLDDALKLTEQPELANKVDMLWIVGGSS +pseudogene ITEKNQPLKYIINLVVSRESKEPPQRPPFLD*SLGDALKRIEQLKLANKQDVFFTVGGSS + ############### ##################### + + +BC070280 VYKEAMNHPGHLKLFVTRIMQDFESDTFF-PEIDLEKYKLLPEYP-GVLSDVQEEKGIKY +pseudogene VYKESMN*-DHFKLFVTWIMQDFQSDTFFS4EGDLEKYKLLPEYPQGVVSDVEEEKGIKY + + +BC070280 KFEVYEKND +pseudogene KFEVYEKND \ No newline at end of file diff -r 000000000000 -r a14d5c1e1fc4 test-data/inputs/test.nuc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/test.nuc Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,29 @@ +>BC0700280 dihydrofolate reductase (human) +TGTAACGAGC GGGCTCGGAG GTCCTCCCGC TGCTGTCATG GTTGGTTCGC TAAACTGCAT +CGTCGCTGTG TCCCAGAACA TGGGCATCGG CAAGAACGGG GACCTGCCCT GGCCACCGCT +CAGGAATGAA TTCAGATATT TCCAGAGAAT GACCACAACC TCTTCAGTAG AAGGTAAACA +GAATCTGGTG ATTATGGGTA AGAAGACCTG GTTCTCCATT CCTGAGAAGA ATCGACCTTT +AAAGGGTAGA ATTAATTTAG TTCTCAGCAG AGAACTCAAG GAACCTCCAC AAGGAGCTCA +TTTTCTTTCC AGAAGTCTAG ATGATGCCTT AAAACTTACT GAACAACCAG AATTAGCAAA +TAAAGTAGAC ATGCTCTGGA TAGTTGGTGG CAGTTCTGTT TATAAGGAAG CCATGAATCA +CCCAGGCCAT CTTAAACTAT TTGTGACAAG GATCATGCAA GACTTTGAAA GTGACACGTT +TTTTCCAGAA ATTGATTTGG AGAAATATAA ACTTCTGCCA GAATACCCAG GTGTTCTCTC +TGATGTCCAG GAGGAGAAAG GCATTAAGTA CAAATTTGAA GTATATGAGA AGAATGATTA +ATATGAAGGT GTTTTCTAGT TTAAGTTGTT CCCCCTCCCT CTGAAAAAAG TATGTATTTT +TACATTAGAA AAGGTTTTTT GTTGACTTTA GATCTATAAT TATTTCTAAG CAACTTGTTT +TTATTCCCCA CTACTCTTGT CTCTATCAGA TACCATTTAT GAGACATTCT TGCTATAACT +AAGTGCTTCT CCAAGACCCC AACTGAGTCC CCAGCACCTG CTACAGTGAG CTGCCATTCC +ACACCCATCA CATGTGGCAC TCTTGCCAGT CCTTGACATT GTCGGGCTTT TCACATGTTG +GTAATATTTA TTAAAGATGA AGATCCACAT ACCCTTCAAA AAAAAAAAAA AAAAAAAAAA +AAAAAAA +>pseudogene dihydrofolate reductase pseudogene (human) +CTAAACTGCA TTGTCAATGA TTCCCAGAAG ATGGGCATCA TCAGGAATGG GGACCTGCCC +TGACCTCAGC TCAAAAATAA ATTCGATTCC AAAGAATGAC CACACCCTCT TCAGCAGAGG +GTAAAGAAAA TTTAGTATTT TTAATTAGGA AGAACTGGTT CTCGATTACT GAGAAGAATC +AACCTTTAAA GTATATAATT AATTTAGTTG TCAGTAGAGA ATCCAAGGAA CCACCGCAAA +GACCTCCTTT TCTTGACTAA AGTCTGGGTG ATGCCTTAAA ACGTATTGAG CAACTAAAAT +TAGCAAATAA ACAAGACGTG TTTTTTACAG TGGGAGGCAG TTCTGTTTAT AAGGAATCCA +TGAATTGAGA CCATTTTAAA CTATTTGTGA CATGGATCAT GCAGGACTTT CAAAGTGACA +CGTTTTTTTC CCCTAGAAGG TGATTTAGAG AAATATAAAC TTCTCCCAGA ATACCCACAA +GGTGTTGTCT CTGATGTGGA GGAGGAGAAA GGCATTAAGT ACAAATTTGA AGTATATGAA +AAGAATGAT \ No newline at end of file diff -r 000000000000 -r a14d5c1e1fc4 test-data/inputs/test_bc070280.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/test_bc070280.fasta Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,18 @@ +>BC0700280 dihydrofolate reductase (human) +TGTAACGAGC GGGCTCGGAG GTCCTCCCGC TGCTGTCATG GTTGGTTCGC TAAACTGCAT +CGTCGCTGTG TCCCAGAACA TGGGCATCGG CAAGAACGGG GACCTGCCCT GGCCACCGCT +CAGGAATGAA TTCAGATATT TCCAGAGAAT GACCACAACC TCTTCAGTAG AAGGTAAACA +GAATCTGGTG ATTATGGGTA AGAAGACCTG GTTCTCCATT CCTGAGAAGA ATCGACCTTT +AAAGGGTAGA ATTAATTTAG TTCTCAGCAG AGAACTCAAG GAACCTCCAC AAGGAGCTCA +TTTTCTTTCC AGAAGTCTAG ATGATGCCTT AAAACTTACT GAACAACCAG AATTAGCAAA +TAAAGTAGAC ATGCTCTGGA TAGTTGGTGG CAGTTCTGTT TATAAGGAAG CCATGAATCA +CCCAGGCCAT CTTAAACTAT TTGTGACAAG GATCATGCAA GACTTTGAAA GTGACACGTT +TTTTCCAGAA ATTGATTTGG AGAAATATAA ACTTCTGCCA GAATACCCAG GTGTTCTCTC +TGATGTCCAG GAGGAGAAAG GCATTAAGTA CAAATTTGAA GTATATGAGA AGAATGATTA +ATATGAAGGT GTTTTCTAGT TTAAGTTGTT CCCCCTCCCT CTGAAAAAAG TATGTATTTT +TACATTAGAA AAGGTTTTTT GTTGACTTTA GATCTATAAT TATTTCTAAG CAACTTGTTT +TTATTCCCCA CTACTCTTGT CTCTATCAGA TACCATTTAT GAGACATTCT TGCTATAACT +AAGTGCTTCT CCAAGACCCC AACTGAGTCC CCAGCACCTG CTACAGTGAG CTGCCATTCC +ACACCCATCA CATGTGGCAC TCTTGCCAGT CCTTGACATT GTCGGGCTTT TCACATGTTG +GTAATATTTA TTAAAGATGA AGATCCACAT ACCCTTCAAA AAAAAAAAAA AAAAAAAAAA +AAAAAAA diff -r 000000000000 -r a14d5c1e1fc4 test-data/inputs/test_dup.nuc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/test_dup.nuc Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,29 @@ +>BC0700280 dihydrofolate reductase (human) +TGTAACGAGC GGGCTCGGAG GTCCTCCCGC TGCTGTCATG GTTGGTTCGC TAAACTGCAT +CGTCGCTGTG TCCCAGAACA TGGGCATCGG CAAGAACGGG GACCTGCCCT GGCCACCGCT +CAGGAATGAA TTCAGATATT TCCAGAGAAT GACCACAACC TCTTCAGTAG AAGGTAAACA +GAATCTGGTG ATTATGGGTA AGAAGACCTG GTTCTCCATT CCTGAGAAGA ATCGACCTTT +AAAGGGTAGA ATTAATTTAG TTCTCAGCAG AGAACTCAAG GAACCTCCAC AAGGAGCTCA +TTTTCTTTCC AGAAGTCTAG ATGATGCCTT AAAACTTACT GAACAACCAG AATTAGCAAA +TAAAGTAGAC ATGCTCTGGA TAGTTGGTGG CAGTTCTGTT TATAAGGAAG CCATGAATCA +CCCAGGCCAT CTTAAACTAT TTGTGACAAG GATCATGCAA GACTTTGAAA GTGACACGTT +TTTTCCAGAA ATTGATTTGG AGAAATATAA ACTTCTGCCA GAATACCCAG GTGTTCTCTC +TGATGTCCAG GAGGAGAAAG GCATTAAGTA CAAATTTGAA GTATATGAGA AGAATGATTA +ATATGAAGGT GTTTTCTAGT TTAAGTTGTT CCCCCTCCCT CTGAAAAAAG TATGTATTTT +TACATTAGAA AAGGTTTTTT GTTGACTTTA GATCTATAAT TATTTCTAAG CAACTTGTTT +TTATTCCCCA CTACTCTTGT CTCTATCAGA TACCATTTAT GAGACATTCT TGCTATAACT +AAGTGCTTCT CCAAGACCCC AACTGAGTCC CCAGCACCTG CTACAGTGAG CTGCCATTCC +ACACCCATCA CATGTGGCAC TCTTGCCAGT CCTTGACATT GTCGGGCTTT TCACATGTTG +GTAATATTTA TTAAAGATGA AGATCCACAT ACCCTTCAAA AAAAAAAAAA AAAAAAAAAA +AAAAAAA +>pseudogene dihydrofolate reductase pseudogene (human) +CTAAACTGCA TTGTCAATGA TTCCCAGAAG ATGGGCATCA TCAGGAATGG GGACCTGCCC +TGACCTCAGC TCAAAAATAA ATTCGATTCC AAAGAATGAC CACACCCTCT TCAGCAGAGG +GTAAAGAAAA TTTAGTATTT TTAATTAGGA AGAACTGGTT CTCGATTACT GAGAAGAATC +AACCTTTAAA GTATATAATT AATTTAGTTG TCAGTAGAGA ATCCAAGGAA CCACCGCAAA +GACCTCCTTT TCTTGACTAA AGTCTGGGTG ATGCCTTAAA ACGTATTGAG CAACTAAAAT +TAGCAAATAA ACAAGACGTG TTTTTTACAG TGGGAGGCAG TTCTGTTTAT AAGGAATCCA +TGAATTGAGA CCATTTTAAA CTATTTGTGA CATGGATCAT GCAGGACTTT CAAAGTGACA +CGTTTTTTTC CCCTAGAAGG TGATTTAGAG AAATATAAAC TTCTCCCAGA ATACCCACAA +GGTGTTGTCT CTGATGTGGA GGAGGAGAAA GGCATTAAGT ACAAATTTGA AGTATATGAA +AAGAATGAT \ No newline at end of file diff -r 000000000000 -r a14d5c1e1fc4 test-data/inputs/test_pseudogene.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/test_pseudogene.fasta Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,11 @@ +>pseudogene dihydrofolate reductase pseudogene (human) +CTAAACTGCA TTGTCAATGA TTCCCAGAAG ATGGGCATCA TCAGGAATGG GGACCTGCCC +TGACCTCAGC TCAAAAATAA ATTCGATTCC AAAGAATGAC CACACCCTCT TCAGCAGAGG +GTAAAGAAAA TTTAGTATTT TTAATTAGGA AGAACTGGTT CTCGATTACT GAGAAGAATC +AACCTTTAAA GTATATAATT AATTTAGTTG TCAGTAGAGA ATCCAAGGAA CCACCGCAAA +GACCTCCTTT TCTTGACTAA AGTCTGGGTG ATGCCTTAAA ACGTATTGAG CAACTAAAAT +TAGCAAATAA ACAAGACGTG TTTTTTACAG TGGGAGGCAG TTCTGTTTAT AAGGAATCCA +TGAATTGAGA CCATTTTAAA CTATTTGTGA CATGGATCAT GCAGGACTTT CAAAGTGACA +CGTTTTTTTC CCCTAGAAGG TGATTTAGAG AAATATAAAC TTCTCCCAGA ATACCCACAA +GGTGTTGTCT CTGATGTGGA GGAGGAGAAA GGCATTAAGT ACAAATTTGA AGTATATGAA +AAGAATGAT diff -r 000000000000 -r a14d5c1e1fc4 test-data/outputs/expected_block_nomismatch.aln --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_block_nomismatch.aln Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,14 @@ +CLUSTAL W multiple sequence alignment + +BC070280 CTAAACTGCATCGTCGCTTCCCAGAACATGGGCATCGGCAACGGGTTCCAGATGACCACA +pseudogene CTAAACTGCATTGTCAATTCCCAGAAGATGGGCATCATCAATGGGTTCCAAATGACCACA + +BC070280 ACCTCTTCAGTAGAAGGTAAACAGAATCTGGTGATTATGGGTAAGACCTGGTTCTCCATT +pseudogene CCCTCTTCAGCAGAGGGTAAAGAAAATTTAGTATTTTTAATTAAGAACTGGTTCTCGATT + +BC070280 CCTGAGAAGAATCGACCTTTAAAGGGTATTAATTTAGTTGATGCCTTAAAACTTACTGAA +pseudogene ACTGAGAAGAATCAACCTTTAAAGTATATTAATTTAGTTGATGCCTTAAAACGTATTGAG + +BC070280 CAACCAGAATTAGCAAATAAAGTAGACATGCTCTGGGTT +pseudogene CAACTAAAATTAGCAAATAAACAAGACGTGTTTTTTGTG + diff -r 000000000000 -r a14d5c1e1fc4 test-data/outputs/expected_clustal.aln --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_clustal.aln Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,42 @@ +CLUSTAL W multiple sequence alignment + +BC070280 ATGGTTGGTTCGCTAAACTGCATCGTCGCTGTGTCCCAGAACATGGGCATCGGCAAGAAC +pseudogene ------------CTAAACTGCATTGTCAATGATTCCCAGAAGATGGGCATCATCAGGAAT + ################################################ + +BC070280 GGGGACCTGCCCTGGCCACCGCTCAGGAATGAATTCAGATATTTCCAGAGAATGACCACA +pseudogene GGGGACCTGCCCTGACCTCAGCTCAAAAATAAATTCGA----TTCCAAAGAATGACCACA + ### ################## + +BC070280 ACCTCTTCAGTAGAAGGTAAACAGAATCTGGTGATTATGGGTAAGAAGACCTGGTTCTCC +pseudogene CCCTCTTCAGCAGAGGGTAAAGAAAATTTAGTATTTTTAATTAGGAAGAACTGGTTCTCG + ############################################################ + +BC070280 ATTCCTGAGAAGAATCGACCTTTAAAGGGTAGAATTAATTTAGTTCTCAGCAGAGAACTC +pseudogene ATTACTGAGAAGAATCAACCTTTAAAGTATATAATTAATTTAGTTGTCAGTAGAGAATCC + ############################################# + +BC070280 AAGGAACCTCCACAAGGAGCTCATTTTCTTTCCAGAAGTCTAGATGATGCCTTAAAACTT +pseudogene AAGGAACCACCGCAAAGACCTCCTTTTCTTGACTAAAGTCTGGGTGATGCCTTAAAACGT + ############### + +BC070280 ACTGAACAACCAGAATTAGCAAATAAAGTAGACATGCTCTGGATAGTTGGTGGCAGTTCT +pseudogene ATTGAGCAACTAAAATTAGCAAATAAACAAGACGTGTTTTTTACAGTGGGAGGCAGTTCT + ################################################ + +BC070280 GTTTATAAGGAAGCCATGAATCACCCAGGCCATCTTAAACTATTTGTGACAAGGATCATG +pseudogene GTTTATAAGGAATCCATGAATTGA---GACCATTTTAAACTATTTGTGACATGGATCATG + + +BC070280 CAAGACTTTGAAAGTGACACGTTTTTT---CCA---GAAATTGATTTGGAGAAATATAAA +pseudogene CAGGACTTTCAAAGTGACACGTTTTTTTCCCCTA--GAAGGTGATTTAGAGAAATATAAA + + +BC070280 CTTCTGCCAGAATACCCA---GGTGTTCTCTCTGATGTCCAGGAGGAGAAAGGCATTAAG +pseudogene CTTCTCCCAGAATACCCACAAGGTGTTGTCTCTGATGTGGAGGAGGAGAAAGGCATTAAG + + +BC070280 TACAAATTTGAAGTATATGAGAAGAATGAT +pseudogene TACAAATTTGAAGTATATGAAAAGAATGAT + + diff -r 000000000000 -r a14d5c1e1fc4 test-data/outputs/expected_clustal_multi.aln --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_clustal_multi.aln Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,42 @@ +CLUSTAL W multiple sequence alignment + +BC070280 ATGGTTGGTTCGCTAAACTGCATCGTCGCTGTGTCCCAGAACATGGGCATCGGCAAGAAC +pseudogene ------------CTAAACTGCATTGTCAATGATTCCCAGAAGATGGGCATCATCAGGAAT + ################################################ + +BC070280 GGGGACCTGCCCTGGCCACCGCTCAGGAATGAATTCAGATATTTCCAGAGAATGACCACA +pseudogene GGGGACCTGCCCTGACCTCAGCTCAAAAATAAATTCGA----TTCCAAAGAATGACCACA + ### ################## + +BC070280 ACCTCTTCAGTAGAAGGTAAACAGAATCTGGTGATTATGGGTAAGAAGACCTGGTTCTCC +pseudogene CCCTCTTCAGCAGAGGGTAAAGAAAATTTAGTATTTTTAATTAGGAAGAACTGGTTCTCG + ############################################################ + +BC070280 ATTCCTGAGAAGAATCGACCTTTAAAGGGTAGAATTAATTTAGTTCTCAGCAGAGAACTC +pseudogene ATTACTGAGAAGAATCAACCTTTAAAGTATATAATTAATTTAGTTGTCAGTAGAGAATCC + ############################################# + +BC070280 AAGGAACCTCCACAAGGAGCTCATTTTCTTTCCAGAAGTCTAGATGATGCCTTAAAACTT +pseudogene AAGGAACCACCGCAAAGACCTCCTTTTCTTGACTAAAGTCTGGGTGATGCCTTAAAACGT + ############### + +BC070280 ACTGAACAACCAGAATTAGCAAATAAAGTAGACATGCTCTGGATAGTTGGTGGCAGTTCT +pseudogene ATTGAGCAACTAAAATTAGCAAATAAACAAGACGTGTTTTTTACAGTGGGAGGCAGTTCT + ################################################ + +BC070280 GTTTATAAGGAAGCCATGAATCACCCAGGCCATCTTAAACTATTTGTGACAAGGATCATG +pseudogene GTTTATAAGGAATCCATGAATTGA---GACCATTTTAAACTATTTGTGACATGGATCATG + + +BC070280 CAAGACTTTGAAAGTGACACGTTTTTT---CCA---GAAATTGATTTGGAGAAATATAAA +pseudogene CAGGACTTTCAAAGTGACACGTTTTTTTCCCCTA--GAAGGTGATTTAGAGAAATATAAA + + +BC070280 CTTCTGCCAGAATACCCA---GGTGTTCTCTCTGATGTCCAGGAGGAGAAAGGCATTAAG +pseudogene CTTCTCCCAGAATACCCACAAGGTGTTGTCTCTGATGTGGAGGAGGAGAAAGGCATTAAG + + +BC070280 TACAAATTTGAAGTATATGAGAAGAATGAT +pseudogene TACAAATTTGAAGTATATGAAAAGAATGAT + + diff -r 000000000000 -r a14d5c1e1fc4 test-data/outputs/expected_codon.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_codon.txt Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,60 @@ + M V G S L N C I V A V S Q N M G I G K N +BC070280 ATG GTT GGT TCG CTA AAC TGC ATC GTC GCT GTG TCC CAG AAC ATG GGC ATC GGC AAG AAC + - - - - L N C I V N V S Q K M G I I R N +pseudogene --- --- --- --- CTA AAC TGC ATT GTC AAT GAT TCC CAG AAG ATG GGC ATC ATC AGG AAT + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + + G D L P W P P L R N E F R Y F Q R M T T +BC070280 GGG GAC CTG CCC TGG CCA CCG CTC AGG AAT GAA TTC AGA TAT TTC CAG AGA ATG ACC ACA + G D L P * P Q L K N K F 2 - F Q R M T T +pseudogene GGG GAC CTG CCC TGA CCT CAG CTC AAA AAT AAA TTC GA- --- TTC CAA AGA ATG ACC ACA + ### ### ### ### ### ### ### + + T S S V E G K Q N L V I M G K K T W F S +BC070280 ACC TCT TCA GTA GAA GGT AAA CAG AAT CTG GTG ATT ATG GGT AAG AAG ACC TGG TTC TCC + P S S A E G K E N L V F L I R K N W F S +pseudogene CCC TCT TCA GCA GAG GGT AAA GAA AAT TTA GTA TTT TTA ATT AGG AAG AAC TGG TTC TCG + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + + I P E K N R P L K G R I N L V L S R E L +BC070280 ATT CCT GAG AAG AAT CGA CCT TTA AAG GGT AGA ATT AAT TTA GTT CTC AGC AGA GAA CTC + I T E K N Q P L K Y I I N L V V S R E S +pseudogene ATT ACT GAG AAG AAT CAA CCT TTA AAG TAT ATA ATT AAT TTA GTT GTC AGT AGA GAA TCC + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + + K E P P Q G A H F L S R S L D D A L K L +BC070280 AAG GAA CCT CCA CAA GGA GCT CAT TTT CTT TCC AGA AGT CTA GAT GAT GCC TTA AAA CTT + K E P P Q R P P F L D * S L G D A L K R +pseudogene AAG GAA CCA CCG CAA AGA CCT CCT TTT CTT GAC TAA AGT CTG GGT GAT GCC TTA AAA CGT + ### ### ### ### ### + + T E Q P E L A N K V D M L W I V G G S S +BC070280 ACT GAA CAA CCA GAA TTA GCA AAT AAA GTA GAC ATG CTC TGG ATA GTT GGT GGC AGT TCT + I E Q L K L A N K Q D V F F T V G G S S +pseudogene ATT GAG CAA CTA AAA TTA GCA AAT AAA CAA GAC GTG TTT TTT ACA GTG GGA GGC AGT TCT + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + + V Y K E A M N H P G H L K L F V T R I M +BC070280 GTT TAT AAG GAA GCC ATG AAT CAC CCA GGC CAT CTT AAA CTA TTT GTG ACA AGG ATC ATG + V Y K E S M N * - D H F K L F V T W I M +pseudogene GTT TAT AAG GAA TCC ATG AAT TGA --- GAC CAT TTT AAA CTA TTT GTG ACA TGG ATC ATG + + + Q D F E S D T F F - P - E I D L E K Y K +BC070280 CAA GAC TTT GAA AGT GAC ACG TTT TTT --- CCA --- GAA ATT GAT TTG GAG AAA TAT AAA + Q D F Q S D T F F S 4 - E G D L E K Y K +pseudogene CAG GAC TTT CAA AGT GAC ACG TTT TTT TCC CCT A-- GAA GGT GAT TTA GAG AAA TAT AAA + + + L L P E Y P - G V L S D V Q E E K G I K +BC070280 CTT CTG CCA GAA TAC CCA --- GGT GTT CTC TCT GAT GTC CAG GAG GAG AAA GGC ATT AAG + L L P E Y P Q G V V S D V E E E K G I K +pseudogene CTT CTC CCA GAA TAC CCA CAA GGT GTT GTC TCT GAT GTG GAG GAG GAG AAA GGC ATT AAG + + + Y K F E V Y E K N D +BC070280 TAC AAA TTT GAA GTA TAT GAG AAG AAT GAT + Y K F E V Y E K N D +pseudogene TAC AAA TTT GAA GTA TAT GAA AAG AAT GAT + + diff -r 000000000000 -r a14d5c1e1fc4 test-data/outputs/expected_html.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_html.html Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,16 @@ +
+CLUSTAL W multiple sequence alignment
+
+BC070280      CTAAACTGCATCGTCGCTTCCCAGAACATGGGCATCGGCAACGGGTTCCAGATGACCACA
+pseudogene    CTAAACTGCATTGTCAATTCCCAGAAGATGGGCATCATCAATGGGTTCCAAATGACCACA
+
+BC070280      ACCTCTTCAGTAGAAGGTAAACAGAATCTGGTGATTATGGGTAAGACCTGGTTCTCCATT
+pseudogene    CCCTCTTCAGCAGAGGGTAAAGAAAATTTAGTATTTTTAATTAAGAACTGGTTCTCGATT
+
+BC070280      CCTGAGAAGAATCGACCTTTAAAGGGTATTAATTTAGTTGATGCCTTAAAACTTACTGAA
+pseudogene    ACTGAGAAGAATCAACCTTTAAAGTATATTAATTTAGTTGATGCCTTAAAACGTATTGAG
+
+BC070280      CAACCAGAATTAGCAAATAAAGTAGACATGCTCTGGGTT
+pseudogene    CAACTAAAATTAGCAAATAAACAAGACGTGTTTTTTGTG
+
+
diff -r 000000000000 -r a14d5c1e1fc4 test-data/outputs/expected_nogap.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_nogap.fasta Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,20 @@ +>BC070280 +CTAAACTGCATCGTCGCTGTGTCCCAGAACATGGGCATCGGCAAGAACGGGGACCTGCCC +CCACCGCTCAGGAATGAATTCTTCCAGAGAATGACCACAACCTCTTCAGTAGAAGGTAAA +CAGAATCTGGTGATTATGGGTAAGAAGACCTGGTTCTCCATTCCTGAGAAGAATCGACCT +TTAAAGGGTAGAATTAATTTAGTTCTCAGCAGAGAACTCAAGGAACCTCCACAAGGAGCT +CATTTTCTTTCCAGTCTAGATGATGCCTTAAAACTTACTGAACAACCAGAATTAGCAAAT +AAAGTAGACATGCTCTGGATAGTTGGTGGCAGTTCTGTTTATAAGGAAGCCATGAATGGC +CATCTTAAACTATTTGTGACAAGGATCATGCAAGACTTTGAAAGTGACACGTTTTTTCCA +GAAATTGATTTGGAGAAATATAAACTTCTGCCAGAATACCCAGGTGTTCTCTCTGATGTC +CAGGAGGAGAAAGGCATTAAGTACAAATTTGAAGTATATGAGAAGAATGAT +>pseudogene +CTAAACTGCATTGTCAATGATTCCCAGAAGATGGGCATCATCAGGAATGGGGACCTGCCC +CCTCAGCTCAAAAATAAATTCTTCCAAAGAATGACCACACCCTCTTCAGCAGAGGGTAAA +GAAAATTTAGTATTTTTAATTAGGAAGAACTGGTTCTCGATTACTGAGAAGAATCAACCT +TTAAAGTATATAATTAATTTAGTTGTCAGTAGAGAATCCAAGGAACCACCGCAAAGACCT +CCTTTTCTTGACAGTCTGGGTGATGCCTTAAAACGTATTGAGCAACTAAAATTAGCAAAT +AAACAAGACGTGTTTTTTACAGTGGGAGGCAGTTCTGTTTATAAGGAATCCATGAATGAC +CATTTTAAACTATTTGTGACATGGATCATGCAGGACTTTCAAAGTGACACGTTTTTTCCT +GAAGGTGATTTAGAGAAATATAAACTTCTCCCAGAATACCCAGGTGTTGTCTCTGATGTG +GAGGAGGAGAAAGGCATTAAGTACAAATTTGAAGTATATGAAAAGAATGAT diff -r 000000000000 -r a14d5c1e1fc4 test-data/outputs/expected_paml.paml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_paml.paml Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,23 @@ + 2 570 +BC070280 +ATGGTTGGTTCGCTAAACTGCATCGTCGCTGTGTCCCAGAACATGGGCATCGGCAAGAAC +GGGGACCTGCCCTGGCCACCGCTCAGGAATGAATTCAGATATTTCCAGAGAATGACCACA +ACCTCTTCAGTAGAAGGTAAACAGAATCTGGTGATTATGGGTAAGAAGACCTGGTTCTCC +ATTCCTGAGAAGAATCGACCTTTAAAGGGTAGAATTAATTTAGTTCTCAGCAGAGAACTC +AAGGAACCTCCACAAGGAGCTCATTTTCTTTCCAGAAGTCTAGATGATGCCTTAAAACTT +ACTGAACAACCAGAATTAGCAAATAAAGTAGACATGCTCTGGATAGTTGGTGGCAGTTCT +GTTTATAAGGAAGCCATGAATCACCCAGGCCATCTTAAACTATTTGTGACAAGGATCATG +CAAGACTTTGAAAGTGACACGTTTTTT---CCA---GAAATTGATTTGGAGAAATATAAA +CTTCTGCCAGAATACCCA---GGTGTTCTCTCTGATGTCCAGGAGGAGAAAGGCATTAAG +TACAAATTTGAAGTATATGAGAAGAATGAT +pseudogene +------------CTAAACTGCATTGTCAATGATTCCCAGAAGATGGGCATCATCAGGAAT +GGGGACCTGCCCTGACCTCAGCTCAAAAATAAATTCGA----TTCCAAAGAATGACCACA +CCCTCTTCAGCAGAGGGTAAAGAAAATTTAGTATTTTTAATTAGGAAGAACTGGTTCTCG +ATTACTGAGAAGAATCAACCTTTAAAGTATATAATTAATTTAGTTGTCAGTAGAGAATCC +AAGGAACCACCGCAAAGACCTCCTTTTCTTGACTAAAGTCTGGGTGATGCCTTAAAACGT +ATTGAGCAACTAAAATTAGCAAATAAACAAGACGTGTTTTTTACAGTGGGAGGCAGTTCT +GTTTATAAGGAATCCATGAATTGA---GACCATTTTAAACTATTTGTGACATGGATCATG +CAGGACTTTCAAAGTGACACGTTTTTTTCCCCTA--GAAGGTGATTTAGAGAAATATAAA +CTTCTCCCAGAATACCCACAAGGTGTTGTCTCTGATGTGGAGGAGGAGAAAGGCATTAAG +TACAAATTTGAAGTATATGAAAAGAATGAT diff -r 000000000000 -r a14d5c1e1fc4 tests.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests.xml Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,49 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +