changeset 0:a14d5c1e1fc4 draft default tip

planemo upload for repository https://github.com/georgehe23/tools-iuc/tree/main/tools/pal2nal commit aed49bdc26e503297e1fc394ada087042dc23386
author iuc
date Sun, 09 Nov 2025 10:56:21 +0000
parents
children
files info.xml macros.xml pal2nal.xml test-data/inputs/for_paml/test.cnt test-data/inputs/for_paml/test.codeml.ori test-data/inputs/for_paml/test.codon test-data/inputs/for_paml/test.tree test-data/inputs/test.aln test-data/inputs/test.nuc test-data/inputs/test_bc070280.fasta test-data/inputs/test_dup.nuc test-data/inputs/test_pseudogene.fasta test-data/outputs/expected_block_nomismatch.aln test-data/outputs/expected_clustal.aln test-data/outputs/expected_clustal_multi.aln test-data/outputs/expected_codon.txt test-data/outputs/expected_html.html test-data/outputs/expected_nogap.fasta test-data/outputs/expected_paml.paml tests.xml
diffstat 20 files changed, 710 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/info.xml	Sun Nov 09 10:56:21 2025 +0000
@@ -0,0 +1,53 @@
+<macros>
+    <token name="@TOOL_VERSION@">14.1</token><!-- PAL2NAL version; used for the package requirement (macros.xml) and the tool version string (pal2nal.xml) -->
+    <token name="@WRAPPER_VERSION@">0</token><!-- wrapper revision; appended to the tool version as "+galaxy" followed by this number (pal2nal.xml) -->
+    <xml name="help">
+        <help><![CDATA[
+**Overview**
+
+PAL2NAL converts a protein multiple sequence alignment plus the matching nucleotide FASTA records into a codon-aware alignment suitable for downstream Ka/Ks analyses. The script is maintained by Mikita Suyama (Kyushu University) and is distributed under GPL v2. This Galaxy wrapper surfaces the upstream ``pal2nal.pl`` tool so that codon alignments can be created inside workflows.
+
+**Inputs**
+
+* *Protein alignment (pep.aln)* — CLUSTAL or FASTA alignment of the translated sequences. Alignments may contain more than two sequences, and frame-shift events can be annotated by numeric placeholders (for example, ``2`` indicates a single base deletion; see the bundled ``test.aln`` example).
+* *Nucleotide FASTA (nuc.fasta)* — Corresponding DNA or mRNA sequences. Select one or more FASTA datasets in the *Nucleotide FASTA files* input; the sequences may be in a single multi-FASTA dataset or spread across several datasets. Sequences are paired with the protein alignment by identifier when the identifiers match; otherwise they must be supplied in the same order as in the protein alignment.
+
+**Options**
+
+* ``-output clustal|paml|fasta|codon`` (Galaxy: *Output format*).
+* ``-blockonly`` — Restrict output to user-marked blocks (``#`` rows in CLUSTAL alignments).
+* ``-nogap`` — Remove codons containing gaps or in-frame stops.
+* ``-nomismatch`` — Remove codons where amino acid and nucleotide sequences disagree (useful for discarding pseudogene regions).
+* ``-codontable`` — Choose an NCBI genetic code (1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 13, 14, 15, 16, 21, 22, 23).
+* ``-html`` — Produce HTML formatted output (Galaxy: enable *Add HTML formatted output* to store the optional HTML view).
+* ``-nostderr`` — Suppress STDERR warnings (for example, expected pseudogene mismatches).
+
+**Outputs**
+
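+* Codon-respecting alignment rendered in the selected format (Galaxy datatype ``clustal`` for CLUSTAL, ``phylip`` for PAML, ``fasta`` for FASTA, or ``txt`` for CODON output); when *Add HTML formatted output* is enabled, an additional HTML dataset is produced.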
+
+**Example:**
+
+``pal2nal.pl inputs/test.aln inputs/test.nuc -output paml -nogap > inputs/for_paml/test.codon``
+
+**Ka/Ks calculation**
+
+To compute Ka and Ks values, run the resulting codon alignment through PAML's ``codeml``, as illustrated in the PAL2NAL distribution (``inputs/for_paml/test.cnt``, ``test.tree``, ``test.codeml.ori``).
+
+**Warnings**
+
+PAL2NAL issues messages when protein residues and underlying codons disagree (for example, pseudogene cases). These warnings are harmless unless they indicate unintended mismatches; enable *Suppress STDERR messages* to hide them.
+
+**References and contacts**
+
+* PAL2NAL website: http://www.bork.embl.de/pal2nal
+* Support: Mikita Suyama (mikita@bioreg.kyushu-u.ac.jp)
+* Example data: ``inputs/test.aln``, ``inputs/test.nuc``, and PAML helpers inside the ``inputs/for_paml/`` directory.
+        ]]></help>
+    </xml>
+    <xml name="citations"><!-- Citation block expanded at the end of pal2nal.xml. -->
+        <citations>
+            <citation type="doi">10.1093/nar/gkl315</citation><!-- presumably the PAL2NAL publication in Nucleic Acids Research; verify the DOI -->
+        </citations>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Sun Nov 09 10:56:21 2025 +0000
@@ -0,0 +1,39 @@
+<macros>
+    <xml name="version_command"><!-- Records the tool version as the first line printed by pal2nal.pl when run without arguments. NOTE(review): confirm the banner is written to STDOUT and that its first line is not blank. -->
+        <version_command>pal2nal.pl | head -n 1</version_command>
+    </xml>
+    <xml name="requirements"><!-- Single package dependency on pal2nal, pinned to the @TOOL_VERSION@ token defined in info.xml. -->
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">pal2nal</requirement>
+        </requirements>
+    </xml>
+    <xml name="command"><!-- Builds the pal2nal.pl command line: one run writes the primary alignment to output_file and, when html_output is set, a second run with the same options plus -html writes html_output_file. The runs are chained with && so the HTML run only starts after the first run succeeded, without depending on the shell's strict mode. -->
+        <command detect_errors="exit_code"><![CDATA[
+pal2nal.pl '$protein_alignment'
+#for $nuc in $nucleotide_fastas:
+  '$nuc'
+#end for
+  -output $output_format
+  $show_only_blocks
+  $remove_gaps
+  $remove_mismatches
+  -codontable $genetic_code
+  $suppress_stderr
+  > '$output_file'
+#if $html_output:
+&& pal2nal.pl '$protein_alignment'
+#for $nuc in $nucleotide_fastas:
+  '$nuc'
+#end for
+  -output $output_format
+  $show_only_blocks
+  $remove_gaps
+  $remove_mismatches
+  -codontable $genetic_code
+  -html
+  $suppress_stderr
+  > '$html_output_file'
+#end if
+        ]]></command>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pal2nal.xml	Sun Nov 09 10:56:21 2025 +0000
@@ -0,0 +1,84 @@
+<tool id="pal2nal" name="PAL2NAL" version="@TOOL_VERSION@+galaxy@WRAPPER_VERSION@" profile="24.0">
+    <description>Codon-based nucleotide alignment from protein and DNA sequences</description>
+
+    <macros>
+        <import>info.xml</import>
+        <import>macros.xml</import>
+        <import>tests.xml</import>
+    </macros>
+
+    <expand macro="requirements" />
+
+    <expand macro="version_command" />
+
+    <expand macro="command" />
+
+    <inputs><!-- Parameters referenced by the command macro (macros.xml): two positional dataset inputs followed by the pal2nal.pl option switches. -->
+        <param name="protein_alignment" type="data" format="fasta,clustal" label="Protein alignment"
+               help="Accepts CLUSTAL/FASTA alignments uploaded through Galaxy." /><!-- first positional argument of pal2nal.pl -->
+        <param name="nucleotide_fastas" type="data" format="fasta" multiple="true" min="1"
+               label="Nucleotide FASTA files" help="Accepts FASTA-formatted nucleotide sequences." /><!-- each selected dataset is passed as a further positional argument -->
+
+        <param name="output_format" type="select" argument="-output" label="Output format"><!-- also selects the datatype of output_file via change_format in the outputs section -->
+            <option value="clustal" selected="true">CLUSTAL (default)</option>
+            <option value="paml">PAML</option>
+            <option value="fasta">FASTA</option>
+            <option value="codon">CODON</option>
+        </param>
+
+        <param name="show_only_blocks" type="boolean" argument="-blockonly" truevalue="-blockonly" falsevalue=""
+               checked="false" label="Show only user-specified blocks" help="Use only '#' marked conserved blocks under CLUSTAL alignment." /><!-- the boolean switches expand to their flag when checked and to an empty string otherwise -->
+
+        <param name="remove_gaps" type="boolean" argument="-nogap" truevalue="-nogap" falsevalue=""
+               checked="false" label="Remove codons with gaps or in-frame stop codons" />
+
+        <param name="remove_mismatches" type="boolean" argument="-nomismatch" truevalue="-nomismatch" falsevalue=""
+               checked="false" label="Remove mismatched codons" />
+
+        <param name="genetic_code" type="select" argument="-codontable" label="Genetic code"><!-- option values are passed verbatim after -codontable -->
+            <option value="1" selected="true">1: Universal</option>
+            <option value="2">2: Vertebrate mitochondrial</option>
+            <option value="3">3: Yeast mitochondrial</option>
+            <option value="4">4: Mold/Protozoan/Coelenterate mito + Mycoplasma</option>
+            <option value="5">5: Invertebrate mitochondrial</option>
+            <option value="6">6: Ciliate/Hexamita nuclear</option>
+            <option value="9">9: Echinoderm/Flatworm mitochondrial</option>
+            <option value="10">10: Euplotid nuclear</option>
+            <option value="11">11: Bacterial/Archaeal/Plastid</option>
+            <option value="12">12: Alternative yeast nuclear</option>
+            <option value="13">13: Ascidian mitochondrial</option>
+            <option value="14">14: Alternative flatworm mitochondrial</option>
+            <option value="15">15: Blepharisma nuclear</option>
+            <option value="16">16: Chlorophycean mitochondrial</option>
+            <option value="21">21: Trematode mitochondrial</option>
+            <option value="22">22: Scenedesmus obliquus mitochondrial</option>
+            <option value="23">23: Thraustochytrium mitochondrial</option>
+        </param>
+
+        <param name="html_output" type="boolean" truevalue="true" falsevalue=""
+               checked="false" label="Add HTML formatted output" help="Produce an additional dataset with PAL2NAL's HTML view." /><!-- not a command-line flag itself: gates the second, -html run in the command macro and the html_output_file output filter -->
+
+        <param name="suppress_stderr" type="boolean" argument="-nostderr" truevalue="-nostderr" falsevalue=""
+               checked="false" label="Suppress STDERR messages" help="Hide warning messages (use for automated pipelines)." />
+    </inputs>
+
+    <outputs><!-- Primary codon alignment plus an optional HTML rendering; both labels follow the "tool name on input datasets" pattern so history items stay traceable to their inputs. -->
+        <data name="output_file" format="txt" label="${tool.name} on ${on_string}: codon alignment">
+            <change_format><!-- datatype follows the selected output format; PAML output is stored as phylip, CODON output stays txt -->
+                <when input="output_format" value="clustal" format="clustal" />
+                <when input="output_format" value="fasta" format="fasta" />
+                <when input="output_format" value="paml" format="phylip" />
+                <when input="output_format" value="codon" format="txt" />
+            </change_format>
+        </data>
+        <data name="html_output_file" format="html" label="${tool.name} on ${on_string}: HTML view">
+            <filter>html_output</filter><!-- dataset is only created when "Add HTML formatted output" is checked -->
+        </data>
+    </outputs>
+
+    <expand macro="tests" />
+
+    <expand macro="help" />
+
+    <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/for_paml/test.cnt	Sun Nov 09 10:56:21 2025 +0000
@@ -0,0 +1,36 @@
+      seqfile = test.codon
+     treefile = test.tree
+      outfile = test.codeml
+
+        noisy = 0   * 0,1,2,3,9: how much rubbish on the screen
+      verbose = 0   * 1: detailed output, 0: concise output
+      runmode = -2  * 0: user tree;  1: semi-automatic;  2: automatic
+                    * 3: StepwiseAddition; (4,5):PerturbationNNI; -2: pairwise
+
+    cleandata = 1   * "I added on 07/07/2004" Mikita Suyama
+
+      seqtype = 1   * 1:codons; 2:AAs; 3:codons-->AAs
+    CodonFreq = 2   * 0:1/61 each, 1:F1X4, 2:F3X4, 3:codon table
+        model = 2
+                    * models for codons:
+                        * 0:one, 1:b, 2:2 or more dN/dS ratios for branches
+
+      NSsites = 0   * dN/dS among sites. 0:no variation, 1:neutral, 2:positive
+        icode = 0   * 0:standard genetic code; 1:mammalian mt; 2-10:see below
+        Mgene = 0   * 0:rates, 1:separate; 2:pi, 3:kappa, 4:all
+
+    fix_kappa = 0   * 1: kappa fixed, 0: kappa to be estimated
+        kappa = 2   * initial or fixed kappa
+    fix_omega = 0   * 1: omega or omega_1 fixed, 0: estimate
+        omega = 1   * initial or fixed omega, for codons or codon-transltd AAs
+
+    fix_alpha = 1   * 0: estimate gamma shape parameter; 1: fix it at alpha
+        alpha = .0  * initial or fixed alpha, 0:infinity (constant rate)
+       Malpha = 0   * different alphas for genes
+        ncatG = 4   * # of categories in the dG or AdG models of rates
+
+        clock = 0   * 0: no clock, unrooted tree, 1: clock, rooted tree
+        getSE = 0   * 0: don't want them, 1: want S.E.s of estimates
+ RateAncestor = 0   * (1/0): rates (alpha>0) or ancestral states (alpha=0)
+       method = 0   * 0: simultaneous; 1: one branch at a time
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/for_paml/test.codeml.ori	Sun Nov 09 10:56:21 2025 +0000
@@ -0,0 +1,102 @@
+CODONML (in paml 3.14, March 2005)    test.codon   Model: several dN/dS ratios for branches 
+Codon frequencies: F3x4
+
+ns = 2  	ls = 177
+# site patterns = 105
+    2    1    1    1    1    1    1    1    1    1    4    3    2    1    2
+    1    1    4    1    1    2    1    1    1    7    2    3    1    2    1
+    2    1    3    1    1    2    2    8    1    1    1    1    1    1    7
+    1    1    1    3    1    4    1    1    4    1    1    3    2    1    6
+    1    1    1    2    1    1    1    6    2    1    3    1    1    4    1
+    1    1    1    1    1    1    1    1    1    1    1    3    1    1    1
+    1    1    1    1    1    1    1    1    1    2    2    1    1    1    1
+
+
+1      
+BC070280              CTA AAC TGC ATC GTC GCT GTG TCC CAG AAC ATG GGC ATC GGC AAG AAC GGG GAC CTG CCC CCA CCG CTC AGG AAT GAA TTC CAG AGA ACC ACA ACC TCT TCA GTA GAA GGT AAA CAG CTG GTG ATT ATG GGT AAG ACC TGG TCC ATT CCT GAG CGA CCT TTA GGT AGA GTT CTC AGC GAA CTC CCT CCA CAA GGA GCT CAT TTT CTT TCC AGT CTA GAT GAT GCC CTT ACT CCA GCA GTA ATG CTC TGG ATA GTT GGT TAT GCC GGC CAT CTT GTG AGG CAA GAA ACG ATT TTG CTG CCA TAC GTC CAG GTA GAG 
+pseudogene            ... ... ... ..T ... AA. .AT ... ... ..G ... ... ... AT. .G. ..T ... ... ... ... ..T .A. ... .AA ... A.. ... ..A ... ... ... C.. ... ... .C. ..G ... ... G.A T.A ..A T.. T.A AT. ... .A. ... ..G ... A.. ... .A. ... ... TA. .T. ... G.. ..T ... TC. ..A ..G ... A.. C.. .C. ... ... GA. ... ..G .G. ... ... .G. .T. .T. ... CA. G.. T.T .TT .C. ..G ..A ... T.. .A. ... T.. ... T.. ..G C.. ... GG. ..A ..C ... ... ..G G.. ... ..A 
+
+Codon usage in sequences
+--------------------------------------------------------------
+Phe TTT  6 10 | Ser TCT  3  3 | Tyr TAT  3  4 | Cys TGT  0  0
+    TTC  3  3 |     TCC  3  3 |     TAC  2  2 |     TGC  1  1
+Leu TTA  4  7 |     TCA  1  1 | *** TAA  0  0 | *** TGA  0  0
+    TTG  1  0 |     TCG  0  1 |     TAG  0  0 | Trp TGG  2  2
+--------------------------------------------------------------
+Leu CTT  4  2 | Pro CCT  3  5 | His CAT  2  1 | Arg CGT  0  1
+    CTC  5  2 |     CCC  1  2 |     CAC  0  0 |     CGC  0  0
+    CTA  3  3 |     CCA  6  3 | Gln CAA  3  6 |     CGA  1  0
+    CTG  3  2 |     CCG  1  1 |     CAG  4  3 |     CGG  0  0
+--------------------------------------------------------------
+Ile ATT  5  6 | Thr ACT  1  1 | Asn AAT  7  9 | Ser AGT  3  4
+    ATC  3  3 |     ACC  3  1 |     AAC  3  2 |     AGC  1  0
+    ATA  1  1 |     ACA  2  3 | Lys AAA  8 11 | Arg AGA  3  3
+Met ATG  6  4 |     ACG  1  1 |     AAG  9  8 |     AGG  2  2
+--------------------------------------------------------------
+Val GTT  4  3 | Ala GCT  2  0 | Asp GAT  5  5 | Gly GGT  5  4
+    GTC  2  3 |     GCC  2  1 |     GAC  4  6 |     GGC  5  3
+    GTA  3  2 |     GCA  1  2 | Glu GAA 11  8 |     GGA  1  1
+    GTG  3  4 |     GCG  0  0 |     GAG  5  7 |     GGG  1  1
+--------------------------------------------------------------
+
+Codon position x base (3x4) table for each sequence.
+
+#1: BC070280       
+position  1:    T:0.16384    C:0.20339    A:0.32768    G:0.30508
+position  2:    T:0.31638    C:0.16949    A:0.37288    G:0.14124
+position  3:    T:0.29944    C:0.21469    A:0.27119    G:0.21469
+
+#2: pseudogene     
+position  1:    T:0.20904    C:0.17514    A:0.33333    G:0.28249
+position  2:    T:0.31073    C:0.15819    A:0.40678    G:0.12429
+position  3:    T:0.32768    C:0.18079    A:0.28814    G:0.20339
+
+Sums of codon usage counts
+------------------------------------------------------------------------------
+Phe F TTT      16 | Ser S TCT       6 | Tyr Y TAT       7 | Cys C TGT       0
+      TTC       6 |       TCC       6 |       TAC       4 |       TGC       2
+Leu L TTA      11 |       TCA       2 | *** * TAA       0 | *** * TGA       0
+      TTG       1 |       TCG       1 |       TAG       0 | Trp W TGG       4
+------------------------------------------------------------------------------
+Leu L CTT       6 | Pro P CCT       8 | His H CAT       3 | Arg R CGT       1
+      CTC       7 |       CCC       3 |       CAC       0 |       CGC       0
+      CTA       6 |       CCA       9 | Gln Q CAA       9 |       CGA       1
+      CTG       5 |       CCG       2 |       CAG       7 |       CGG       0
+------------------------------------------------------------------------------
+Ile I ATT      11 | Thr T ACT       2 | Asn N AAT      16 | Ser S AGT       7
+      ATC       6 |       ACC       4 |       AAC       5 |       AGC       1
+      ATA       2 |       ACA       5 | Lys K AAA      19 | Arg R AGA       6
+Met M ATG      10 |       ACG       2 |       AAG      17 |       AGG       4
+------------------------------------------------------------------------------
+Val V GTT       7 | Ala A GCT       2 | Asp D GAT      10 | Gly G GGT       9
+      GTC       5 |       GCC       3 |       GAC      10 |       GGC       8
+      GTA       5 |       GCA       3 | Glu E GAA      19 |       GGA       2
+      GTG       7 |       GCG       0 |       GAG      12 |       GGG       2
+------------------------------------------------------------------------------
+
+
+Codon position x base (3x4) table, overall
+
+position  1:    T:0.18644    C:0.18927    A:0.33051    G:0.29379
+position  2:    T:0.31356    C:0.16384    A:0.38983    G:0.13277
+position  3:    T:0.31356    C:0.19774    A:0.27966    G:0.20904
+
+
+Nei & Gojobori 1986. dN/dS (dN, dS)
+(Note: This matrix is not used in later m.l. analysis.
+Use runmode = -2 for ML pairwise comparison.)
+
+BC070280            
+pseudogene           0.5224 (0.1421 0.2721)
+
+pairwise comparison, codon frequencies: F3x4.
+
+
+2 (pseudogene) ... 1 (BC070280)
+lnL =-1014.258355
+  0.52723  1.94064  0.59797
+
+t= 0.5272  S=   129.6  N=   401.4  dN/dS= 0.5980  dN= 0.1510  dS= 0.2525
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/for_paml/test.codon	Sun Nov 09 10:56:21 2025 +0000
@@ -0,0 +1,23 @@
+   2    570
+BC070280
+ATGGTTGGTTCGCTAAACTGCATCGTCGCTGTGTCCCAGAACATGGGCATCGGCAAGAAC
+GGGGACCTGCCCTGGCCACCGCTCAGGAATGAATTCAGATATTTCCAGAGAATGACCACA
+ACCTCTTCAGTAGAAGGTAAACAGAATCTGGTGATTATGGGTAAGAAGACCTGGTTCTCC
+ATTCCTGAGAAGAATCGACCTTTAAAGGGTAGAATTAATTTAGTTCTCAGCAGAGAACTC
+AAGGAACCTCCACAAGGAGCTCATTTTCTTTCCAGAAGTCTAGATGATGCCTTAAAACTT
+ACTGAACAACCAGAATTAGCAAATAAAGTAGACATGCTCTGGATAGTTGGTGGCAGTTCT
+GTTTATAAGGAAGCCATGAATCACCCAGGCCATCTTAAACTATTTGTGACAAGGATCATG
+CAAGACTTTGAAAGTGACACGTTTTTT---CCA---GAAATTGATTTGGAGAAATATAAA
+CTTCTGCCAGAATACCCA---GGTGTTCTCTCTGATGTCCAGGAGGAGAAAGGCATTAAG
+TACAAATTTGAAGTATATGAGAAGAATGAT
+pseudogene
+------------CTAAACTGCATTGTCAATGATTCCCAGAAGATGGGCATCATCAGGAAT
+GGGGACCTGCCCTGACCTCAGCTCAAAAATAAATTCGA----TTCCAAAGAATGACCACA
+CCCTCTTCAGCAGAGGGTAAAGAAAATTTAGTATTTTTAATTAGGAAGAACTGGTTCTCG
+ATTACTGAGAAGAATCAACCTTTAAAGTATATAATTAATTTAGTTGTCAGTAGAGAATCC
+AAGGAACCACCGCAAAGACCTCCTTTTCTTGACTAAAGTCTGGGTGATGCCTTAAAACGT
+ATTGAGCAACTAAAATTAGCAAATAAACAAGACGTGTTTTTTACAGTGGGAGGCAGTTCT
+GTTTATAAGGAATCCATGAATTGA---GACCATTTTAAACTATTTGTGACATGGATCATG
+CAGGACTTTCAAAGTGACACGTTTTTTTCCCCTA--GAAGGTGATTTAGAGAAATATAAA
+CTTCTCCCAGAATACCCACAAGGTGTTGTCTCTGATGTGGAGGAGGAGAAAGGCATTAAG
+TACAAATTTGAAGTATATGAAAAGAATGAT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/for_paml/test.tree	Sun Nov 09 10:56:21 2025 +0000
@@ -0,0 +1,1 @@
+(BC070280, pseudogene);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/test.aln	Sun Nov 09 10:56:21 2025 +0000
@@ -0,0 +1,19 @@
+CLUSTAL W (1.82) multiple sequence alignment
+
+
+BC070280        MVGSLNCIVAVSQNMGIGKNGDLPWPPLRNEFRYFQRMTTTSSVEGKQNLVIMGKKTWFS
+pseudogene      ----LNCIVNVSQKMGIIRNGDLP*PQLKNKF2-FQRMTTPSSAEGKENLVFLIRKNWFS
+                    #################             ##########################
+
+
+BC070280        IPEKNRPLKGRINLVLSRELKEPPQGAHFLSRSLDDALKLTEQPELANKVDMLWIVGGSS
+pseudogene      ITEKNQPLKYIINLVVSRESKEPPQRPPFLD*SLGDALKRIEQLKLANKQDVFFTVGGSS
+                ###############                    #####################
+
+
+BC070280        VYKEAMNHPGHLKLFVTRIMQDFESDTFF-PEIDLEKYKLLPEYP-GVLSDVQEEKGIKY
+pseudogene      VYKESMN*-DHFKLFVTWIMQDFQSDTFFS4EGDLEKYKLLPEYPQGVVSDVEEEKGIKY
+
+
+BC070280        KFEVYEKND
+pseudogene      KFEVYEKND
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/test.nuc	Sun Nov 09 10:56:21 2025 +0000
@@ -0,0 +1,29 @@
+>BC0700280  dihydrofolate reductase (human)
+TGTAACGAGC GGGCTCGGAG GTCCTCCCGC TGCTGTCATG GTTGGTTCGC TAAACTGCAT
+CGTCGCTGTG TCCCAGAACA TGGGCATCGG CAAGAACGGG GACCTGCCCT GGCCACCGCT
+CAGGAATGAA TTCAGATATT TCCAGAGAAT GACCACAACC TCTTCAGTAG AAGGTAAACA
+GAATCTGGTG ATTATGGGTA AGAAGACCTG GTTCTCCATT CCTGAGAAGA ATCGACCTTT
+AAAGGGTAGA ATTAATTTAG TTCTCAGCAG AGAACTCAAG GAACCTCCAC AAGGAGCTCA
+TTTTCTTTCC AGAAGTCTAG ATGATGCCTT AAAACTTACT GAACAACCAG AATTAGCAAA
+TAAAGTAGAC ATGCTCTGGA TAGTTGGTGG CAGTTCTGTT TATAAGGAAG CCATGAATCA
+CCCAGGCCAT CTTAAACTAT TTGTGACAAG GATCATGCAA GACTTTGAAA GTGACACGTT
+TTTTCCAGAA ATTGATTTGG AGAAATATAA ACTTCTGCCA GAATACCCAG GTGTTCTCTC
+TGATGTCCAG GAGGAGAAAG GCATTAAGTA CAAATTTGAA GTATATGAGA AGAATGATTA
+ATATGAAGGT GTTTTCTAGT TTAAGTTGTT CCCCCTCCCT CTGAAAAAAG TATGTATTTT
+TACATTAGAA AAGGTTTTTT GTTGACTTTA GATCTATAAT TATTTCTAAG CAACTTGTTT
+TTATTCCCCA CTACTCTTGT CTCTATCAGA TACCATTTAT GAGACATTCT TGCTATAACT
+AAGTGCTTCT CCAAGACCCC AACTGAGTCC CCAGCACCTG CTACAGTGAG CTGCCATTCC
+ACACCCATCA CATGTGGCAC TCTTGCCAGT CCTTGACATT GTCGGGCTTT TCACATGTTG
+GTAATATTTA TTAAAGATGA AGATCCACAT ACCCTTCAAA AAAAAAAAAA AAAAAAAAAA
+AAAAAAA
+>pseudogene  dihydrofolate reductase pseudogene (human)
+CTAAACTGCA TTGTCAATGA TTCCCAGAAG ATGGGCATCA TCAGGAATGG GGACCTGCCC
+TGACCTCAGC TCAAAAATAA ATTCGATTCC AAAGAATGAC CACACCCTCT TCAGCAGAGG
+GTAAAGAAAA TTTAGTATTT TTAATTAGGA AGAACTGGTT CTCGATTACT GAGAAGAATC
+AACCTTTAAA GTATATAATT AATTTAGTTG TCAGTAGAGA ATCCAAGGAA CCACCGCAAA
+GACCTCCTTT TCTTGACTAA AGTCTGGGTG ATGCCTTAAA ACGTATTGAG CAACTAAAAT
+TAGCAAATAA ACAAGACGTG TTTTTTACAG TGGGAGGCAG TTCTGTTTAT AAGGAATCCA
+TGAATTGAGA CCATTTTAAA CTATTTGTGA CATGGATCAT GCAGGACTTT CAAAGTGACA
+CGTTTTTTTC CCCTAGAAGG TGATTTAGAG AAATATAAAC TTCTCCCAGA ATACCCACAA
+GGTGTTGTCT CTGATGTGGA GGAGGAGAAA GGCATTAAGT ACAAATTTGA AGTATATGAA
+AAGAATGAT
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/test_bc070280.fasta	Sun Nov 09 10:56:21 2025 +0000
@@ -0,0 +1,18 @@
+>BC0700280  dihydrofolate reductase (human)
+TGTAACGAGC GGGCTCGGAG GTCCTCCCGC TGCTGTCATG GTTGGTTCGC TAAACTGCAT
+CGTCGCTGTG TCCCAGAACA TGGGCATCGG CAAGAACGGG GACCTGCCCT GGCCACCGCT
+CAGGAATGAA TTCAGATATT TCCAGAGAAT GACCACAACC TCTTCAGTAG AAGGTAAACA
+GAATCTGGTG ATTATGGGTA AGAAGACCTG GTTCTCCATT CCTGAGAAGA ATCGACCTTT
+AAAGGGTAGA ATTAATTTAG TTCTCAGCAG AGAACTCAAG GAACCTCCAC AAGGAGCTCA
+TTTTCTTTCC AGAAGTCTAG ATGATGCCTT AAAACTTACT GAACAACCAG AATTAGCAAA
+TAAAGTAGAC ATGCTCTGGA TAGTTGGTGG CAGTTCTGTT TATAAGGAAG CCATGAATCA
+CCCAGGCCAT CTTAAACTAT TTGTGACAAG GATCATGCAA GACTTTGAAA GTGACACGTT
+TTTTCCAGAA ATTGATTTGG AGAAATATAA ACTTCTGCCA GAATACCCAG GTGTTCTCTC
+TGATGTCCAG GAGGAGAAAG GCATTAAGTA CAAATTTGAA GTATATGAGA AGAATGATTA
+ATATGAAGGT GTTTTCTAGT TTAAGTTGTT CCCCCTCCCT CTGAAAAAAG TATGTATTTT
+TACATTAGAA AAGGTTTTTT GTTGACTTTA GATCTATAAT TATTTCTAAG CAACTTGTTT
+TTATTCCCCA CTACTCTTGT CTCTATCAGA TACCATTTAT GAGACATTCT TGCTATAACT
+AAGTGCTTCT CCAAGACCCC AACTGAGTCC CCAGCACCTG CTACAGTGAG CTGCCATTCC
+ACACCCATCA CATGTGGCAC TCTTGCCAGT CCTTGACATT GTCGGGCTTT TCACATGTTG
+GTAATATTTA TTAAAGATGA AGATCCACAT ACCCTTCAAA AAAAAAAAAA AAAAAAAAAA
+AAAAAAA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/test_dup.nuc	Sun Nov 09 10:56:21 2025 +0000
@@ -0,0 +1,29 @@
+>BC0700280  dihydrofolate reductase (human)
+TGTAACGAGC GGGCTCGGAG GTCCTCCCGC TGCTGTCATG GTTGGTTCGC TAAACTGCAT
+CGTCGCTGTG TCCCAGAACA TGGGCATCGG CAAGAACGGG GACCTGCCCT GGCCACCGCT
+CAGGAATGAA TTCAGATATT TCCAGAGAAT GACCACAACC TCTTCAGTAG AAGGTAAACA
+GAATCTGGTG ATTATGGGTA AGAAGACCTG GTTCTCCATT CCTGAGAAGA ATCGACCTTT
+AAAGGGTAGA ATTAATTTAG TTCTCAGCAG AGAACTCAAG GAACCTCCAC AAGGAGCTCA
+TTTTCTTTCC AGAAGTCTAG ATGATGCCTT AAAACTTACT GAACAACCAG AATTAGCAAA
+TAAAGTAGAC ATGCTCTGGA TAGTTGGTGG CAGTTCTGTT TATAAGGAAG CCATGAATCA
+CCCAGGCCAT CTTAAACTAT TTGTGACAAG GATCATGCAA GACTTTGAAA GTGACACGTT
+TTTTCCAGAA ATTGATTTGG AGAAATATAA ACTTCTGCCA GAATACCCAG GTGTTCTCTC
+TGATGTCCAG GAGGAGAAAG GCATTAAGTA CAAATTTGAA GTATATGAGA AGAATGATTA
+ATATGAAGGT GTTTTCTAGT TTAAGTTGTT CCCCCTCCCT CTGAAAAAAG TATGTATTTT
+TACATTAGAA AAGGTTTTTT GTTGACTTTA GATCTATAAT TATTTCTAAG CAACTTGTTT
+TTATTCCCCA CTACTCTTGT CTCTATCAGA TACCATTTAT GAGACATTCT TGCTATAACT
+AAGTGCTTCT CCAAGACCCC AACTGAGTCC CCAGCACCTG CTACAGTGAG CTGCCATTCC
+ACACCCATCA CATGTGGCAC TCTTGCCAGT CCTTGACATT GTCGGGCTTT TCACATGTTG
+GTAATATTTA TTAAAGATGA AGATCCACAT ACCCTTCAAA AAAAAAAAAA AAAAAAAAAA
+AAAAAAA
+>pseudogene  dihydrofolate reductase pseudogene (human)
+CTAAACTGCA TTGTCAATGA TTCCCAGAAG ATGGGCATCA TCAGGAATGG GGACCTGCCC
+TGACCTCAGC TCAAAAATAA ATTCGATTCC AAAGAATGAC CACACCCTCT TCAGCAGAGG
+GTAAAGAAAA TTTAGTATTT TTAATTAGGA AGAACTGGTT CTCGATTACT GAGAAGAATC
+AACCTTTAAA GTATATAATT AATTTAGTTG TCAGTAGAGA ATCCAAGGAA CCACCGCAAA
+GACCTCCTTT TCTTGACTAA AGTCTGGGTG ATGCCTTAAA ACGTATTGAG CAACTAAAAT
+TAGCAAATAA ACAAGACGTG TTTTTTACAG TGGGAGGCAG TTCTGTTTAT AAGGAATCCA
+TGAATTGAGA CCATTTTAAA CTATTTGTGA CATGGATCAT GCAGGACTTT CAAAGTGACA
+CGTTTTTTTC CCCTAGAAGG TGATTTAGAG AAATATAAAC TTCTCCCAGA ATACCCACAA
+GGTGTTGTCT CTGATGTGGA GGAGGAGAAA GGCATTAAGT ACAAATTTGA AGTATATGAA
+AAGAATGAT
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inputs/test_pseudogene.fasta	Sun Nov 09 10:56:21 2025 +0000
@@ -0,0 +1,11 @@
+>pseudogene  dihydrofolate reductase pseudogene (human)
+CTAAACTGCA TTGTCAATGA TTCCCAGAAG ATGGGCATCA TCAGGAATGG GGACCTGCCC
+TGACCTCAGC TCAAAAATAA ATTCGATTCC AAAGAATGAC CACACCCTCT TCAGCAGAGG
+GTAAAGAAAA TTTAGTATTT TTAATTAGGA AGAACTGGTT CTCGATTACT GAGAAGAATC
+AACCTTTAAA GTATATAATT AATTTAGTTG TCAGTAGAGA ATCCAAGGAA CCACCGCAAA
+GACCTCCTTT TCTTGACTAA AGTCTGGGTG ATGCCTTAAA ACGTATTGAG CAACTAAAAT
+TAGCAAATAA ACAAGACGTG TTTTTTACAG TGGGAGGCAG TTCTGTTTAT AAGGAATCCA
+TGAATTGAGA CCATTTTAAA CTATTTGTGA CATGGATCAT GCAGGACTTT CAAAGTGACA
+CGTTTTTTTC CCCTAGAAGG TGATTTAGAG AAATATAAAC TTCTCCCAGA ATACCCACAA
+GGTGTTGTCT CTGATGTGGA GGAGGAGAAA GGCATTAAGT ACAAATTTGA AGTATATGAA
+AAGAATGAT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_block_nomismatch.aln	Sun Nov 09 10:56:21 2025 +0000
@@ -0,0 +1,14 @@
+CLUSTAL W multiple sequence alignment
+
+BC070280      CTAAACTGCATCGTCGCTTCCCAGAACATGGGCATCGGCAACGGGTTCCAGATGACCACA
+pseudogene    CTAAACTGCATTGTCAATTCCCAGAAGATGGGCATCATCAATGGGTTCCAAATGACCACA
+
+BC070280      ACCTCTTCAGTAGAAGGTAAACAGAATCTGGTGATTATGGGTAAGACCTGGTTCTCCATT
+pseudogene    CCCTCTTCAGCAGAGGGTAAAGAAAATTTAGTATTTTTAATTAAGAACTGGTTCTCGATT
+
+BC070280      CCTGAGAAGAATCGACCTTTAAAGGGTATTAATTTAGTTGATGCCTTAAAACTTACTGAA
+pseudogene    ACTGAGAAGAATCAACCTTTAAAGTATATTAATTTAGTTGATGCCTTAAAACGTATTGAG
+
+BC070280      CAACCAGAATTAGCAAATAAAGTAGACATGCTCTGGGTT
+pseudogene    CAACTAAAATTAGCAAATAAACAAGACGTGTTTTTTGTG
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_clustal.aln	Sun Nov 09 10:56:21 2025 +0000
@@ -0,0 +1,42 @@
+CLUSTAL W multiple sequence alignment
+
+BC070280      ATGGTTGGTTCGCTAAACTGCATCGTCGCTGTGTCCCAGAACATGGGCATCGGCAAGAAC
+pseudogene    ------------CTAAACTGCATTGTCAATGATTCCCAGAAGATGGGCATCATCAGGAAT
+                          ################################################
+
+BC070280      GGGGACCTGCCCTGGCCACCGCTCAGGAATGAATTCAGATATTTCCAGAGAATGACCACA
+pseudogene    GGGGACCTGCCCTGACCTCAGCTCAAAAATAAATTCGA----TTCCAAAGAATGACCACA
+              ###                                       ##################
+
+BC070280      ACCTCTTCAGTAGAAGGTAAACAGAATCTGGTGATTATGGGTAAGAAGACCTGGTTCTCC
+pseudogene    CCCTCTTCAGCAGAGGGTAAAGAAAATTTAGTATTTTTAATTAGGAAGAACTGGTTCTCG
+              ############################################################
+
+BC070280      ATTCCTGAGAAGAATCGACCTTTAAAGGGTAGAATTAATTTAGTTCTCAGCAGAGAACTC
+pseudogene    ATTACTGAGAAGAATCAACCTTTAAAGTATATAATTAATTTAGTTGTCAGTAGAGAATCC
+              #############################################               
+
+BC070280      AAGGAACCTCCACAAGGAGCTCATTTTCTTTCCAGAAGTCTAGATGATGCCTTAAAACTT
+pseudogene    AAGGAACCACCGCAAAGACCTCCTTTTCTTGACTAAAGTCTGGGTGATGCCTTAAAACGT
+                                                           ###############
+
+BC070280      ACTGAACAACCAGAATTAGCAAATAAAGTAGACATGCTCTGGATAGTTGGTGGCAGTTCT
+pseudogene    ATTGAGCAACTAAAATTAGCAAATAAACAAGACGTGTTTTTTACAGTGGGAGGCAGTTCT
+              ################################################            
+
+BC070280      GTTTATAAGGAAGCCATGAATCACCCAGGCCATCTTAAACTATTTGTGACAAGGATCATG
+pseudogene    GTTTATAAGGAATCCATGAATTGA---GACCATTTTAAACTATTTGTGACATGGATCATG
+                                                                          
+
+BC070280      CAAGACTTTGAAAGTGACACGTTTTTT---CCA---GAAATTGATTTGGAGAAATATAAA
+pseudogene    CAGGACTTTCAAAGTGACACGTTTTTTTCCCCTA--GAAGGTGATTTAGAGAAATATAAA
+                                                                          
+
+BC070280      CTTCTGCCAGAATACCCA---GGTGTTCTCTCTGATGTCCAGGAGGAGAAAGGCATTAAG
+pseudogene    CTTCTCCCAGAATACCCACAAGGTGTTGTCTCTGATGTGGAGGAGGAGAAAGGCATTAAG
+                                                                          
+
+BC070280      TACAAATTTGAAGTATATGAGAAGAATGAT
+pseudogene    TACAAATTTGAAGTATATGAAAAGAATGAT
+                                            
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_clustal_multi.aln	Sun Nov 09 10:56:21 2025 +0000
@@ -0,0 +1,42 @@
+CLUSTAL W multiple sequence alignment
+
+BC070280      ATGGTTGGTTCGCTAAACTGCATCGTCGCTGTGTCCCAGAACATGGGCATCGGCAAGAAC
+pseudogene    ------------CTAAACTGCATTGTCAATGATTCCCAGAAGATGGGCATCATCAGGAAT
+                          ################################################
+
+BC070280      GGGGACCTGCCCTGGCCACCGCTCAGGAATGAATTCAGATATTTCCAGAGAATGACCACA
+pseudogene    GGGGACCTGCCCTGACCTCAGCTCAAAAATAAATTCGA----TTCCAAAGAATGACCACA
+              ###                                       ##################
+
+BC070280      ACCTCTTCAGTAGAAGGTAAACAGAATCTGGTGATTATGGGTAAGAAGACCTGGTTCTCC
+pseudogene    CCCTCTTCAGCAGAGGGTAAAGAAAATTTAGTATTTTTAATTAGGAAGAACTGGTTCTCG
+              ############################################################
+
+BC070280      ATTCCTGAGAAGAATCGACCTTTAAAGGGTAGAATTAATTTAGTTCTCAGCAGAGAACTC
+pseudogene    ATTACTGAGAAGAATCAACCTTTAAAGTATATAATTAATTTAGTTGTCAGTAGAGAATCC
+              #############################################               
+
+BC070280      AAGGAACCTCCACAAGGAGCTCATTTTCTTTCCAGAAGTCTAGATGATGCCTTAAAACTT
+pseudogene    AAGGAACCACCGCAAAGACCTCCTTTTCTTGACTAAAGTCTGGGTGATGCCTTAAAACGT
+                                                           ###############
+
+BC070280      ACTGAACAACCAGAATTAGCAAATAAAGTAGACATGCTCTGGATAGTTGGTGGCAGTTCT
+pseudogene    ATTGAGCAACTAAAATTAGCAAATAAACAAGACGTGTTTTTTACAGTGGGAGGCAGTTCT
+              ################################################            
+
+BC070280      GTTTATAAGGAAGCCATGAATCACCCAGGCCATCTTAAACTATTTGTGACAAGGATCATG
+pseudogene    GTTTATAAGGAATCCATGAATTGA---GACCATTTTAAACTATTTGTGACATGGATCATG
+                                                                          
+
+BC070280      CAAGACTTTGAAAGTGACACGTTTTTT---CCA---GAAATTGATTTGGAGAAATATAAA
+pseudogene    CAGGACTTTCAAAGTGACACGTTTTTTTCCCCTA--GAAGGTGATTTAGAGAAATATAAA
+                                                                          
+
+BC070280      CTTCTGCCAGAATACCCA---GGTGTTCTCTCTGATGTCCAGGAGGAGAAAGGCATTAAG
+pseudogene    CTTCTCCCAGAATACCCACAAGGTGTTGTCTCTGATGTGGAGGAGGAGAAAGGCATTAAG
+                                                                          
+
+BC070280      TACAAATTTGAAGTATATGAGAAGAATGAT
+pseudogene    TACAAATTTGAAGTATATGAAAAGAATGAT
+                                            
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_codon.txt	Sun Nov 09 10:56:21 2025 +0000
@@ -0,0 +1,60 @@
+              M   V   G   S   L   N   C   I   V   A   V   S   Q   N   M   G   I   G   K   N
+BC070280      ATG GTT GGT TCG CTA AAC TGC ATC GTC GCT GTG TCC CAG AAC ATG GGC ATC GGC AAG AAC
+              -   -   -   -   L   N   C   I   V   N   V   S   Q   K   M   G   I   I   R   N
+pseudogene    --- --- --- --- CTA AAC TGC ATT GTC AAT GAT TCC CAG AAG ATG GGC ATC ATC AGG AAT
+                              ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
+
+              G   D   L   P   W   P   P   L   R   N   E   F   R   Y   F   Q   R   M   T   T
+BC070280      GGG GAC CTG CCC TGG CCA CCG CTC AGG AAT GAA TTC AGA TAT TTC CAG AGA ATG ACC ACA
+              G   D   L   P   *   P   Q   L   K   N   K   F   2   -   F   Q   R   M   T   T
+pseudogene    GGG GAC CTG CCC TGA CCT CAG CTC AAA AAT AAA TTC GA- --- TTC CAA AGA ATG ACC ACA
+              ###                                                     ### ### ### ### ### ###
+
+              T   S   S   V   E   G   K   Q   N   L   V   I   M   G   K   K   T   W   F   S
+BC070280      ACC TCT TCA GTA GAA GGT AAA CAG AAT CTG GTG ATT ATG GGT AAG AAG ACC TGG TTC TCC
+              P   S   S   A   E   G   K   E   N   L   V   F   L   I   R   K   N   W   F   S
+pseudogene    CCC TCT TCA GCA GAG GGT AAA GAA AAT TTA GTA TTT TTA ATT AGG AAG AAC TGG TTC TCG
+              ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
+
+              I   P   E   K   N   R   P   L   K   G   R   I   N   L   V   L   S   R   E   L
+BC070280      ATT CCT GAG AAG AAT CGA CCT TTA AAG GGT AGA ATT AAT TTA GTT CTC AGC AGA GAA CTC
+              I   T   E   K   N   Q   P   L   K   Y   I   I   N   L   V   V   S   R   E   S
+pseudogene    ATT ACT GAG AAG AAT CAA CCT TTA AAG TAT ATA ATT AAT TTA GTT GTC AGT AGA GAA TCC
+              ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###                    
+
+              K   E   P   P   Q   G   A   H   F   L   S   R   S   L   D   D   A   L   K   L
+BC070280      AAG GAA CCT CCA CAA GGA GCT CAT TTT CTT TCC AGA AGT CTA GAT GAT GCC TTA AAA CTT
+              K   E   P   P   Q   R   P   P   F   L   D   *   S   L   G   D   A   L   K   R
+pseudogene    AAG GAA CCA CCG CAA AGA CCT CCT TTT CTT GAC TAA AGT CTG GGT GAT GCC TTA AAA CGT
+                                                                          ### ### ### ### ###
+
+              T   E   Q   P   E   L   A   N   K   V   D   M   L   W   I   V   G   G   S   S
+BC070280      ACT GAA CAA CCA GAA TTA GCA AAT AAA GTA GAC ATG CTC TGG ATA GTT GGT GGC AGT TCT
+              I   E   Q   L   K   L   A   N   K   Q   D   V   F   F   T   V   G   G   S   S
+pseudogene    ATT GAG CAA CTA AAA TTA GCA AAT AAA CAA GAC GTG TTT TTT ACA GTG GGA GGC AGT TCT
+              ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###                
+
+              V   Y   K   E   A   M   N   H   P   G   H   L   K   L   F   V   T   R   I   M
+BC070280      GTT TAT AAG GAA GCC ATG AAT CAC CCA GGC CAT CTT AAA CTA TTT GTG ACA AGG ATC ATG
+              V   Y   K   E   S   M   N   *   -   D   H   F   K   L   F   V   T   W   I   M
+pseudogene    GTT TAT AAG GAA TCC ATG AAT TGA --- GAC CAT TTT AAA CTA TTT GTG ACA TGG ATC ATG
+                                                                                             
+
+              Q   D   F   E   S   D   T   F   F   -   P   -   E   I   D   L   E   K   Y   K
+BC070280      CAA GAC TTT GAA AGT GAC ACG TTT TTT --- CCA --- GAA ATT GAT TTG GAG AAA TAT AAA
+              Q   D   F   Q   S   D   T   F   F   S   4   -   E   G   D   L   E   K   Y   K
+pseudogene    CAG GAC TTT CAA AGT GAC ACG TTT TTT TCC CCT A-- GAA GGT GAT TTA GAG AAA TAT AAA
+                                                                                             
+
+              L   L   P   E   Y   P   -   G   V   L   S   D   V   Q   E   E   K   G   I   K
+BC070280      CTT CTG CCA GAA TAC CCA --- GGT GTT CTC TCT GAT GTC CAG GAG GAG AAA GGC ATT AAG
+              L   L   P   E   Y   P   Q   G   V   V   S   D   V   E   E   E   K   G   I   K
+pseudogene    CTT CTC CCA GAA TAC CCA CAA GGT GTT GTC TCT GAT GTG GAG GAG GAG AAA GGC ATT AAG
+                                                                                             
+
+              Y   K   F   E   V   Y   E   K   N   D
+BC070280      TAC AAA TTT GAA GTA TAT GAG AAG AAT GAT
+              Y   K   F   E   V   Y   E   K   N   D
+pseudogene    TAC AAA TTT GAA GTA TAT GAA AAG AAT GAT
+                                                     
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_html.html	Sun Nov 09 10:56:21 2025 +0000
@@ -0,0 +1,16 @@
+<pre>
+CLUSTAL W multiple sequence alignment
+
+BC070280      CTAAACTGCATCGTCGCTTCCCAGAACATGGGCATCGGCAACGGGTTCCAGATGACCACA
+pseudogene    CTAAACTGCATTGTCAATTCCCAGAAGATGGGCATCATCAATGGGTTCCAAATGACCACA
+
+BC070280      ACCTCTTCAGTAGAAGGTAAACAGAATCTGGTGATTATGGGTAAGACCTGGTTCTCCATT
+pseudogene    CCCTCTTCAGCAGAGGGTAAAGAAAATTTAGTATTTTTAATTAAGAACTGGTTCTCGATT
+
+BC070280      CCTGAGAAGAATCGACCTTTAAAGGGTATTAATTTAGTTGATGCCTTAAAACTTACTGAA
+pseudogene    ACTGAGAAGAATCAACCTTTAAAGTATATTAATTTAGTTGATGCCTTAAAACGTATTGAG
+
+BC070280      CAACCAGAATTAGCAAATAAAGTAGACATGCTCTGGGTT
+pseudogene    CAACTAAAATTAGCAAATAAACAAGACGTGTTTTTTGTG
+
+</pre>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_nogap.fasta	Sun Nov 09 10:56:21 2025 +0000
@@ -0,0 +1,20 @@
+>BC070280
+CTAAACTGCATCGTCGCTGTGTCCCAGAACATGGGCATCGGCAAGAACGGGGACCTGCCC
+CCACCGCTCAGGAATGAATTCTTCCAGAGAATGACCACAACCTCTTCAGTAGAAGGTAAA
+CAGAATCTGGTGATTATGGGTAAGAAGACCTGGTTCTCCATTCCTGAGAAGAATCGACCT
+TTAAAGGGTAGAATTAATTTAGTTCTCAGCAGAGAACTCAAGGAACCTCCACAAGGAGCT
+CATTTTCTTTCCAGTCTAGATGATGCCTTAAAACTTACTGAACAACCAGAATTAGCAAAT
+AAAGTAGACATGCTCTGGATAGTTGGTGGCAGTTCTGTTTATAAGGAAGCCATGAATGGC
+CATCTTAAACTATTTGTGACAAGGATCATGCAAGACTTTGAAAGTGACACGTTTTTTCCA
+GAAATTGATTTGGAGAAATATAAACTTCTGCCAGAATACCCAGGTGTTCTCTCTGATGTC
+CAGGAGGAGAAAGGCATTAAGTACAAATTTGAAGTATATGAGAAGAATGAT
+>pseudogene
+CTAAACTGCATTGTCAATGATTCCCAGAAGATGGGCATCATCAGGAATGGGGACCTGCCC
+CCTCAGCTCAAAAATAAATTCTTCCAAAGAATGACCACACCCTCTTCAGCAGAGGGTAAA
+GAAAATTTAGTATTTTTAATTAGGAAGAACTGGTTCTCGATTACTGAGAAGAATCAACCT
+TTAAAGTATATAATTAATTTAGTTGTCAGTAGAGAATCCAAGGAACCACCGCAAAGACCT
+CCTTTTCTTGACAGTCTGGGTGATGCCTTAAAACGTATTGAGCAACTAAAATTAGCAAAT
+AAACAAGACGTGTTTTTTACAGTGGGAGGCAGTTCTGTTTATAAGGAATCCATGAATGAC
+CATTTTAAACTATTTGTGACATGGATCATGCAGGACTTTCAAAGTGACACGTTTTTTCCT
+GAAGGTGATTTAGAGAAATATAAACTTCTCCCAGAATACCCAGGTGTTGTCTCTGATGTG
+GAGGAGGAGAAAGGCATTAAGTACAAATTTGAAGTATATGAAAAGAATGAT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outputs/expected_paml.paml	Sun Nov 09 10:56:21 2025 +0000
@@ -0,0 +1,23 @@
+   2    570
+BC070280
+ATGGTTGGTTCGCTAAACTGCATCGTCGCTGTGTCCCAGAACATGGGCATCGGCAAGAAC
+GGGGACCTGCCCTGGCCACCGCTCAGGAATGAATTCAGATATTTCCAGAGAATGACCACA
+ACCTCTTCAGTAGAAGGTAAACAGAATCTGGTGATTATGGGTAAGAAGACCTGGTTCTCC
+ATTCCTGAGAAGAATCGACCTTTAAAGGGTAGAATTAATTTAGTTCTCAGCAGAGAACTC
+AAGGAACCTCCACAAGGAGCTCATTTTCTTTCCAGAAGTCTAGATGATGCCTTAAAACTT
+ACTGAACAACCAGAATTAGCAAATAAAGTAGACATGCTCTGGATAGTTGGTGGCAGTTCT
+GTTTATAAGGAAGCCATGAATCACCCAGGCCATCTTAAACTATTTGTGACAAGGATCATG
+CAAGACTTTGAAAGTGACACGTTTTTT---CCA---GAAATTGATTTGGAGAAATATAAA
+CTTCTGCCAGAATACCCA---GGTGTTCTCTCTGATGTCCAGGAGGAGAAAGGCATTAAG
+TACAAATTTGAAGTATATGAGAAGAATGAT
+pseudogene
+------------CTAAACTGCATTGTCAATGATTCCCAGAAGATGGGCATCATCAGGAAT
+GGGGACCTGCCCTGACCTCAGCTCAAAAATAAATTCGA----TTCCAAAGAATGACCACA
+CCCTCTTCAGCAGAGGGTAAAGAAAATTTAGTATTTTTAATTAGGAAGAACTGGTTCTCG
+ATTACTGAGAAGAATCAACCTTTAAAGTATATAATTAATTTAGTTGTCAGTAGAGAATCC
+AAGGAACCACCGCAAAGACCTCCTTTTCTTGACTAAAGTCTGGGTGATGCCTTAAAACGT
+ATTGAGCAACTAAAATTAGCAAATAAACAAGACGTGTTTTTTACAGTGGGAGGCAGTTCT
+GTTTATAAGGAATCCATGAATTGA---GACCATTTTAAACTATTTGTGACATGGATCATG
+CAGGACTTTCAAAGTGACACGTTTTTTTCCCCTA--GAAGGTGATTTAGAGAAATATAAA
+CTTCTCCCAGAATACCCACAAGGTGTTGTCTCTGATGTGGAGGAGGAGAAAGGCATTAAG
+TACAAATTTGAAGTATATGAAAAGAATGAT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests.xml	Sun Nov 09 10:56:21 2025 +0000
@@ -0,0 +1,49 @@
+<macros> <!-- Macro file holding the test cases for this wrapper. -->
+    <xml name="tests"> <!-- Reusable "tests" macro; every case below uses inputs/test.aln as the protein alignment. -->
+        <tests>
+            <test expect_num_outputs="1"> <!-- output_format=paml; result compared with expected_paml.paml as phylip. -->
+                <param name="protein_alignment" value="inputs/test.aln" ftype="clustal" />
+                <param name="nucleotide_fastas" value="inputs/test.nuc" ftype="fasta" />
+                <param name="output_format" value="paml" />
+                <output name="output_file" file="outputs/expected_paml.paml" ftype="phylip" />
+            </test>
+            <test expect_num_outputs="1"> <!-- output_format=clustal with no other options set. -->
+                <param name="protein_alignment" value="inputs/test.aln" ftype="clustal" />
+                <param name="nucleotide_fastas" value="inputs/test.nuc" ftype="fasta" />
+                <param name="output_format" value="clustal" />
+                <output name="output_file" file="outputs/expected_clustal.aln" ftype="clustal" />
+            </test>
+            <test expect_num_outputs="1"> <!-- output_format=fasta with remove_gaps enabled. -->
+                <param name="protein_alignment" value="inputs/test.aln" ftype="clustal" />
+                <param name="nucleotide_fastas" value="inputs/test.nuc" ftype="fasta" />
+                <param name="output_format" value="fasta" />
+                <param name="remove_gaps" value="true" />
+                <output name="output_file" file="outputs/expected_nogap.fasta" ftype="fasta" />
+            </test>
+            <test expect_num_outputs="1"> <!-- output_format=codon; result compared as a txt dataset. -->
+                <param name="protein_alignment" value="inputs/test.aln" ftype="clustal" />
+                <param name="nucleotide_fastas" value="inputs/test.nuc" ftype="fasta" />
+                <param name="output_format" value="codon" />
+                <output name="output_file" file="outputs/expected_codon.txt" ftype="txt" />
+            </test>
+            <test expect_num_outputs="1"> <!-- nucleotide_fastas receives two comma-separated FASTA files instead of one. -->
+                <param name="protein_alignment" value="inputs/test.aln" ftype="clustal" />
+                <param name="nucleotide_fastas" value="inputs/test_bc070280.fasta,inputs/test_pseudogene.fasta" ftype="fasta" />
+                <param name="output_format" value="clustal" />
+                <output name="output_file" file="outputs/expected_clustal_multi.aln" ftype="clustal" />
+            </test>
+            <test expect_num_outputs="2"> <!-- Several options combined; expects two outputs: output_file and html_output_file. -->
+                <param name="protein_alignment" value="inputs/test.aln" ftype="clustal" />
+                <param name="nucleotide_fastas" value="inputs/test.nuc" ftype="fasta" />
+                <param name="output_format" value="clustal" />
+                <param name="show_only_blocks" value="true" />
+                <param name="remove_mismatches" value="true" />
+                <param name="genetic_code" value="2" />
+                <param name="html_output" value="true" />
+                <param name="suppress_stderr" value="true" />
+                <output name="output_file" file="outputs/expected_block_nomismatch.aln" ftype="clustal" />
+                <output name="html_output_file" file="outputs/expected_html.html" ftype="html" />
+            </test>
+        </tests>
+    </xml>
+</macros>