Mercurial > repos > iuc > pal2nal
changeset 0:a14d5c1e1fc4 draft default tip
planemo upload for repository https://github.com/georgehe23/tools-iuc/tree/main/tools/pal2nal commit aed49bdc26e503297e1fc394ada087042dc23386
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/info.xml Sun Nov 09 10:56:21 2025 +0000
@@ -0,0 +1,57 @@
+<macros>
+    <!-- Version tokens: @TOOL_VERSION@ is the pal2nal package version, @WRAPPER_VERSION@ the wrapper revision. -->
+    <token name="@TOOL_VERSION@">14.1</token>
+    <token name="@WRAPPER_VERSION@">0</token>
+    <!-- Tool help, written in reStructuredText: inline literals use double backticks. -->
+    <xml name="help">
+        <help><![CDATA[
+**Overview**
+
+PAL2NAL converts a protein multiple sequence alignment plus the matching nucleotide FASTA records into a codon-aware alignment suitable for downstream Ka/Ks analyses. The script is maintained by Mikita Suyama (Kyushu University) and is distributed under GPL v2. This Galaxy wrapper surfaces the upstream ``pal2nal.pl`` tool so that codon alignments can be created inside workflows.
+
+**Inputs**
+
+* *Protein alignment (pep.aln)* — CLUSTAL or FASTA alignment of the translated sequences. Alignments may contain more than two sequences, and frame-shift events can be annotated by numeric placeholders (for example, ``2`` indicates a single base deletion; see the bundled ``test.aln`` example).
+* *Nucleotide FASTA (nuc.fasta)* — Corresponding DNA or mRNA sequences. Select one or more FASTA datasets in *Nucleotide FASTA files*. Sequences are paired with the alignment rows by position unless both inputs use identical identifiers, in which case the order may differ; the identifiers of the protein alignment are used in the output.
+
+**Options**
+
+* ``-output clustal|paml|fasta|codon`` (Galaxy: *Output format*).
+* ``-blockonly`` — Restrict output to user-marked blocks (``#`` rows in CLUSTAL alignments).
+* ``-nogap`` — Remove codons containing gaps or in-frame stops.
+* ``-nomismatch`` — Remove codons where amino acid and nucleotide sequences disagree (useful for discarding pseudogene regions).
+* ``-codontable`` — Choose an NCBI genetic code (1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 13, 14, 15, 16, 21, 22, 23).
+* ``-html`` — Produce HTML formatted output (Galaxy: enable *Add HTML formatted output* to store the optional HTML view).
+* ``-nostderr`` — Suppress STDERR warnings (for example, expected pseudogene mismatches).
+
+**Outputs**
+
+* Codon-respecting alignment in the selected format, stored with the Galaxy datatype ``clustal``, ``phylip`` (PAML), ``fasta``, or ``txt`` (codon).
+* Optional HTML view, stored as a separate ``html`` dataset when *Add HTML formatted output* is enabled.
+
+**Example:**
+
+``pal2nal.pl inputs/test.aln inputs/test.nuc -output paml -nogap > inputs/for_paml/test.codon``
+
+**Ka/Ks calculation**
+
+To compute Ka and Ks values, run the resulting codon alignment through PAML's ``codeml``, as illustrated in the PAL2NAL distribution (``inputs/for_paml/test.cnt``, ``test.tree``, ``test.codeml.ori``).
+
+**Warnings**
+
+PAL2NAL issues messages when protein residues and underlying codons disagree (for example, pseudogene cases). These warnings are harmless unless they indicate unintended mismatches; enable *Suppress STDERR messages* to hide them.
+
+**References and contacts**
+
+* PAL2NAL website: http://www.bork.embl.de/pal2nal
+* Support: Mikita Suyama (mikita@bioreg.kyushu-u.ac.jp)
+* Example data: ``inputs/test.aln``, ``inputs/test.nuc``, and PAML helpers inside the ``inputs/for_paml/`` directory.
+        ]]></help>
+    </xml>
+    <!-- DOI citation emitted into the tool's citations section. -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1093/nar/gkl315</citation>
+        </citations>
+    </xml>
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Sun Nov 09 10:56:21 2025 +0000
@@ -0,0 +1,45 @@
+<macros>
+    <!-- Version probe: runs pal2nal.pl without arguments and keeps the first line of its standard output.
+         NOTE(review): presumably that line carries the version string; confirm against the upstream usage message. -->
+    <xml name="version_command">
+        <version_command>pal2nal.pl | head -n 1</version_command>
+    </xml>
+    <!-- Package dependency; the version is taken from the @TOOL_VERSION@ token. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">pal2nal</requirement>
+        </requirements>
+    </xml>
+    <!-- Command template. The first pal2nal.pl call writes the main alignment; when html_output is set,
+         a second call with the extra flag "-html" and otherwise identical arguments writes the HTML view.
+         The calls are chained with && so a failure of the first call is not masked by the second call's exit status. -->
+    <xml name="command">
+        <command detect_errors="exit_code"><![CDATA[
+pal2nal.pl '$protein_alignment'
+#for $nuc in $nucleotide_fastas:
+    '$nuc'
+#end for
+    -output $output_format
+    $show_only_blocks
+    $remove_gaps
+    $remove_mismatches
+    -codontable $genetic_code
+    $suppress_stderr
+    > '$output_file'
+#if $html_output:
+&& pal2nal.pl '$protein_alignment'
+#for $nuc in $nucleotide_fastas:
+    '$nuc'
+#end for
+    -output $output_format
+    $show_only_blocks
+    $remove_gaps
+    $remove_mismatches
+    -codontable $genetic_code
+    -html
+    $suppress_stderr
+    > '$html_output_file'
+#end if
+        ]]></command>
+    </xml>
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pal2nal.xml Sun Nov 09 10:56:21 2025 +0000
@@ -0,0 +1,89 @@
+<tool id="pal2nal" name="PAL2NAL" version="@TOOL_VERSION@+galaxy@WRAPPER_VERSION@" profile="24.0">
+    <description>Codon-based nucleotide alignment from protein and DNA sequences</description>
+
+    <!-- Tokens, help and citations come from info.xml; requirements, version_command and command from macros.xml; tests from tests.xml. -->
+    <macros>
+        <import>info.xml</import>
+        <import>macros.xml</import>
+        <import>tests.xml</import>
+    </macros>
+
+    <expand macro="requirements" />
+
+    <expand macro="version_command" />
+
+    <expand macro="command" />
+
+    <inputs>
+        <param name="protein_alignment" type="data" format="fasta,clustal" label="Protein alignment"
+            help="Accepts CLUSTAL/FASTA alignments uploaded through Galaxy." />
+        <!-- One or more datasets; the command template passes each one to pal2nal.pl as its own positional argument. -->
+        <param name="nucleotide_fastas" type="data" format="fasta" multiple="true" min="1"
+            label="Nucleotide FASTA files" help="Accepts FASTA-formatted nucleotide sequences." />
+
+        <param name="output_format" type="select" argument="-output" label="Output format">
+            <option value="clustal" selected="true">CLUSTAL (default)</option>
+            <option value="paml">PAML</option>
+            <option value="fasta">FASTA</option>
+            <option value="codon">CODON</option>
+        </param>
+
+        <param name="show_only_blocks" type="boolean" argument="-blockonly" truevalue="-blockonly" falsevalue=""
+            checked="false" label="Show only user-specified blocks" help="Use only '#' marked conserved blocks under CLUSTAL alignment." />
+
+        <param name="remove_gaps" type="boolean" argument="-nogap" truevalue="-nogap" falsevalue=""
+            checked="false" label="Remove codons with gaps or in-frame stop codons" />
+
+        <param name="remove_mismatches" type="boolean" argument="-nomismatch" truevalue="-nomismatch" falsevalue=""
+            checked="false" label="Remove mismatched codons" />
+
+        <param name="genetic_code" type="select" argument="-codontable" label="Genetic code">
+            <option value="1" selected="true">1: Universal</option>
+            <option value="2">2: Vertebrate mitochondrial</option>
+            <option value="3">3: Yeast mitochondrial</option>
+            <option value="4">4: Mold/Protozoan/Coelenterate mito + Mycoplasma</option>
+            <option value="5">5: Invertebrate mitochondrial</option>
+            <option value="6">6: Ciliate/Hexamita nuclear</option>
+            <option value="9">9: Echinoderm/Flatworm mitochondrial</option>
+            <option value="10">10: Euplotid nuclear</option>
+            <option value="11">11: Bacterial/Archaeal/Plastid</option>
+            <option value="12">12: Alternative yeast nuclear</option>
+            <option value="13">13: Ascidian mitochondrial</option>
+            <option value="14">14: Alternative flatworm mitochondrial</option>
+            <option value="15">15: Blepharisma nuclear</option>
+            <option value="16">16: Chlorophycean mitochondrial</option>
+            <option value="21">21: Trematode mitochondrial</option>
+            <option value="22">22: Scenedesmus obliquus mitochondrial</option>
+            <option value="23">23: Thraustochytrium mitochondrial</option>
+        </param>
+
+        <!-- Not placed on the command line itself: it gates the second, HTML-producing run and the html_output_file dataset. -->
+        <param name="html_output" type="boolean" truevalue="true" falsevalue=""
+            checked="false" label="Add HTML formatted output" help="Produce an additional dataset with PAL2NAL's HTML view." />
+
+        <param name="suppress_stderr" type="boolean" argument="-nostderr" truevalue="-nostderr" falsevalue=""
+            checked="false" label="Suppress STDERR messages" help="Hide warning messages (use for automated pipelines)." />
+    </inputs>
+
+    <outputs>
+        <!-- Datatype follows the selected output format; PAML output is stored as phylip, codon output as txt. -->
+        <data name="output_file" format="txt" label="${tool.name} on ${on_string}: codon alignment">
+            <change_format>
+                <when input="output_format" value="clustal" format="clustal" />
+                <when input="output_format" value="fasta" format="fasta" />
+                <when input="output_format" value="paml" format="phylip" />
+                <when input="output_format" value="codon" format="txt" />
+            </change_format>
+        </data>
+        <!-- Only created when the html_output checkbox is enabled. -->
+        <data name="html_output_file" format="html" label="${tool.name} on ${on_string}: HTML view">
+            <filter>html_output</filter>
+        </data>
+    </outputs>
+
+    <expand macro="tests" />
+
+    <expand macro="help" />
+
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/for_paml/test.cnt Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,36 @@ + seqfile = test.codon + treefile = test.tree + outfile = test.codeml + + noisy = 0 * 0,1,2,3,9: how much rubbish on the screen + verbose = 0 * 1: detailed output, 0: concise output + runmode = -2 * 0: user tree; 1: semi-automatic; 2: automatic + * 3: StepwiseAddition; (4,5):PerturbationNNI; -2: pairwise + + cleandata = 1 * "I added on 07/07/2004" Mikita Suyama + + seqtype = 1 * 1:codons; 2:AAs; 3:codons-->AAs + CodonFreq = 2 * 0:1/61 each, 1:F1X4, 2:F3X4, 3:codon table + model = 2 + * models for codons: + * 0:one, 1:b, 2:2 or more dN/dS ratios for branches + + NSsites = 0 * dN/dS among sites. 0:no variation, 1:neutral, 2:positive + icode = 0 * 0:standard genetic code; 1:mammalian mt; 2-10:see below + Mgene = 0 * 0:rates, 1:separate; 2:pi, 3:kappa, 4:all + + fix_kappa = 0 * 1: kappa fixed, 0: kappa to be estimated + kappa = 2 * initial or fixed kappa + fix_omega = 0 * 1: omega or omega_1 fixed, 0: estimate + omega = 1 * initial or fixed omega, for codons or codon-transltd AAs + + fix_alpha = 1 * 0: estimate gamma shape parameter; 1: fix it at alpha + alpha = .0 * initial or fixed alpha, 0:infinity (constant rate) + Malpha = 0 * different alphas for genes + ncatG = 4 * # of categories in the dG or AdG models of rates + + clock = 0 * 0: no clock, unrooted tree, 1: clock, rooted tree + getSE = 0 * 0: don't want them, 1: want S.E.s of estimates + RateAncestor = 0 * (1/0): rates (alpha>0) or ancestral states (alpha=0) + method = 0 * 0: simultaneous; 1: one branch at a time +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/for_paml/test.codeml.ori Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,102 @@ +CODONML (in paml 3.14, March 2005) test.codon Model: several dN/dS ratios for branches +Codon frequencies: F3x4 + +ns = 2 ls = 177 +# site patterns = 105 + 2 1 1 1 1 1 1 1 1 1 4 3 2 1 2 + 1 1 4 1 1 2 1 1 1 7 2 3 1 2 1 + 2 1 3 1 1 2 2 8 1 1 1 1 1 1 7 + 1 1 1 3 1 4 1 1 4 1 1 3 2 1 6 + 1 1 1 2 1 1 1 6 2 1 3 1 1 4 1 + 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1 + 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 + + +1 +BC070280 CTA AAC TGC ATC GTC GCT GTG TCC CAG AAC ATG GGC ATC GGC AAG AAC GGG GAC CTG CCC CCA CCG CTC AGG AAT GAA TTC CAG AGA ACC ACA ACC TCT TCA GTA GAA GGT AAA CAG CTG GTG ATT ATG GGT AAG ACC TGG TCC ATT CCT GAG CGA CCT TTA GGT AGA GTT CTC AGC GAA CTC CCT CCA CAA GGA GCT CAT TTT CTT TCC AGT CTA GAT GAT GCC CTT ACT CCA GCA GTA ATG CTC TGG ATA GTT GGT TAT GCC GGC CAT CTT GTG AGG CAA GAA ACG ATT TTG CTG CCA TAC GTC CAG GTA GAG +pseudogene ... ... ... ..T ... AA. .AT ... ... ..G ... ... ... AT. .G. ..T ... ... ... ... ..T .A. ... .AA ... A.. ... ..A ... ... ... C.. ... ... .C. ..G ... ... G.A T.A ..A T.. T.A AT. ... .A. ... ..G ... A.. ... .A. ... ... TA. .T. ... G.. ..T ... TC. ..A ..G ... A.. C.. .C. ... ... GA. ... ..G .G. ... ... .G. .T. .T. ... CA. G.. T.T .TT .C. ..G ..A ... T.. .A. ... T.. ... T.. ..G C.. ... GG. ..A ..C ... ... ..G G.. ... 
..A + +Codon usage in sequences +-------------------------------------------------------------- +Phe TTT 6 10 | Ser TCT 3 3 | Tyr TAT 3 4 | Cys TGT 0 0 + TTC 3 3 | TCC 3 3 | TAC 2 2 | TGC 1 1 +Leu TTA 4 7 | TCA 1 1 | *** TAA 0 0 | *** TGA 0 0 + TTG 1 0 | TCG 0 1 | TAG 0 0 | Trp TGG 2 2 +-------------------------------------------------------------- +Leu CTT 4 2 | Pro CCT 3 5 | His CAT 2 1 | Arg CGT 0 1 + CTC 5 2 | CCC 1 2 | CAC 0 0 | CGC 0 0 + CTA 3 3 | CCA 6 3 | Gln CAA 3 6 | CGA 1 0 + CTG 3 2 | CCG 1 1 | CAG 4 3 | CGG 0 0 +-------------------------------------------------------------- +Ile ATT 5 6 | Thr ACT 1 1 | Asn AAT 7 9 | Ser AGT 3 4 + ATC 3 3 | ACC 3 1 | AAC 3 2 | AGC 1 0 + ATA 1 1 | ACA 2 3 | Lys AAA 8 11 | Arg AGA 3 3 +Met ATG 6 4 | ACG 1 1 | AAG 9 8 | AGG 2 2 +-------------------------------------------------------------- +Val GTT 4 3 | Ala GCT 2 0 | Asp GAT 5 5 | Gly GGT 5 4 + GTC 2 3 | GCC 2 1 | GAC 4 6 | GGC 5 3 + GTA 3 2 | GCA 1 2 | Glu GAA 11 8 | GGA 1 1 + GTG 3 4 | GCG 0 0 | GAG 5 7 | GGG 1 1 +-------------------------------------------------------------- + +Codon position x base (3x4) table for each sequence. 
+ +#1: BC070280 +position 1: T:0.16384 C:0.20339 A:0.32768 G:0.30508 +position 2: T:0.31638 C:0.16949 A:0.37288 G:0.14124 +position 3: T:0.29944 C:0.21469 A:0.27119 G:0.21469 + +#2: pseudogene +position 1: T:0.20904 C:0.17514 A:0.33333 G:0.28249 +position 2: T:0.31073 C:0.15819 A:0.40678 G:0.12429 +position 3: T:0.32768 C:0.18079 A:0.28814 G:0.20339 + +Sums of codon usage counts +------------------------------------------------------------------------------ +Phe F TTT 16 | Ser S TCT 6 | Tyr Y TAT 7 | Cys C TGT 0 + TTC 6 | TCC 6 | TAC 4 | TGC 2 +Leu L TTA 11 | TCA 2 | *** * TAA 0 | *** * TGA 0 + TTG 1 | TCG 1 | TAG 0 | Trp W TGG 4 +------------------------------------------------------------------------------ +Leu L CTT 6 | Pro P CCT 8 | His H CAT 3 | Arg R CGT 1 + CTC 7 | CCC 3 | CAC 0 | CGC 0 + CTA 6 | CCA 9 | Gln Q CAA 9 | CGA 1 + CTG 5 | CCG 2 | CAG 7 | CGG 0 +------------------------------------------------------------------------------ +Ile I ATT 11 | Thr T ACT 2 | Asn N AAT 16 | Ser S AGT 7 + ATC 6 | ACC 4 | AAC 5 | AGC 1 + ATA 2 | ACA 5 | Lys K AAA 19 | Arg R AGA 6 +Met M ATG 10 | ACG 2 | AAG 17 | AGG 4 +------------------------------------------------------------------------------ +Val V GTT 7 | Ala A GCT 2 | Asp D GAT 10 | Gly G GGT 9 + GTC 5 | GCC 3 | GAC 10 | GGC 8 + GTA 5 | GCA 3 | Glu E GAA 19 | GGA 2 + GTG 7 | GCG 0 | GAG 12 | GGG 2 +------------------------------------------------------------------------------ + + +Codon position x base (3x4) table, overall + +position 1: T:0.18644 C:0.18927 A:0.33051 G:0.29379 +position 2: T:0.31356 C:0.16384 A:0.38983 G:0.13277 +position 3: T:0.31356 C:0.19774 A:0.27966 G:0.20904 + + +Nei & Gojobori 1986. dN/dS (dN, dS) +(Note: This matrix is not used in later m.l. analysis. +Use runmode = -2 for ML pairwise comparison.) + +BC070280 +pseudogene 0.5224 (0.1421 0.2721) + +pairwise comparison, codon frequencies: F3x4. + + +2 (pseudogene) ... 
1 (BC070280) +lnL =-1014.258355 + 0.52723 1.94064 0.59797 + +t= 0.5272 S= 129.6 N= 401.4 dN/dS= 0.5980 dN= 0.1510 dS= 0.2525 + + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/for_paml/test.codon Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,23 @@ + 2 570 +BC070280 +ATGGTTGGTTCGCTAAACTGCATCGTCGCTGTGTCCCAGAACATGGGCATCGGCAAGAAC +GGGGACCTGCCCTGGCCACCGCTCAGGAATGAATTCAGATATTTCCAGAGAATGACCACA +ACCTCTTCAGTAGAAGGTAAACAGAATCTGGTGATTATGGGTAAGAAGACCTGGTTCTCC +ATTCCTGAGAAGAATCGACCTTTAAAGGGTAGAATTAATTTAGTTCTCAGCAGAGAACTC +AAGGAACCTCCACAAGGAGCTCATTTTCTTTCCAGAAGTCTAGATGATGCCTTAAAACTT +ACTGAACAACCAGAATTAGCAAATAAAGTAGACATGCTCTGGATAGTTGGTGGCAGTTCT +GTTTATAAGGAAGCCATGAATCACCCAGGCCATCTTAAACTATTTGTGACAAGGATCATG +CAAGACTTTGAAAGTGACACGTTTTTT---CCA---GAAATTGATTTGGAGAAATATAAA +CTTCTGCCAGAATACCCA---GGTGTTCTCTCTGATGTCCAGGAGGAGAAAGGCATTAAG +TACAAATTTGAAGTATATGAGAAGAATGAT +pseudogene +------------CTAAACTGCATTGTCAATGATTCCCAGAAGATGGGCATCATCAGGAAT +GGGGACCTGCCCTGACCTCAGCTCAAAAATAAATTCGA----TTCCAAAGAATGACCACA +CCCTCTTCAGCAGAGGGTAAAGAAAATTTAGTATTTTTAATTAGGAAGAACTGGTTCTCG +ATTACTGAGAAGAATCAACCTTTAAAGTATATAATTAATTTAGTTGTCAGTAGAGAATCC +AAGGAACCACCGCAAAGACCTCCTTTTCTTGACTAAAGTCTGGGTGATGCCTTAAAACGT +ATTGAGCAACTAAAATTAGCAAATAAACAAGACGTGTTTTTTACAGTGGGAGGCAGTTCT +GTTTATAAGGAATCCATGAATTGA---GACCATTTTAAACTATTTGTGACATGGATCATG +CAGGACTTTCAAAGTGACACGTTTTTTTCCCCTA--GAAGGTGATTTAGAGAAATATAAA +CTTCTCCCAGAATACCCACAAGGTGTTGTCTCTGATGTGGAGGAGGAGAAAGGCATTAAG +TACAAATTTGAAGTATATGAAAAGAATGAT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/for_paml/test.tree Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,1 @@ +(BC070280, pseudogene);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/test.aln Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,19 @@ +CLUSTAL W (1.82) multiple sequence alignment + + +BC070280 MVGSLNCIVAVSQNMGIGKNGDLPWPPLRNEFRYFQRMTTTSSVEGKQNLVIMGKKTWFS +pseudogene ----LNCIVNVSQKMGIIRNGDLP*PQLKNKF2-FQRMTTPSSAEGKENLVFLIRKNWFS + ################# ########################## + + +BC070280 IPEKNRPLKGRINLVLSRELKEPPQGAHFLSRSLDDALKLTEQPELANKVDMLWIVGGSS +pseudogene ITEKNQPLKYIINLVVSRESKEPPQRPPFLD*SLGDALKRIEQLKLANKQDVFFTVGGSS + ############### ##################### + + +BC070280 VYKEAMNHPGHLKLFVTRIMQDFESDTFF-PEIDLEKYKLLPEYP-GVLSDVQEEKGIKY +pseudogene VYKESMN*-DHFKLFVTWIMQDFQSDTFFS4EGDLEKYKLLPEYPQGVVSDVEEEKGIKY + + +BC070280 KFEVYEKND +pseudogene KFEVYEKND \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/test.nuc Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,29 @@ +>BC0700280 dihydrofolate reductase (human) +TGTAACGAGC GGGCTCGGAG GTCCTCCCGC TGCTGTCATG GTTGGTTCGC TAAACTGCAT +CGTCGCTGTG TCCCAGAACA TGGGCATCGG CAAGAACGGG GACCTGCCCT GGCCACCGCT +CAGGAATGAA TTCAGATATT TCCAGAGAAT GACCACAACC TCTTCAGTAG AAGGTAAACA +GAATCTGGTG ATTATGGGTA AGAAGACCTG GTTCTCCATT CCTGAGAAGA ATCGACCTTT +AAAGGGTAGA ATTAATTTAG TTCTCAGCAG AGAACTCAAG GAACCTCCAC AAGGAGCTCA +TTTTCTTTCC AGAAGTCTAG ATGATGCCTT AAAACTTACT GAACAACCAG AATTAGCAAA +TAAAGTAGAC ATGCTCTGGA TAGTTGGTGG CAGTTCTGTT TATAAGGAAG CCATGAATCA +CCCAGGCCAT CTTAAACTAT TTGTGACAAG GATCATGCAA GACTTTGAAA GTGACACGTT +TTTTCCAGAA ATTGATTTGG AGAAATATAA ACTTCTGCCA GAATACCCAG GTGTTCTCTC +TGATGTCCAG GAGGAGAAAG GCATTAAGTA CAAATTTGAA GTATATGAGA AGAATGATTA +ATATGAAGGT GTTTTCTAGT TTAAGTTGTT CCCCCTCCCT CTGAAAAAAG TATGTATTTT +TACATTAGAA AAGGTTTTTT GTTGACTTTA GATCTATAAT TATTTCTAAG CAACTTGTTT +TTATTCCCCA CTACTCTTGT CTCTATCAGA TACCATTTAT GAGACATTCT TGCTATAACT +AAGTGCTTCT CCAAGACCCC AACTGAGTCC CCAGCACCTG CTACAGTGAG CTGCCATTCC +ACACCCATCA CATGTGGCAC TCTTGCCAGT CCTTGACATT GTCGGGCTTT TCACATGTTG +GTAATATTTA TTAAAGATGA AGATCCACAT ACCCTTCAAA AAAAAAAAAA AAAAAAAAAA +AAAAAAA +>pseudogene dihydrofolate reductase pseudogene (human) +CTAAACTGCA TTGTCAATGA TTCCCAGAAG ATGGGCATCA TCAGGAATGG GGACCTGCCC +TGACCTCAGC TCAAAAATAA ATTCGATTCC AAAGAATGAC CACACCCTCT TCAGCAGAGG +GTAAAGAAAA TTTAGTATTT TTAATTAGGA AGAACTGGTT CTCGATTACT GAGAAGAATC +AACCTTTAAA GTATATAATT AATTTAGTTG TCAGTAGAGA ATCCAAGGAA CCACCGCAAA +GACCTCCTTT TCTTGACTAA AGTCTGGGTG ATGCCTTAAA ACGTATTGAG CAACTAAAAT +TAGCAAATAA ACAAGACGTG TTTTTTACAG TGGGAGGCAG TTCTGTTTAT AAGGAATCCA +TGAATTGAGA CCATTTTAAA CTATTTGTGA CATGGATCAT GCAGGACTTT CAAAGTGACA +CGTTTTTTTC CCCTAGAAGG TGATTTAGAG AAATATAAAC TTCTCCCAGA ATACCCACAA +GGTGTTGTCT CTGATGTGGA GGAGGAGAAA GGCATTAAGT ACAAATTTGA AGTATATGAA +AAGAATGAT \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/test_bc070280.fasta Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,18 @@ +>BC0700280 dihydrofolate reductase (human) +TGTAACGAGC GGGCTCGGAG GTCCTCCCGC TGCTGTCATG GTTGGTTCGC TAAACTGCAT +CGTCGCTGTG TCCCAGAACA TGGGCATCGG CAAGAACGGG GACCTGCCCT GGCCACCGCT +CAGGAATGAA TTCAGATATT TCCAGAGAAT GACCACAACC TCTTCAGTAG AAGGTAAACA +GAATCTGGTG ATTATGGGTA AGAAGACCTG GTTCTCCATT CCTGAGAAGA ATCGACCTTT +AAAGGGTAGA ATTAATTTAG TTCTCAGCAG AGAACTCAAG GAACCTCCAC AAGGAGCTCA +TTTTCTTTCC AGAAGTCTAG ATGATGCCTT AAAACTTACT GAACAACCAG AATTAGCAAA +TAAAGTAGAC ATGCTCTGGA TAGTTGGTGG CAGTTCTGTT TATAAGGAAG CCATGAATCA +CCCAGGCCAT CTTAAACTAT TTGTGACAAG GATCATGCAA GACTTTGAAA GTGACACGTT +TTTTCCAGAA ATTGATTTGG AGAAATATAA ACTTCTGCCA GAATACCCAG GTGTTCTCTC +TGATGTCCAG GAGGAGAAAG GCATTAAGTA CAAATTTGAA GTATATGAGA AGAATGATTA +ATATGAAGGT GTTTTCTAGT TTAAGTTGTT CCCCCTCCCT CTGAAAAAAG TATGTATTTT +TACATTAGAA AAGGTTTTTT GTTGACTTTA GATCTATAAT TATTTCTAAG CAACTTGTTT +TTATTCCCCA CTACTCTTGT CTCTATCAGA TACCATTTAT GAGACATTCT TGCTATAACT +AAGTGCTTCT CCAAGACCCC AACTGAGTCC CCAGCACCTG CTACAGTGAG CTGCCATTCC +ACACCCATCA CATGTGGCAC TCTTGCCAGT CCTTGACATT GTCGGGCTTT TCACATGTTG +GTAATATTTA TTAAAGATGA AGATCCACAT ACCCTTCAAA AAAAAAAAAA AAAAAAAAAA +AAAAAAA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/test_dup.nuc Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,29 @@ +>BC0700280 dihydrofolate reductase (human) +TGTAACGAGC GGGCTCGGAG GTCCTCCCGC TGCTGTCATG GTTGGTTCGC TAAACTGCAT +CGTCGCTGTG TCCCAGAACA TGGGCATCGG CAAGAACGGG GACCTGCCCT GGCCACCGCT +CAGGAATGAA TTCAGATATT TCCAGAGAAT GACCACAACC TCTTCAGTAG AAGGTAAACA +GAATCTGGTG ATTATGGGTA AGAAGACCTG GTTCTCCATT CCTGAGAAGA ATCGACCTTT +AAAGGGTAGA ATTAATTTAG TTCTCAGCAG AGAACTCAAG GAACCTCCAC AAGGAGCTCA +TTTTCTTTCC AGAAGTCTAG ATGATGCCTT AAAACTTACT GAACAACCAG AATTAGCAAA +TAAAGTAGAC ATGCTCTGGA TAGTTGGTGG CAGTTCTGTT TATAAGGAAG CCATGAATCA +CCCAGGCCAT CTTAAACTAT TTGTGACAAG GATCATGCAA GACTTTGAAA GTGACACGTT +TTTTCCAGAA ATTGATTTGG AGAAATATAA ACTTCTGCCA GAATACCCAG GTGTTCTCTC +TGATGTCCAG GAGGAGAAAG GCATTAAGTA CAAATTTGAA GTATATGAGA AGAATGATTA +ATATGAAGGT GTTTTCTAGT TTAAGTTGTT CCCCCTCCCT CTGAAAAAAG TATGTATTTT +TACATTAGAA AAGGTTTTTT GTTGACTTTA GATCTATAAT TATTTCTAAG CAACTTGTTT +TTATTCCCCA CTACTCTTGT CTCTATCAGA TACCATTTAT GAGACATTCT TGCTATAACT +AAGTGCTTCT CCAAGACCCC AACTGAGTCC CCAGCACCTG CTACAGTGAG CTGCCATTCC +ACACCCATCA CATGTGGCAC TCTTGCCAGT CCTTGACATT GTCGGGCTTT TCACATGTTG +GTAATATTTA TTAAAGATGA AGATCCACAT ACCCTTCAAA AAAAAAAAAA AAAAAAAAAA +AAAAAAA +>pseudogene dihydrofolate reductase pseudogene (human) +CTAAACTGCA TTGTCAATGA TTCCCAGAAG ATGGGCATCA TCAGGAATGG GGACCTGCCC +TGACCTCAGC TCAAAAATAA ATTCGATTCC AAAGAATGAC CACACCCTCT TCAGCAGAGG +GTAAAGAAAA TTTAGTATTT TTAATTAGGA AGAACTGGTT CTCGATTACT GAGAAGAATC +AACCTTTAAA GTATATAATT AATTTAGTTG TCAGTAGAGA ATCCAAGGAA CCACCGCAAA +GACCTCCTTT TCTTGACTAA AGTCTGGGTG ATGCCTTAAA ACGTATTGAG CAACTAAAAT +TAGCAAATAA ACAAGACGTG TTTTTTACAG TGGGAGGCAG TTCTGTTTAT AAGGAATCCA +TGAATTGAGA CCATTTTAAA CTATTTGTGA CATGGATCAT GCAGGACTTT CAAAGTGACA +CGTTTTTTTC CCCTAGAAGG TGATTTAGAG AAATATAAAC TTCTCCCAGA ATACCCACAA +GGTGTTGTCT CTGATGTGGA GGAGGAGAAA GGCATTAAGT ACAAATTTGA AGTATATGAA +AAGAATGAT \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/test_pseudogene.fasta Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,11 @@ +>pseudogene dihydrofolate reductase pseudogene (human) +CTAAACTGCA TTGTCAATGA TTCCCAGAAG ATGGGCATCA TCAGGAATGG GGACCTGCCC +TGACCTCAGC TCAAAAATAA ATTCGATTCC AAAGAATGAC CACACCCTCT TCAGCAGAGG +GTAAAGAAAA TTTAGTATTT TTAATTAGGA AGAACTGGTT CTCGATTACT GAGAAGAATC +AACCTTTAAA GTATATAATT AATTTAGTTG TCAGTAGAGA ATCCAAGGAA CCACCGCAAA +GACCTCCTTT TCTTGACTAA AGTCTGGGTG ATGCCTTAAA ACGTATTGAG CAACTAAAAT +TAGCAAATAA ACAAGACGTG TTTTTTACAG TGGGAGGCAG TTCTGTTTAT AAGGAATCCA +TGAATTGAGA CCATTTTAAA CTATTTGTGA CATGGATCAT GCAGGACTTT CAAAGTGACA +CGTTTTTTTC CCCTAGAAGG TGATTTAGAG AAATATAAAC TTCTCCCAGA ATACCCACAA +GGTGTTGTCT CTGATGTGGA GGAGGAGAAA GGCATTAAGT ACAAATTTGA AGTATATGAA +AAGAATGAT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_block_nomismatch.aln Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,14 @@ +CLUSTAL W multiple sequence alignment + +BC070280 CTAAACTGCATCGTCGCTTCCCAGAACATGGGCATCGGCAACGGGTTCCAGATGACCACA +pseudogene CTAAACTGCATTGTCAATTCCCAGAAGATGGGCATCATCAATGGGTTCCAAATGACCACA + +BC070280 ACCTCTTCAGTAGAAGGTAAACAGAATCTGGTGATTATGGGTAAGACCTGGTTCTCCATT +pseudogene CCCTCTTCAGCAGAGGGTAAAGAAAATTTAGTATTTTTAATTAAGAACTGGTTCTCGATT + +BC070280 CCTGAGAAGAATCGACCTTTAAAGGGTATTAATTTAGTTGATGCCTTAAAACTTACTGAA +pseudogene ACTGAGAAGAATCAACCTTTAAAGTATATTAATTTAGTTGATGCCTTAAAACGTATTGAG + +BC070280 CAACCAGAATTAGCAAATAAAGTAGACATGCTCTGGGTT +pseudogene CAACTAAAATTAGCAAATAAACAAGACGTGTTTTTTGTG +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_clustal.aln Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,42 @@ +CLUSTAL W multiple sequence alignment + +BC070280 ATGGTTGGTTCGCTAAACTGCATCGTCGCTGTGTCCCAGAACATGGGCATCGGCAAGAAC +pseudogene ------------CTAAACTGCATTGTCAATGATTCCCAGAAGATGGGCATCATCAGGAAT + ################################################ + +BC070280 GGGGACCTGCCCTGGCCACCGCTCAGGAATGAATTCAGATATTTCCAGAGAATGACCACA +pseudogene GGGGACCTGCCCTGACCTCAGCTCAAAAATAAATTCGA----TTCCAAAGAATGACCACA + ### ################## + +BC070280 ACCTCTTCAGTAGAAGGTAAACAGAATCTGGTGATTATGGGTAAGAAGACCTGGTTCTCC +pseudogene CCCTCTTCAGCAGAGGGTAAAGAAAATTTAGTATTTTTAATTAGGAAGAACTGGTTCTCG + ############################################################ + +BC070280 ATTCCTGAGAAGAATCGACCTTTAAAGGGTAGAATTAATTTAGTTCTCAGCAGAGAACTC +pseudogene ATTACTGAGAAGAATCAACCTTTAAAGTATATAATTAATTTAGTTGTCAGTAGAGAATCC + ############################################# + +BC070280 AAGGAACCTCCACAAGGAGCTCATTTTCTTTCCAGAAGTCTAGATGATGCCTTAAAACTT +pseudogene AAGGAACCACCGCAAAGACCTCCTTTTCTTGACTAAAGTCTGGGTGATGCCTTAAAACGT + ############### + +BC070280 ACTGAACAACCAGAATTAGCAAATAAAGTAGACATGCTCTGGATAGTTGGTGGCAGTTCT +pseudogene ATTGAGCAACTAAAATTAGCAAATAAACAAGACGTGTTTTTTACAGTGGGAGGCAGTTCT + ################################################ + +BC070280 GTTTATAAGGAAGCCATGAATCACCCAGGCCATCTTAAACTATTTGTGACAAGGATCATG +pseudogene GTTTATAAGGAATCCATGAATTGA---GACCATTTTAAACTATTTGTGACATGGATCATG + + +BC070280 CAAGACTTTGAAAGTGACACGTTTTTT---CCA---GAAATTGATTTGGAGAAATATAAA +pseudogene CAGGACTTTCAAAGTGACACGTTTTTTTCCCCTA--GAAGGTGATTTAGAGAAATATAAA + + +BC070280 CTTCTGCCAGAATACCCA---GGTGTTCTCTCTGATGTCCAGGAGGAGAAAGGCATTAAG +pseudogene CTTCTCCCAGAATACCCACAAGGTGTTGTCTCTGATGTGGAGGAGGAGAAAGGCATTAAG + + +BC070280 TACAAATTTGAAGTATATGAGAAGAATGAT +pseudogene TACAAATTTGAAGTATATGAAAAGAATGAT + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_clustal_multi.aln Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,42 @@ +CLUSTAL W multiple sequence alignment + +BC070280 ATGGTTGGTTCGCTAAACTGCATCGTCGCTGTGTCCCAGAACATGGGCATCGGCAAGAAC +pseudogene ------------CTAAACTGCATTGTCAATGATTCCCAGAAGATGGGCATCATCAGGAAT + ################################################ + +BC070280 GGGGACCTGCCCTGGCCACCGCTCAGGAATGAATTCAGATATTTCCAGAGAATGACCACA +pseudogene GGGGACCTGCCCTGACCTCAGCTCAAAAATAAATTCGA----TTCCAAAGAATGACCACA + ### ################## + +BC070280 ACCTCTTCAGTAGAAGGTAAACAGAATCTGGTGATTATGGGTAAGAAGACCTGGTTCTCC +pseudogene CCCTCTTCAGCAGAGGGTAAAGAAAATTTAGTATTTTTAATTAGGAAGAACTGGTTCTCG + ############################################################ + +BC070280 ATTCCTGAGAAGAATCGACCTTTAAAGGGTAGAATTAATTTAGTTCTCAGCAGAGAACTC +pseudogene ATTACTGAGAAGAATCAACCTTTAAAGTATATAATTAATTTAGTTGTCAGTAGAGAATCC + ############################################# + +BC070280 AAGGAACCTCCACAAGGAGCTCATTTTCTTTCCAGAAGTCTAGATGATGCCTTAAAACTT +pseudogene AAGGAACCACCGCAAAGACCTCCTTTTCTTGACTAAAGTCTGGGTGATGCCTTAAAACGT + ############### + +BC070280 ACTGAACAACCAGAATTAGCAAATAAAGTAGACATGCTCTGGATAGTTGGTGGCAGTTCT +pseudogene ATTGAGCAACTAAAATTAGCAAATAAACAAGACGTGTTTTTTACAGTGGGAGGCAGTTCT + ################################################ + +BC070280 GTTTATAAGGAAGCCATGAATCACCCAGGCCATCTTAAACTATTTGTGACAAGGATCATG +pseudogene GTTTATAAGGAATCCATGAATTGA---GACCATTTTAAACTATTTGTGACATGGATCATG + + +BC070280 CAAGACTTTGAAAGTGACACGTTTTTT---CCA---GAAATTGATTTGGAGAAATATAAA +pseudogene CAGGACTTTCAAAGTGACACGTTTTTTTCCCCTA--GAAGGTGATTTAGAGAAATATAAA + + +BC070280 CTTCTGCCAGAATACCCA---GGTGTTCTCTCTGATGTCCAGGAGGAGAAAGGCATTAAG +pseudogene CTTCTCCCAGAATACCCACAAGGTGTTGTCTCTGATGTGGAGGAGGAGAAAGGCATTAAG + + +BC070280 TACAAATTTGAAGTATATGAGAAGAATGAT +pseudogene TACAAATTTGAAGTATATGAAAAGAATGAT + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_codon.txt Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,60 @@ + M V G S L N C I V A V S Q N M G I G K N +BC070280 ATG GTT GGT TCG CTA AAC TGC ATC GTC GCT GTG TCC CAG AAC ATG GGC ATC GGC AAG AAC + - - - - L N C I V N V S Q K M G I I R N +pseudogene --- --- --- --- CTA AAC TGC ATT GTC AAT GAT TCC CAG AAG ATG GGC ATC ATC AGG AAT + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + + G D L P W P P L R N E F R Y F Q R M T T +BC070280 GGG GAC CTG CCC TGG CCA CCG CTC AGG AAT GAA TTC AGA TAT TTC CAG AGA ATG ACC ACA + G D L P * P Q L K N K F 2 - F Q R M T T +pseudogene GGG GAC CTG CCC TGA CCT CAG CTC AAA AAT AAA TTC GA- --- TTC CAA AGA ATG ACC ACA + ### ### ### ### ### ### ### + + T S S V E G K Q N L V I M G K K T W F S +BC070280 ACC TCT TCA GTA GAA GGT AAA CAG AAT CTG GTG ATT ATG GGT AAG AAG ACC TGG TTC TCC + P S S A E G K E N L V F L I R K N W F S +pseudogene CCC TCT TCA GCA GAG GGT AAA GAA AAT TTA GTA TTT TTA ATT AGG AAG AAC TGG TTC TCG + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + + I P E K N R P L K G R I N L V L S R E L +BC070280 ATT CCT GAG AAG AAT CGA CCT TTA AAG GGT AGA ATT AAT TTA GTT CTC AGC AGA GAA CTC + I T E K N Q P L K Y I I N L V V S R E S +pseudogene ATT ACT GAG AAG AAT CAA CCT TTA AAG TAT ATA ATT AAT TTA GTT GTC AGT AGA GAA TCC + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + + K E P P Q G A H F L S R S L D D A L K L +BC070280 AAG GAA CCT CCA CAA GGA GCT CAT TTT CTT TCC AGA AGT CTA GAT GAT GCC TTA AAA CTT + K E P P Q R P P F L D * S L G D A L K R +pseudogene AAG GAA CCA CCG CAA AGA CCT CCT TTT CTT GAC TAA AGT CTG GGT GAT GCC TTA AAA CGT + ### ### ### ### ### + + T E Q P E L A N K V D M L W I V G G S S +BC070280 ACT GAA CAA CCA GAA TTA GCA AAT AAA GTA GAC ATG CTC TGG ATA GTT GGT GGC AGT TCT + I E Q L K L A N K Q D V F F T V G G S S +pseudogene ATT GAG CAA CTA AAA TTA GCA AAT AAA CAA GAC GTG TTT TTT ACA GTG GGA GGC AGT 
TCT + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + + V Y K E A M N H P G H L K L F V T R I M +BC070280 GTT TAT AAG GAA GCC ATG AAT CAC CCA GGC CAT CTT AAA CTA TTT GTG ACA AGG ATC ATG + V Y K E S M N * - D H F K L F V T W I M +pseudogene GTT TAT AAG GAA TCC ATG AAT TGA --- GAC CAT TTT AAA CTA TTT GTG ACA TGG ATC ATG + + + Q D F E S D T F F - P - E I D L E K Y K +BC070280 CAA GAC TTT GAA AGT GAC ACG TTT TTT --- CCA --- GAA ATT GAT TTG GAG AAA TAT AAA + Q D F Q S D T F F S 4 - E G D L E K Y K +pseudogene CAG GAC TTT CAA AGT GAC ACG TTT TTT TCC CCT A-- GAA GGT GAT TTA GAG AAA TAT AAA + + + L L P E Y P - G V L S D V Q E E K G I K +BC070280 CTT CTG CCA GAA TAC CCA --- GGT GTT CTC TCT GAT GTC CAG GAG GAG AAA GGC ATT AAG + L L P E Y P Q G V V S D V E E E K G I K +pseudogene CTT CTC CCA GAA TAC CCA CAA GGT GTT GTC TCT GAT GTG GAG GAG GAG AAA GGC ATT AAG + + + Y K F E V Y E K N D +BC070280 TAC AAA TTT GAA GTA TAT GAG AAG AAT GAT + Y K F E V Y E K N D +pseudogene TAC AAA TTT GAA GTA TAT GAA AAG AAT GAT + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_html.html Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,16 @@ +<pre> +CLUSTAL W multiple sequence alignment + +BC070280 CTAAACTGCATCGTCGCTTCCCAGAACATGGGCATCGGCAACGGGTTCCAGATGACCACA +pseudogene CTAAACTGCATTGTCAATTCCCAGAAGATGGGCATCATCAATGGGTTCCAAATGACCACA + +BC070280 ACCTCTTCAGTAGAAGGTAAACAGAATCTGGTGATTATGGGTAAGACCTGGTTCTCCATT +pseudogene CCCTCTTCAGCAGAGGGTAAAGAAAATTTAGTATTTTTAATTAAGAACTGGTTCTCGATT + +BC070280 CCTGAGAAGAATCGACCTTTAAAGGGTATTAATTTAGTTGATGCCTTAAAACTTACTGAA +pseudogene ACTGAGAAGAATCAACCTTTAAAGTATATTAATTTAGTTGATGCCTTAAAACGTATTGAG + +BC070280 CAACCAGAATTAGCAAATAAAGTAGACATGCTCTGGGTT +pseudogene CAACTAAAATTAGCAAATAAACAAGACGTGTTTTTTGTG + +</pre>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_nogap.fasta Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,20 @@ +>BC070280 +CTAAACTGCATCGTCGCTGTGTCCCAGAACATGGGCATCGGCAAGAACGGGGACCTGCCC +CCACCGCTCAGGAATGAATTCTTCCAGAGAATGACCACAACCTCTTCAGTAGAAGGTAAA +CAGAATCTGGTGATTATGGGTAAGAAGACCTGGTTCTCCATTCCTGAGAAGAATCGACCT +TTAAAGGGTAGAATTAATTTAGTTCTCAGCAGAGAACTCAAGGAACCTCCACAAGGAGCT +CATTTTCTTTCCAGTCTAGATGATGCCTTAAAACTTACTGAACAACCAGAATTAGCAAAT +AAAGTAGACATGCTCTGGATAGTTGGTGGCAGTTCTGTTTATAAGGAAGCCATGAATGGC +CATCTTAAACTATTTGTGACAAGGATCATGCAAGACTTTGAAAGTGACACGTTTTTTCCA +GAAATTGATTTGGAGAAATATAAACTTCTGCCAGAATACCCAGGTGTTCTCTCTGATGTC +CAGGAGGAGAAAGGCATTAAGTACAAATTTGAAGTATATGAGAAGAATGAT +>pseudogene +CTAAACTGCATTGTCAATGATTCCCAGAAGATGGGCATCATCAGGAATGGGGACCTGCCC +CCTCAGCTCAAAAATAAATTCTTCCAAAGAATGACCACACCCTCTTCAGCAGAGGGTAAA +GAAAATTTAGTATTTTTAATTAGGAAGAACTGGTTCTCGATTACTGAGAAGAATCAACCT +TTAAAGTATATAATTAATTTAGTTGTCAGTAGAGAATCCAAGGAACCACCGCAAAGACCT +CCTTTTCTTGACAGTCTGGGTGATGCCTTAAAACGTATTGAGCAACTAAAATTAGCAAAT +AAACAAGACGTGTTTTTTACAGTGGGAGGCAGTTCTGTTTATAAGGAATCCATGAATGAC +CATTTTAAACTATTTGTGACATGGATCATGCAGGACTTTCAAAGTGACACGTTTTTTCCT +GAAGGTGATTTAGAGAAATATAAACTTCTCCCAGAATACCCAGGTGTTGTCTCTGATGTG +GAGGAGGAGAAAGGCATTAAGTACAAATTTGAAGTATATGAAAAGAATGAT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_paml.paml Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,23 @@ + 2 570 +BC070280 +ATGGTTGGTTCGCTAAACTGCATCGTCGCTGTGTCCCAGAACATGGGCATCGGCAAGAAC +GGGGACCTGCCCTGGCCACCGCTCAGGAATGAATTCAGATATTTCCAGAGAATGACCACA +ACCTCTTCAGTAGAAGGTAAACAGAATCTGGTGATTATGGGTAAGAAGACCTGGTTCTCC +ATTCCTGAGAAGAATCGACCTTTAAAGGGTAGAATTAATTTAGTTCTCAGCAGAGAACTC +AAGGAACCTCCACAAGGAGCTCATTTTCTTTCCAGAAGTCTAGATGATGCCTTAAAACTT +ACTGAACAACCAGAATTAGCAAATAAAGTAGACATGCTCTGGATAGTTGGTGGCAGTTCT +GTTTATAAGGAAGCCATGAATCACCCAGGCCATCTTAAACTATTTGTGACAAGGATCATG +CAAGACTTTGAAAGTGACACGTTTTTT---CCA---GAAATTGATTTGGAGAAATATAAA +CTTCTGCCAGAATACCCA---GGTGTTCTCTCTGATGTCCAGGAGGAGAAAGGCATTAAG +TACAAATTTGAAGTATATGAGAAGAATGAT +pseudogene +------------CTAAACTGCATTGTCAATGATTCCCAGAAGATGGGCATCATCAGGAAT +GGGGACCTGCCCTGACCTCAGCTCAAAAATAAATTCGA----TTCCAAAGAATGACCACA +CCCTCTTCAGCAGAGGGTAAAGAAAATTTAGTATTTTTAATTAGGAAGAACTGGTTCTCG +ATTACTGAGAAGAATCAACCTTTAAAGTATATAATTAATTTAGTTGTCAGTAGAGAATCC +AAGGAACCACCGCAAAGACCTCCTTTTCTTGACTAAAGTCTGGGTGATGCCTTAAAACGT +ATTGAGCAACTAAAATTAGCAAATAAACAAGACGTGTTTTTTACAGTGGGAGGCAGTTCT +GTTTATAAGGAATCCATGAATTGA---GACCATTTTAAACTATTTGTGACATGGATCATG +CAGGACTTTCAAAGTGACACGTTTTTTTCCCCTA--GAAGGTGATTTAGAGAAATATAAA +CTTCTCCCAGAATACCCACAAGGTGTTGTCTCTGATGTGGAGGAGGAGAAAGGCATTAAG +TACAAATTTGAAGTATATGAAAAGAATGAT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests.xml Sun Nov 09 10:56:21 2025 +0000 @@ -0,0 +1,49 @@ +<macros> + <xml name="tests"> + <tests> + <test expect_num_outputs="1"> + <param name="protein_alignment" value="inputs/test.aln" ftype="clustal" /> + <param name="nucleotide_fastas" value="inputs/test.nuc" ftype="fasta" /> + <param name="output_format" value="paml" /> + <output name="output_file" file="outputs/expected_paml.paml" ftype="phylip" /> + </test> + <test expect_num_outputs="1"> + <param name="protein_alignment" value="inputs/test.aln" ftype="clustal" /> + <param name="nucleotide_fastas" value="inputs/test.nuc" ftype="fasta" /> + <param name="output_format" value="clustal" /> + <output name="output_file" file="outputs/expected_clustal.aln" ftype="clustal" /> + </test> + <test expect_num_outputs="1"> + <param name="protein_alignment" value="inputs/test.aln" ftype="clustal" /> + <param name="nucleotide_fastas" value="inputs/test.nuc" ftype="fasta" /> + <param name="output_format" value="fasta" /> + <param name="remove_gaps" value="true" /> + <output name="output_file" file="outputs/expected_nogap.fasta" ftype="fasta" /> + </test> + <test expect_num_outputs="1"> + <param name="protein_alignment" value="inputs/test.aln" ftype="clustal" /> + <param name="nucleotide_fastas" value="inputs/test.nuc" ftype="fasta" /> + <param name="output_format" value="codon" /> + <output name="output_file" file="outputs/expected_codon.txt" ftype="txt" /> + </test> + <test expect_num_outputs="1"> + <param name="protein_alignment" value="inputs/test.aln" ftype="clustal" /> + <param name="nucleotide_fastas" value="inputs/test_bc070280.fasta,inputs/test_pseudogene.fasta" ftype="fasta" /> + <param name="output_format" value="clustal" /> + <output name="output_file" file="outputs/expected_clustal_multi.aln" ftype="clustal" /> + </test> + <test expect_num_outputs="2"> + <param name="protein_alignment" value="inputs/test.aln" ftype="clustal" /> + <param name="nucleotide_fastas" 
value="inputs/test.nuc" ftype="fasta" /> + <param name="output_format" value="clustal" /> + <param name="show_only_blocks" value="true" /> + <param name="remove_mismatches" value="true" /> + <param name="genetic_code" value="2" /> + <param name="html_output" value="true" /> + <param name="suppress_stderr" value="true" /> + <output name="output_file" file="outputs/expected_block_nomismatch.aln" ftype="clustal" /> + <output name="html_output_file" file="outputs/expected_html.html" ftype="html" /> + </test> + </tests> + </xml> +</macros>
