# HG changeset patch
# User bgruening
# Date 1710920092 0
# Node ID bf28a8cff401735fd2b8d046587c60622d8f089a
# Parent 6f28e90db9322279f399f84b02576d17695a90e8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/mafft commit 2f6456c314c010fd73f5eeaf809a9afce47353af
diff -r 6f28e90db932 -r bf28a8cff401 macros.xml
--- a/macros.xml Tue Oct 31 15:48:53 2023 +0000
+++ b/macros.xml Wed Mar 20 07:34:52 2024 +0000
@@ -1,22 +1,55 @@
- 7.508
- 1
+ 7.520
+ 0
22.01
+
+ fasta36
MAFFT
-
- mafft
- fasta3
-
+
+ mafft
+ fasta3
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
- 10.1093/molbev/mst010
-
+
+ 10.1093/nar/gkf436
+ 10.1093/nar/gki198
+ 10.1093/molbev/mst010
+
diff -r 6f28e90db932 -r bf28a8cff401 mafft-add.xml
--- a/mafft-add.xml Tue Oct 31 15:48:53 2023 +0000
+++ b/mafft-add.xml Wed Mar 20 07:34:52 2024 +0000
@@ -16,43 +16,53 @@
'$outputAlignment'
- #if $map == '--mapout'
- && mv '${inputSequences}.map' '$outputmap'
+ $keeplength
+ $mapout
+ $reorder
+ input_dir/alignment > '$outputAlignment'
+
+ #if $mapout
+ && mv input_dir/sequence.map '$outputmap'
#end if
]]>
+
+
+
-
+
-
-
-
+
+
+
-
-
-
+
+
+
@@ -63,11 +73,11 @@
-
+
-
+
diff -r 6f28e90db932 -r bf28a8cff401 mafft.xml
--- a/mafft.xml Tue Oct 31 15:48:53 2023 +0000
+++ b/mafft.xml Wed Mar 20 07:34:52 2024 +0000
@@ -1,305 +1,542 @@
-
-
-Multiple alignment program for amino acid or nucleotide sequences
-
- macros.xml
-
-
-
-
-
-
-
-
-
-
- '$outputAlignment';
-
- #if $getTree == "--treeout"
- mv '${inputSequences}.tree' '$outputTree';
- #end if
- ]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- getTree == True
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- output]
- - G-INS-i (suitable for sequences of similar lengths; recommended for <200 sequences; iterative refinement method incorporating global pairwise alignment information):
- - mafft --globalpair --maxiterate 1000 input [> output]
- - E-INS-i (suitable for sequences containing large unalignable regions; recommended for <200 sequences):
- - mafft --ep 0 --genafpair --maxiterate 1000 input [> output]. For E-INS-i, the --ep 0 option is recommended to allow large gaps.
-
- **Speed-oriented methods:**
-
- - FFT-NS-i (iterative refinement method; two cycles only):
- - mafft --retree 2 --maxiterate 2 input [> output]
- - FFT-NS-i (iterative refinement method; max. 1000 iterations):
- - mafft --retree 2 --maxiterate 1000 input [> output]
- - FFT-NS-2 (fast; progressive method):
- - mafft --retree 2 --maxiterate 0 input [> output]
- - FFT-NS-1 (very fast; recommended for >2000 sequences; progressive method with a rough guide tree):
- - mafft --retree 1 --maxiterate 0 input [> output]
- - NW-NS-i (iterative refinement method without FFT approximation; two cycles only):
- - mafft --retree 2 --maxiterate 2 --nofft input [> output]
- - NW-NS-2 (fast; progressive method without the FFT approximation):
- - mafft --retree 2 --maxiterate 0 --nofft input [> output]
- - NW-NS-PartTree-1 (recommended for ~10,000 to ~50,000 sequences; progressive method with the PartTree algorithm):
- - mafft --retree 1 --maxiterate 0 --nofft --parttree input [> output]
-
- **Options:**
-
- - --auto Automatically selects an appropriate strategy from L-INS-i, FFT-NS-i and FFT-NS-2, according to data size. Default: off (always FFT-NS-2)
- - --adjustdirection Generate reverse complement sequences, as necessary, and align them together with the remaining sequences. In the case of protein alignment, these options are just ignored.
- - --op Gap opening penalty, default: 1.53
- - --ep Offset (works like gap extension penalty), default: 0.0
- - --maxiterate Maximum number of iterative refinement, default: 0
- - --clustalout Output: clustal format, default: fasta
- - --retree number Guide tree is built number times in the progressive stage. Valid with 6mer distance. Default: 2
- ]]>
-
-
-
+
+
+ Multiple alignment program for amino acid or nucleotide sequences
+
+ macros.xml
+
+
+
+
+
+
+
+
+ $sequence_count
+ echo "WARNING = Chosen groupsize number larger than number of input sequences. Not recommended for MAFFT." &&
+ #end if
+ #end if
+ #end if
+
+ ## build the MAFFT call: "custom" assembles the guide tree, distance and refinement options one by one; any other flavour value is emitted verbatim
+ #if $flavour.type == "custom"
+ mafft
+ #if $flavour.guidetree.guidetree_generation == "original"
+ #if $flavour.guidetree.dist_flavour.distance_method == "--6merpair"
+ --6merpair
+ --retree $flavour.guidetree.dist_flavour.retree
+ #elif $flavour.guidetree.dist_flavour.distance_method == "--globalpair"
+ --globalpair
+ --weighti $flavour.guidetree.dist_flavour.weighti
+ #if $flavour.guidetree.dist_flavour.treat_unrelated_segments.unalignlevel > 0
+ --allowshift --unalignlevel $flavour.guidetree.dist_flavour.treat_unrelated_segments.unalignlevel
+ #end if
+ $flavour.guidetree.dist_flavour.treat_unrelated_segments.leavegappyregion
+ #elif $flavour.guidetree.dist_flavour.distance_method == "--localpair"
+ --localpair
+ --weighti $flavour.guidetree.dist_flavour.weighti
+ --lop $flavour.guidetree.dist_flavour.lop
+ --lep $flavour.guidetree.dist_flavour.lep
+ --lexp $flavour.guidetree.dist_flavour.lexp
+ #elif $flavour.guidetree.dist_flavour.distance_method == "--genafpair"
+ --genafpair
+ --weighti $flavour.guidetree.dist_flavour.weighti
+ --lop $flavour.guidetree.dist_flavour.lop
+ --lep $flavour.guidetree.dist_flavour.lep
+ --lexp $flavour.guidetree.dist_flavour.lexp
+ --LOP $flavour.guidetree.dist_flavour.LOP
+ --LEXP $flavour.guidetree.dist_flavour.LEXP
+ #elif $flavour.guidetree.dist_flavour.distance_method == "--fastapair"
+ --fastapair
+ --weighti $flavour.guidetree.dist_flavour.weighti
+ #end if
+ #elif $flavour.guidetree.guidetree_generation == "parttree"
+ $flavour.guidetree.parttree_selection.parttree_option
+ --retree $flavour.guidetree.parttree_selection.retree
+ --partsize $flavour.guidetree.parttree_selection.partsize
+ #if $flavour.guidetree.parttree_selection.groupsize != -1
+ --groupsize $flavour.guidetree.parttree_selection.groupsize
+ #end if
+ #end if
+ ## progressive alignment calculation: --maxiterate plus the values of the fft and noscore options
+ --maxiterate $flavour.progressive_alignment_calculation.maxiterate
+ $flavour.progressive_alignment_calculation.fft
+ $flavour.progressive_alignment_calculation.noscore
+ #else
+ $flavour.type
+ #if $flavour.type == "mafft-ginsi" or "--globalpair" in str($flavour.type)
+ #if $flavour.treat_unrelated_segments.unalignlevel > 0
+ --allowshift --unalignlevel $flavour.treat_unrelated_segments.unalignlevel
+ #end if
+ $flavour.treat_unrelated_segments.leavegappyregion
+ #end if
+ #end if
+
+ ## emit the datatype value; when it is non-empty, add the scoring matrix, fmodel and optional gap penalty options
+ $datatype_selection.datatype
+ #if $datatype_selection.datatype != ""
+ #if $datatype_selection.scoring_matrix.type == "custom"
+ --aamatrix '$datatype_selection.scoring_matrix.aamatrix'
+ #else
+ $datatype_selection.scoring_matrix.type $datatype_selection.scoring_matrix.coefficient
+ #end if
+ $datatype_selection.fmodel
+ ## gap penalties: pass --ep/--op only when use_defaults is "no"
+ #if $datatype_selection.gap_costs.use_defaults == "no"
+ --ep $datatype_selection.gap_costs.ep --op $datatype_selection.gap_costs.op
+ #end if
+ #end if
+
+
+ ## output options (each placeholder expands to the current value of its parameter)
+ $reorder
+ $outputFormat
+ $treeout
+
+ ## specify threads to use: GALAXY_SLOTS if set, otherwise 1
+ ## disable multithreading during iterative refinement step for reproducibility
+ ## cmp. https://mafft.cbrc.jp/alignment/software/multithreading.html
+ --thread \${GALAXY_SLOTS:-1} --threadit 0
+
+ input.fa > '$outputAlignment'
+
+ ## when $treeout is set, move input.fa.tree to the tree output
+ #if $treeout
+ && mv input.fa.tree '$outputTree'
+ #end if
+ ]]>
+
+ > input.fa
+ #end for
+ #elif $input.mapping == "merge"
+ #for $batch in $input.batches:
+ #for $dataset in $batch.inputs:
+cat '$dataset' >> input.fa
+ #end for
+ #end for
+ #end if
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ treeout
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ `__)
+- JTT matrices with any point accepted mutation (PAM) rate (Jones, Taylor and Thornton, 1992)
+- PAM-based matrices optimized for transmembrane proteins (Jones, Taylor and Thornton, 1994)
+
+For nucleic acid sequence alignment, MAFFT uses Kimura's two-parameter model (Kimura, 1980)
+with a transition-to-transversion ratio of 2 (kappa 2), but lets you configure the PAM value.
+
+The tool can also try to autodetect the sequence type from the input(s).
+In this mode, it will use the BLOSUM 62 matrix if it detects amino acid input, and the Kimura kappa 2 PAM200 matrix for nucleic acids.
+
+
+Pre-configured MSA methods
+--------------------------
+
+From the `MAFFT man page <https://mafft.cbrc.jp/alignment/software/manual/manual.html>`__, an overview of the different predefined flavours of the tool.
+
+**Accuracy-oriented methods:**
+
+- *L-INS-i* (probably most accurate; recommended for <200 sequences; iterative refinement method incorporating local pairwise alignment information):
+
+ - mafft --localpair --maxiterate 1000 input [> output]
+- *G-INS-i* (suitable for sequences of similar lengths; recommended for <200 sequences; iterative refinement method incorporating global pairwise alignment information):
+
+ - mafft --globalpair --maxiterate 1000 input [> output]
+- *E-INS-i* (suitable for sequences containing large unalignable regions; recommended for <200 sequences):
+
+ - mafft --ep 0 --genafpair --maxiterate 1000 input [> output]. For E-INS-i, the --ep 0 option is recommended to allow large gaps.
+
+**Speed-oriented methods:**
+
+- *FFT-NS-i* (iterative refinement method; two cycles only):
+
+ - mafft --retree 2 --maxiterate 2 input [> output]
+- *FFT-NS-2* (fast; progressive method):
+
+ - mafft --retree 2 --maxiterate 0 input [> output]
+- *NW-NS-i* (iterative refinement method without FFT approximation; two cycles only):
+
+ - mafft --retree 2 --maxiterate 2 --nofft input [> output]
+- *NW-NS-2* (fast; progressive method without the FFT approximation):
+
+ - mafft --retree 2 --maxiterate 0 --nofft input [> output]
+- *NW-NS-PartTree-1* (recommended for ~10,000 to ~50,000 sequences; progressive method with the PartTree algorithm):
+
+ - mafft --retree 1 --maxiterate 0 --nofft --parttree input [> output]
+- *FFT-NS-1* (very fast; recommended for >2000 sequences; progressive method with a rough guide tree):
+
+ - mafft --retree 1 --maxiterate 0 input [> output]
+ ]]>
+
+
diff -r 6f28e90db932 -r bf28a8cff401 test-data/mafft_auto_linsi.aln
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mafft_auto_linsi.aln Wed Mar 20 07:34:52 2024 +0000
@@ -0,0 +1,504 @@
+> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91]
+M------------------NGTE-G------DNFYVPF----SNKTGLARSPYEYPQY--
+--------------------------------------------------YLAEPW----
+---------------KYSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLA
+MANLFMVLFG-FTVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERY
+IVICKPMGNF-RFGNTHAIMGVAFTWIMALAC-AAPPLVG-WS-----RYIPEGMQCSCG
+PDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQESA-------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------------------------STQKAEKEVTRMVVLMVIGFLVCWVPYAS
+VAFYIFT---HQGS-DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTL-----
+CC-----GKNPLGDDE-SGA-STSK-TEVSSVS-TSPVSPA-------------------
+------------------------------------------------------------
+---------------
+> 2== U22180 1 rat opsin [J.Mol.Neurosci.5(3),207-209'94]
+M------------------NGTE-G------PNFYVPF----SNITGVVRSPFEQPQY--
+--------------------------------------------------YLAEPW----
+---------------QFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLA
+VADLFMVFGG-FTTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERY
+VVVCKPMSNF-RFGENHAIMGVAFTWVMALAC-AAPPLVG-WS-----RYIPEGMQCSCG
+IDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESA-------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------------------------TTQKAEKEVTRMVIIMVIFFLICWLPYAS
+VAMYIFT---HQGS-NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSL-----
+CC-----GKNPLGDDE-ASA-TASK-TETSQVA-PA------------------------
+------------------------------------------------------------
+---------------
+> 3== M92038 1 chicken green sensitive cone opsin [PNAS89,5932-5936'9
+M------------------NGTE-G------INFYVPM----SNKTGVVRSPFEYPQY--
+--------------------------------------------------YLAEPW----
+---------------KYRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLA
+VADLFMACFG-FTVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERY
+IVVCKPMGNF-RFSATHAMMGIAFTWVMAFSC-AAPPLFG-WS-----RYMPEGMQCSCG
+PDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQESA-------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------------------------TTQKAEKEVTRMVILMVLGFMLAWTPYAV
+VAFWIFT---NKGA-DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTI-----
+CC-----GKNPFGDEDVSSTVSQSK-TEVSSVS-SSQVSPA-------------------
+------------------------------------------------------------
+---------------
+> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish
+M------------------NGTE-G------KNFYVPM----SNRTGLVRSPFEYPQY--
+--------------------------------------------------YLAEPW----
+---------------QFKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLA
+VAGTIMVCFG-FTVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERY
+IVVCKPMGSF-KFSSSHAFAGIAFTWVMALAC-AAPPLFG-WS-----RYIPEGMQCSCG
+PDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQDSA-------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------------------------STQKAEREVTKMVILMVFGFLIAWTPYAT
+VAAWIFF---NKGA-DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTI-----
+FC-----GKNPLGDDE-SSTVSTSK-TEVSSVS-PA------------------------
+------------------------------------------------------------
+---------------
+> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish
+M------------------NGTE-G------NNFYVPL----SNRTGLVRSPFEYPQY--
+--------------------------------------------------YLAEPW----
+---------------QFKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLA
+VAGAIMVCFG-FTVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERY
+IVVCKPMGSF-KFSSTHASAGIAFTWVMAMAC-AAPPLVG-WS-----RYIPEGIQCSCG
+PDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQDSA-------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------------------------STQKAEREVTKMVILMVLGFLVAWTPYAT
+VAAWIFF---NKGA-AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTL-----
+FC-----GKNPLGDEE-SSTVSTSK-TEVSSVS-PA------------------------
+------------------------------------------------------------
+---------------
+> 6== L11864 1 Carassius auratus blue cone opsin [Biochemistry32,208-
+M------------------KQVP-EFH----EDFYIPIPL--DINNLSAYSPFLVPQD--
+--------------------------------------------------HLGNQG----
+---------------IFMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLS
+IANLFVAIFG-SPLSFYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERW
+LVICKPLGNF-TFKTPHAIAGCILPWISALAA-SLPPLFG-WS-----RYIPEGLQCSCG
+PDWYTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITLKLAAKAQADSA-------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------------------------STQKAEREVTKMVVVMVLGFLVCWAPYAS
+FSLWIVS---HRGE-EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMMKMV-----
+C------GKN-IEEDE-AST-SSQV-TQVSSVA-PEK-----------------------
+------------------------------------------------------------
+---------------
+> 7== M13299 1 human BCP <>[Science232(4747),193-202'86]
+M------------------RKMS-E------EEFYL-------FKNISSVGPWDGPQY--
+--------------------------------------------------HIAPVW----
+---------------AFYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVS
+FGGFLLCIFS-VFPVFVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERY
+IVICKPFGNF-RFSSKHALTVVLATWTIGIGV-SIPPFFG-WS-----RFIPEGLQCSCG
+PDWYTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRALKAVAAQQQESA-------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------------------------TTQKAEREVSRMVVVMVGSFCVCYVPYAA
+FAMYMVN---NRNH-GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIMKMV-----
+C------GKA-MTDES-DTC-SSQK-TEVSTVS-STQVGPN-------------------
+------------------------------------------------------------
+---------------
+> 8=opsin, greensensitive human (fragment) S07060
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+--DLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERW
+LVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW-TAPPIFG-WS-----RYWPHGLKTSCG
+PDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESE-------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------------------------STQKAEKEVTRMVVVMVLAFC--------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+---------------
+> 9== K03494 1 human GCP <>[Science232(4747),193-202'86]
+MAQQWSLQRLAGRHPQDSYEDSTQS------SIFTY-------TNSNSTRGPFEGPNY--
+--------------------------------------------------HIAPRW----
+---------------VYHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLA
+VADLAETVIA-STISVVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERW
+MVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW-TAPPIFG-WS-----RYWPHGLKTSCG
+PDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESE-------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------------------------STQKAEKEVTRMVVVMVLAFCFCWGPYAF
+FACFAAA---NPGY-PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCILQLF-----
+-------GKK-VDDGS-ELS-SASK-TEVSSVS---SVSPA-------------------
+------------------------------------------------------------
+---------------
+> 10== Z68193 1 human Red Opsin <>[]
+MAQQWSLQRLAGRHPQDSYEDSTQS------SIFTY-------TNSNSTRGPFEGPNY--
+--------------------------------------------------HIAPRW----
+---------------VYHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLA
+VADLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERW
+LVVCKPFGNV-RFDAKLAIVGIAFSWIWSAVW-TAPPIFG-WS-----RYWPHGLKTSCG
+PDVFSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAIRAVAKQQKESE-------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------------------------STQKAEKEVTRMVVVMIFAYCVCWGPYTF
+FACFAAA---NPGY-AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCILQLF-----
+-------GKK-VDDGS-ELS-SASK-TEVSSVS---SVSPA-------------------
+------------------------------------------------------------
+---------------
+> 11== M92036 1 Gecko gecko P521 [PNAS89,6841-6845'92]
+MTEAWNVAVFAARRSRDD-DDTTRG------SVFTY-------TNTNNTRGPFEGPNY--
+--------------------------------------------------HIAPRW----
+---------------VYNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLA
+FVDLVETLVA-STISVFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERW
+FVVCKPFGNI-KFDSKLAIIGIVFSWVWAWGW-SAPPIFG-WS-----RYWPHGLKTSCG
+PDVFSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAIRAVAAQQKESE-------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------------------------STQKAEREVSRMVVVMIVAFCICWGPYAS
+FVSFAAA---NPGY-AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIMQLF-----
+-------GKK-VDDGS-EAS-TTSR-TEVSSVS-NSSVAPA-------------------
+------------------------------------------------------------
+---------------
+> 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90]
+MA-AWE-AAFAARRRHEE-EDTTRD------SVFTY-------TNSNNTRGPFEGPNY--
+--------------------------------------------------HIAPRW----
+---------------VYNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLA
+VADLGETVIA-STISVINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERW
+FVVCKPFGNI-KFDGKLAVAGILFSWLWSCAW-TAPPIFG-WS-----RYWPHGLKTSCG
+PDVFSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAIRAVAAQQKESE-------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------------------------STQKAEKEVSRMVVVMIVAYCFCWGPYTF
+FACFAAA---NPGY-AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCILQLF-----
+-------GKK-VDDGS-EVS-T-SR-TEVSSVS-NSSVSPA-------------------
+------------------------------------------------------------
+---------------
+> 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95]
+-----------------------MS------SNSSQ-------APPNGTPGPFDGPQW--
+-------------------------------------------------PYQAPQS----
+---------------TYVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLA
+VADLLVTLCG-SSVSLSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERY
+VVVCKPLGDF-QFQRRHAVSGCAFTWGWALLW-SAPPLLG-WS-----SYVPEGLRTSCG
+PNWYTGGSN--NNSYILSLFVTCFVLPLSLILFSYTNLLLTLRAAAAQQKEAD-------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------------------------TTQRAEREVTRMVIVMVMAFLLCWLPYST
+FALVVAT---HKGI-IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLLEML-----
+CCGYQ--PQR-TGKAS-PGT-PGPH-ADVTAAGLRNKVMPAHPV----------------
+------------------------------------------------------------
+---------------
+> 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87]
+M------------------ESGNVS------SSLF------------GNVSTALRPEA--
+-------------------------RLSA---E---TRLLGWNVPPEELRHIPEHWLTYP
+----------EPPESMNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLA
+FCDFMMMVK--TPIFIYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRF
+NVITRPMEG--KMTHGKAIAMIIFIYMYATPW-VVACYTETWG-----RFVPEGYLTSCT
+FDYLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKKMNVES
+------------------------------------------------------------
+------------------------------------------------------------
+-----------------LRS-------NVDKNKETAEIRIAKAAITICFLFFCSWTPYGV
+MSLIGAF---GDKT-LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWL--
+AL-----NEK-APESS-AVA-STST-TQEPQQT---------------------------
+TAA---------------------------------------------------------
+---------------
+> 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92
+M------------------EYHNVS------SVL-------------GNVSSVLRPDA--
+-------------------------RLSA---E---SRLLGWNVPPDELRHIPEHWLIYP
+----------EPPESMNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLA
+FCDFMMMIK--TPIFIYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRY
+NVITRPMEG--KMTHGKAIAMIIFIYLYATPW-VVACYTESWG-----RFVPEGYLTSCT
+FDYLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKKMNVDS
+------------------------------------------------------------
+------------------------------------------------------------
+-----------------LRS-------NVDKSKEAAEIRIAKAAITICFLFFASWTPYGV
+MSLIGAF---GDKT-LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWL--
+AI-----SEK-APESR-AAI-STST-TQEQQQT---------------------------
+TAA---------------------------------------------------------
+---------------
+> 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87]
+M------------------EPLC------------------------NASEPPLRPEA--
+-------------------------R-SSGNGD---LQFLGWNVPPDQIQYIPEHWLTQL
+----------EPPASMHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLA
+VFDLIMCLK--APIF--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRY
+NVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVPEGYLTSCS
+FDYLSDN--FDTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKALREQAKKMNVES
+------------------------------------------------------------
+------------------------------------------------------------
+-----------------LRS-------NVDKSKETAEIRIAKAAITICFLFFVSWTPYGV
+MSLIGAF---GDKS-LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWL--
+GV-----NEK-SGEIS-SAQ-STTT-QEQQQTT---------------------------
+AA----------------------------------------------------------
+---------------
+> 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92
+M------------------DALC------------------------NASEPPLRPEA--
+-------------------------RMSSGSDE---LQFLGWNVPPDQIQYIPEHWLTQL
+----------EPPASMHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLA
+VFDLIMCLK--APIFIYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRY
+NVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVPEGYLTSCS
+FDYLSDN--FDTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKALREQAKKMNVES
+------------------------------------------------------------
+------------------------------------------------------------
+-----------------LRS-------NVDKSKETAEIRIAKAAITICFLFFVSWTPYGV
+MSLIGAF---GDKS-LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWL--
+GV-----NEK-SGEAS-SAQ-STTT-QEQTQQT---------------------------
+SAA---------------------------------------------------------
+---------------
+> 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 [J.Exp.Biol.1
+M------------------TNAT-------------------------------GPQMAY
+-------YG--------------AASMDFGYPE---GVSIVDFVRPEIKPYVHQHWYNYP
+----------PVNPMWHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLA
+LSDLIMLTTN-VPFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRY
+NIICNGFNGP-KLTTGKAVVFALISWVIAIGC-ALPPFFG-WG-----NYILEGILDSCS
+YDYLTQD--FNTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQAKKMNVST
+------------------------------------------------------------
+------------------------------------------------------------
+-----------------LRS--------NEADAQRAEIRIAKTALVNVSLWFICWTPYAL
+ISLKGVM---GDTS-GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWF--
+CV-----HET-ETKSN-DDS-QSNS-TVAQDKA---------------------------
+------------------------------------------------------------
+---------------
+> 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 [J.Exp.Biol.1
+M------------------ANVT-------------------------------GPQMAF
+-------YG--------------SGAATFGYPE---GMTVADFVPDRVKHMVLDHWYNYP
+----------PVNPMWHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLA
+LSDLIMLTTN-FPPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRY
+NIICNGFNGP-KLTQGKATFMCGLAWVISVGW-SLPPFFG-WG-----SYTLEGILDSCS
+YDYFTRD--MNTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQAKKMNVTN
+------------------------------------------------------------
+------------------------------------------------------------
+-----------------LRS--------NEAETQRAEIRIAKTALVNVSLWFICWTPYAA
+ITIQGLL---GNAE-GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWF--
+CV-----HEK-DPNDV-EEN-QSSN-TQTQEKS---------------------------
+------------------------------------------------------------
+---------------
+> 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85]
+M------------------ESFA-------------------------VAAAQLGPHF--
+------------------------APLS--------NGSVVDKVTPDMAHLISPYWNQFP
+----------AMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLA
+ISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRY
+QVIVKGMAGR-PMTIPLALGKM---------------------------YVPEGNLTSCG
+IDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKS
+------------------------------------------------------------
+------------------------------------------------------------
+-----------------LRS--------SEDAEKSAEGKLAKVALVTITLWFMAWTPYLV
+INCMGLF---KF-E-GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCC--
+VF-----GKV-DDGKS-SDA-QSQA-TASEAES------KA-------------------
+------------------------------------------------------------
+---------------
+> 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85]
+M------------------ESFA-------------------------VAAAQLGPHF--
+------------------------APLS--------NGSVVDKVTPDMAHLISPYWNQFP
+----------AMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLA
+ISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRY
+QVIVKGMAGR-PMTIPLALGKIAYIWFMSSIW-CLAPAFG-WS-----RYVPEGNLTSCG
+IDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKS
+------------------------------------------------------------
+------------------------------------------------------------
+-----------------LRS--------SEDAEKSAEGKLAKVALVTITLWFMAWTPYLV
+INCMGLF---KF-E-GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCC--
+VF-----GKV-DDGKS-SDA-QSQA-TASEAES------KA-------------------
+------------------------------------------------------------
+---------------
+> 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204'
+M------------------DSFA-------------------------AVATQLGPQF--
+------------------------AAPS--------NGSVVDKVTPDMAHLISPYWDQFP
+----------AMDPIWAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLA
+ISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRY
+QVIVKGMAGR-PMTIPLALGKIAYIWFMSTIWCCLAPVFG-WS-----RYVPEGNLTSCG
+IDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKS
+------------------------------------------------------------
+------------------------------------------------------------
+-----------------LRS--------SEDADKSAEGKLAKVALVTISLWFMAWTPYLV
+INCMGLF---KF-E-GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCC--
+VF-----GKV-DDGKS-SEA-QSQA-TTSEAES------KA-------------------
+------------------------------------------------------------
+---------------
+> 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86]
+M------------------ERSH--------------------LPETPFDLAHSGPRF--
+------------------------QAQSSG------NGSVLDNVLPDMAHLVNPYWSRFA
+----------PMDPMMSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLA
+FSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRY
+NVIVKGINGT-PMTIKTSIMKILFIWMMAVFW-TVMPLIG-WS-----AYVPEGNLTACS
+IDYMTRM--WNPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKAMREQAKKMNVKS
+------------------------------------------------------------
+------------------------------------------------------------
+-----------------LRS--------SEDCDKSAEGKLAKVALTTISLWFMAWTPYLV
+ICYFGLF---KI-D-GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMC--
+VF-----GNT-DEPKP-DAP-ASDTETTSEADS------KA-------------------
+------------------------------------------------------------
+---------------
+> 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92
+M------------------ERSL--------------------LPEPPLAMALLGPRF--
+------------------------EAQTGG------NRSVLDNVLPDMAPLVNPHWSRFA
+----------PMDPTMSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLA
+FSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRY
+NVIVKGINGT-PMTIKTSIMKIAFIWMMAVFW-TIMPLIG-WS-----SYVPEGNLTACS
+IDYMTRQ--WNPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKAMRDQAKKMNVKS
+------------------------------------------------------------
+------------------------------------------------------------
+-----------------LRS--------SEDCDKSAENKLAKVALTTISLWFMAWTPYLI
+ICYFGLF---KI-D-GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMC--
+VC-----GTT-DEPKP-DAP-PSDTETTSEAES------KD-------------------
+------------------------------------------------------------
+---------------
+> 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[]
+M-------------------------------------------------IAVSGPSY--
+------------------------EAFSYGGQARFNNQTVVDKVPPDMLHLIDANWYQYP
+----------PLNPMWHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLA
+ISNFLMMFCM-SPPMVINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRY
+NVIVKGLSGK-PLSINGALIRIIAIWLFSLGW-TIAPMFG-WN-----RYVPEGNMTACG
+TDYFNRG--LLSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQAKKMNVAS
+------------------------------------------------------------
+------------------------------------------------------------
+-----------------LRS--------SENQNTSAECKLAKVALMTISLWFMAWTPYLV
+INFSGIF---NL-V-KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSL--
+AC-----AA--EPSSD-AVS-TTSG-TTTVTDN------EK------------------S
+NA----------------------------------------------------------
+---------------
+> 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93]
+M------------------ANQL---------------------------------SY--
+------------------------SSLGWPYQP---NASVVDTMPKEMLYMIHEHWYAFP
+----------PMNPLWYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLA
+FSDFCMMAFM-MPTMTSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRY
+NVIVRGMAAA-PLTHKKATLLLLFVWIWSGGW-TILPFFG-WS-----RYVPEGNLTSCT
+VDYLTKD--WSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQLREQAKKMNVAS
+------------------------------------------------------------
+------------------------------------------------------------
+-----------------LRA-------NADQQKQSAECRLAKVAMMTVGLWFMAWTPYLI
+ISWAGVF---SSGT-RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSL--
+AC-----GSG-ESGSD-VKS-EASA-TTTMEEK------PKIPEA---------------
+------------------------------------------------------------
+---------------
+> 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88]
+M------------------VEST----------------------TLVNQTWWYNPTV--
+--------------------------------------------------DIHPHWAKFD
+----------PIPDAVYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLA
+MSDLSFSAINGFPLKTISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRY
+NVIGRPMAASKKMSHRRAFLMIIFVWMWSIVW-SVGPVFN-WG-----AYVPEGILTSCS
+FDYLSTD--PSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAKRLNAKE
+------------------------------------------------------------
+------------------------------------------------------------
+-----------------LRK---------AQAGASAEMKLAKISMVIITQFMLSWSPYAI
+IALLAQF---GPAE-WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLT
+CCQFD------EKECE-DAN-DAEE-EVVASER----GGESRDAAQMKEMMAMMQKMQAQ
+QAAYQP---PPPPQGYPPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAP
+QGAPPQGVDNQAYQA
+> 28== X70498 1 Todarodes pacificus rhodopsin [FEBS317(1-2),5-11'93]
+M------------------GRDL-----------------------RDNETWWYNPSI--
+--------------------------------------------------VVHPHWREFD
+----------QVPDAVYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLA
+FSDFTFSLVNGFPLMTISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRY
+NVIGRPMAASKKMSHRRAFIMIIFVWLWSVLW-AIGPIFG-WG-----AYTLEGVLCNCS
+FDYISRD--STTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKRLNAKE
+------------------------------------------------------------
+------------------------------------------------------------
+-----------------LRK---------AQAGANAEMRLAKISIVIVSQFLLSWSPYAV
+VALLAQF---GPLE-WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLT
+CCQFD------DKETE-DDK-DAET-EIPAGES--SDAAPSADAAQMKEMMAMMQKMQQQ
+QAAYPPQGYAPPPQGYPPQGYPPQGY--PPQGYPPQGYPP---PPQGAPPQGAPP-----
+-AAPPQGVDNQAYQA
+> 29== L21195 1 human serotonin 5-HT7 receptor protein 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93]
+M-----------------------------------------------------------
+--------------------P---HLLS--------GFL-------EVTASPAPTWDAPP
+DNVSGCGEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLA
+LADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRY
+LGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-WA-----QNVNDDKVCLIS
+QDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPG-------
+-------FPRVQPESVISLNG---------------------------------------
+-----------------------VVKLQKE-------------------VEECAN-----
+-------------LSRLLKHE------RKNISIFKREQKAATTLGIIVGAFTVCWLPFFL
+LSTARPFICGTSCS-CIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLL-----
+QCQYRNINRK-------LSAAGMHE-ALKLAER------PERSEFVL------------Q
+NSDH--------------------------------------------------------
+-------CGKKGHDT
+> 31=p A47425 serotonin receptor 5HT-7 - rat
+M-----------------------------------------------------------
+--------------------P---HLLS--------GFL-------EVTASPAPTWDAPP
+DNVSGCGEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLA
+LADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRY
+LGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-WA-----QNVNDDKVCLIS
+QDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPG-------
+-------FPRVQPESVISLNG---------------------------------------
+-----------------------VVKLQKE-------------------VEECAN-----
+-------------LSRLLKHE------RKNISIFKREQKAATTLGIIVGAFTVCWLPFFL
+LSTARPFICGTSCS-CIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLL-----
+QCQYRNINRK-------LSAAGMHE-ALKLAER------PERSEFVL------------Q
+NSDH--------------------------------------------------------
+-------CGKKGHDT
+> 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92]
+M------------------DVLSPG-------------------------------QG--
+------------------------NNTTSPPAPF-E---------------TGGNTTGIS
+----------DVTVSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLA
+VTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRY
+WAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP---EDRSDPDACTIS
+KDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKKVEKTGADT
+RHGASPAPQPKK-----SVNGE--SGSRNWRLGVESKAGGALC-----------------
+-------------------------------ANGAVRQGDDGAALEVIEVHRVGNSKEHL
+PLPSEAG--PTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFI
+VALVLPF---CESSCHMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKII-----
+KCKFCRQ-----------------------------------------------------
+------------------------------------------------------------
+---------------
+> 33=p A35181 serotonin receptor class 1A - rat
+M------------------DVFSFG-------------------------------QG--
+------------------------NNTTASQEPF-G---------------TGGNVTSIS
+----------DVTFSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLA
+VTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRY
+WAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP---EDRSDPDACTIS
+KDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRKVEKKGAGT
+SLGTSSAPPPKK-----SLNGQ--PGSGDWRRCAENRAVGTPC-----------------
+-------------------------------TNGAVRQGDDEATLEVIEVHRVGNSKEHL
+PLPSESG--SNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFI
+VALVLPF---CESSCHMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKII-----
+KCKFCRR-----------------------------------------------------
+------------------------------------------------------------
+---------------
+> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93]
+M------------------ANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLV
+TSDFNDSYGLTGQFINGSHSSRSRDNASANDT--------------SATNMTDDRYWSLT
+----------VYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLA
+VADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRY
+WAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIIS
+QDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKT
+EETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPEN
+ANGVNSNSSS----------SERLKQIQIETAEAFANGCAEEASIAML-ERQCNNGKK--
+-----------------ISSNDTPYSRTREKLELKRERKAARTLAIITGAFLICWLPFFI
+IALIGPF---VDPE-GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKIL-----
+FGKYRRGHR---------------------------------------------------
+------------------------------------------------------------
+---------------
+> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail
+M------------------ANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLV
+TSDFNDSYGLTGQFINGSHSSRSRDNASANDT--------------SATNMTDDRYWSLT
+----------VYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLA
+VADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRY
+WAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIIS
+QDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKT
+EETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPEN
+ANGVNSNSSS----------SERLKQIQIETAEAFANGCAEEASIAML-ERQCNNGKK--
+-----------------ISSNDTPYSRTREKLELKRERKAARTLAIITGAFLICWLPFFI
+IALIGPF---VDPE-GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKIL-----
+FGKYRRGHR---------------------------------------------------
+------------------------------------------------------------
+---------------
+> 36== X95604 1 Bombyx mori serotonin receptor [InsectBiochem.Mol.Bi
+M------------------EGAE-GQEELDWEALYLRLPL--------------------
+------------------------QNCSWNSTGWEPNW--------NVTVVPNTTWWQAS
+-----APFDTPAALVRAAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLA
+VADLLVACLV-MPLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRY
+WAVTN-IDYIHASTAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCVVS
+QDV----------GYQIFATASSFYVPVLIILILYWRIYQTARKRIRRRRGATARGGVGP
+-------PP-----------------------------------------------VPAG
+GALVAGGGSGGIAAAVVAVIGRPLPTISETTTTGFTNVSSNNTSPE---KQSCANGLEAD
+PPTTGYGAVAAAYYPSLVRRK------PKEAADSKRERKAAKTLAIITGAFVACWLPFFV
+LAILVPT---CDCE--VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLL-----
+CGRRVRRRRA-----------------------------PQ-------------------
+------------------------------------------------------------
+---------------
diff -r 6f28e90db932 -r bf28a8cff401 test-data/mafft_custom_original.clustal.aln
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mafft_custom_original.clustal.aln Wed Mar 20 07:34:52 2024 +0000
@@ -0,0 +1,496 @@
+CLUSTAL format alignment by MAFFT G-INS-i (v7.520)
+
+
+1== MN----------------------------------------------------------
+2== MN----------------------------------------------------------
+3== MN----------------------------------------------------------
+4=p MN----------------------------------------------------------
+5=p MN----------------------------------------------------------
+6== MK----------------------------------------------------------
+7== MR----------------------------------------------------------
+8=opsin, ------------------------------------------------------------
+9== MA------QQWS-LQRLAGRHPQDS--------------------------YED------
+10== MA------QQWS-LQRLAGRHPQDS--------------------------YED------
+11== MT------EAWNVAVFAARRSRDD----------------------------DD------
+12== MA-------AWE-AAFAARRRHEE----------------------------ED------
+13== MS----------------------------------------------------------
+14== MESGNVS-----------------------------------------------------
+15== MEYHNVS-----------------------------------------------------
+16== ME----------------------------------------------------------
+17== MD----------------------------------------------------------
+18== MT----------------------------------------------------------
+19== MA----------------------------------------------------------
+20== MESF--------------------------------------------------------
+21== MESF--------------------------------------------------------
+22== MDSF--------------------------------------------------------
+23== MERSHLP---------------------------------------------------ET
+24== MERSLLP---------------------------------------------------EP
+25== MI----------------------------------------------------------
+26== M-----------------------------------------------------------
+27== MV----------------------------------------------------------
+28== MG----------------------------------------------------------
+29== MMDVN-----------SSGRPDLYGHLRSFLLPEVGRGLPDLSPDG------------GA
+30== M-----------------------------------------------------------
+31=p M-----------------------------------------------------------
+32== MDVLS-------------------------------------------------------
+33=p MDVFS-------------------------------------------------------
+34== MANFTFGDLALD-VARMGGLASTPSGLRS-----TGLTTPGLSPTGLVTSDFNDSYGLTG
+35=p MANFTFGDLALD-VARMGGLASTPSGLRS-----TGLTTPGLSPTGLVTSDFNDSYGLTG
+36== ME----------------------------------------------------------
+
+
+1== ----GTEG--DNFY------------VPFSNKTG----------------------LARS
+2== ----GTEG--PNFY------------VPFSNITG----------------------VVRS
+3== ----GTEG--INFY------------VPMSNKTG----------------------VVRS
+4=p ----GTEG--KNFY------------VPMSNRTG----------------------LVRS
+5=p ----GTEG--NNFY------------VPLSNRTG----------------------LVRS
+6== ----QVPEFHEDFY------IPIP--LDINNLSA------------------------YS
+7== ----KMSE--EEFY------------L-FKNISS----------------------V--G
+8=opsin, ------------------------------------------------------------
+9== ----STQS--SIFT------------YTNSNSTR-------------------------G
+10== ----STQS--SIFT------------YTNSNSTR-------------------------G
+11== ----TTRG--SVFT------------YTNTNNTR-------------------------G
+12== ----TTRD--SVFT------------YTNSNNTR-------------------------G
+13== ----SNSS------------------QAPPNGTP-------------------------G
+14== SSLFGNVS--TALR-------------PEARLSA---E------TRLLGWNVPPEELRHI
+15== SVL-GNVS--SVLR-------------PDARLSA---E------SRLLGWNVPPDELRHI
+16== -PLCNASE--PPLR-------------PEAR-SSGNGD------LQFLGWNVPPDQIQYI
+17== -ALCNASE--PPLR-------------PEARMSSGSDE------LQFLGWNVPPDQIQYI
+18== ----NATG--PQMAY-----------YGAASMDFGYPE------GVSIVDFVRPEIKPYV
+19== ----NVTG--PQMAF-----------YGSGAATFGYPE------GMTVADFVPDRVKHMV
+20== AVAAAQLG--PHFA----------------PLS-----------NGSVVDKVTPDMAHLI
+21== AVAAAQLG--PHFA----------------PLS-----------NGSVVDKVTPDMAHLI
+22== AAVATQLG--PQFA----------------APS-----------NGSVVDKVTPDMAHLI
+23== PFDLAHSG--PRFQ----------------AQSSG---------NGSVLDNVLPDMAHLV
+24== PLAMALLG--PRFE----------------AQTGG---------NRSVLDNVLPDMAPLV
+25== ----AVSG--PSYE----------------AFSYGGQARFN---NQTVVDKVPPDMLHLI
+26== -----ANQ--LSYS----------------SLGWPYQP------NASVVDTMPKEMLYMI
+27== ----ESTT--------------------LVNQTWWY--------NPTVD----------I
+28== ----RDLR---------------------DNETWWY--------NPSIV----------V
+29== DPVAGSWA--PHLL------------S---EVTASPAPTWDAPPDNASG-----------
+30== ----------PHLL------------SGFLEVTASPAPTWDAPPDNVSG-----------
+31=p ----------PHLL------------SGFLEVTASPAPTWDAPPDNVSG-----------
+32== --------------------------PGQGNNTTSPPAPFETGGNTTGI-----------
+33=p --------------------------FGQGNNTTASQEPFGTGGNVTSI-----------
+34== QFINGSHS--SRSRD-----------NASANDT-----------SATNM----------T
+35=p QFINGSHS--SRSRD-----------NASANDT-----------SATNM----------T
+36== ----GAEG--QEELDWEALYLRLPLQNCSWNSTGWEPN-----WNVTVV----------P
+
+
+1== PYEYPQY------YLAEPWKYSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYIL
+2== PFEQPQY------YLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYIL
+3== PFEYPQY------YLAEPWKYRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYIL
+4=p PFEYPQY------YLAEPWQFKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFIL
+5=p PFEYPQY------YLAEPWQFKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFIL
+6== PFLVPQD------HLGNQGIFMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYIL
+7== PWDGPQY------HIAPVWAFYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYIL
+8=opsin, ------------------------------------------------------------
+9== PFEGPNY------HIAPRWVYHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWIL
+10== PFEGPNY------HIAPRWVYHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWIL
+11== PFEGPNY------HIAPRWVYNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWIL
+12== PFEGPNY------HIAPRWVYNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWIL
+13== PFDGPQWP-----YQAPQSTYVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYIL
+14== PEHWLTY------PEPPESMNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILV
+15== PEHWLIY------PEPPESMNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILV
+16== PEHWLTQ------LEPPASMHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFV
+17== PEHWLTQ------LEPPASMHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFV
+18== HQHWYNY------PPVNPMWHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILV
+19== LDHWYNY------PPVNPMWHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLI
+20== SPYWNQF------PAMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLV
+21== SPYWNQF------PAMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLV
+22== SPYWDQF------PAMDPIWAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLV
+23== NPYWSRF------APMDPMMSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLV
+24== NPHWSRF------APMDPTMSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLV
+25== DANWYQY------PPLNPMWHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFV
+26== HEHWYAF------PPMNPLWYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLV
+27== HPHWAKF------DPIPDAVYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFI
+28== HPHWREF------DQVPDAVYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFI
+29== ---------CGEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVCFVKKLRQPSNYLI
+30== ---------CGEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLI
+31=p ---------CGEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLI
+32== -------------SDVTVSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLI
+33=p -------------SDVTFSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLI
+34== DDRYWSL------TVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLI
+35=p DDRYWSL------TVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLI
+36== NTTWWQASAPFD-TPAALVRAAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLI
+
+
+1== LNLAMANLFMVLFG-FTVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLA
+2== LNLAVADLFMVFGG-FTTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLA
+3== VNLAVADLFMACFG-FTVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLA
+4=p VNLAVAGTIMVCFG-FTVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLA
+5=p VNLAVAGAIMVCFG-FTVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLA
+6== VNLSIANLFVAIFG-SPLSFYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVA
+7== VNVSFGGFLLCIFS-VFPVFVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLA
+8=opsin, ------DLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIIS
+9== VNLAVADLAETVIA-STISVVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIIS
+10== VNLAVADLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIIS
+11== VNLAFVDLVETLVA-STISVFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIIS
+12== VNLAVADLGETVIA-STISVINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIIS
+13== VNLAVADLLVTLCG-SSVSLSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILA
+14== INLAFCDFMMM-VK-TPIFIYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIA
+15== INLAFCDFMMM-IK-TPIFIYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIA
+16== LNLAVFDLIMC-LK-APIF--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIG
+17== LNLAVFDLIMC-LK-APIFIYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIG
+18== VNLALSDLIMLTTN-VPFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMIS
+19== VNLALSDLIMLTTN-FPPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMIS
+20== INLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMIS
+21== INLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMIS
+22== INLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMIS
+23== LNLAFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIA
+24== LNLAFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIA
+25== INLAISNFLMMFCM-SPPMVINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIA
+26== VNLAFSDFCMMAFM-MPTMTSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMIT
+27== INLAMSDLSFSAINGFPLKTISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMIS
+28== INLAFSDFTFSLVNGFPLMTISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMIS
+29== VSLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVIS
+30== VSLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVIS
+31=p VSLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVIS
+32== GSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIA
+33=p GSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIA
+34== LSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIA
+35=p LSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIA
+36== LSLAVADLLVACLV-MPLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIA
+ . : : * : :
+
+1== IERYIVICKPMGNF-RFGNTHAIMGVAFTWIMALAC-AAPPLVG-WS-----RYIPEGMQ
+2== IERYVVVCKPMSNF-RFGENHAIMGVAFTWVMALAC-AAPPLVG-WS-----RYIPEGMQ
+3== IERYIVVCKPMGNF-RFSATHAMMGIAFTWVMAFSC-AAPPLFG-WS-----RYMPEGMQ
+4=p IERYIVVCKPMGSF-KFSSSHAFAGIAFTWVMALAC-AAPPLFG-WS-----RYIPEGMQ
+5=p IERYIVVCKPMGSF-KFSSTHASAGIAFTWVMAMAC-AAPPLVG-WS-----RYIPEGIQ
+6== FERWLVICKPLGNF-TFKTPHAIAGCILPWISALAA-SLPPLFG-WS-----RYIPEGLQ
+7== FERYIVICKPFGNF-RFSSKHALTVVLATWTIGIGV-SIPPFFG-WS-----RFIPEGLQ
+8=opsin, WERWLVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW-TAPPIFG-WS-----RYWPHGLK
+9== WERWMVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW-TAPPIFG-WS-----RYWPHGLK
+10== WERWLVVCKPFGNV-RFDAKLAIVGIAFSWIWSAVW-TAPPIFG-WS-----RYWPHGLK
+11== WERWFVVCKPFGNI-KFDSKLAIIGIVFSWVWAWGW-SAPPIFG-WS-----RYWPHGLK
+12== WERWFVVCKPFGNI-KFDGKLAVAGILFSWLWSCAW-TAPPIFG-WS-----RYWPHGLK
+13== LERYVVVCKPLGDF-QFQRRHAVSGCAFTWGWALLW-SAPPLLG-WS-----SYVPEGLR
+14== YDRFNVITRPMEG--KMTHGKAIAMIIFIYMYATPW-VVACYTETWG-----RFVPEGYL
+15== YDRYNVITRPMEG--KMTHGKAIAMIIFIYLYATPW-VVACYTESWG-----RFVPEGYL
+16== YDRYNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVPEGYL
+17== YDRYNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVPEGYL
+18== FDRYNIICNGFNGP-KLTTGKAVVFALISWVIAIGC-ALPPFFG-WG-----NYILEGIL
+19== FDRYNIICNGFNGP-KLTQGKATFMCGLAWVISVGW-SLPPFFG-WG-----SYTLEGIL
+20== LDRYQVIVKGMAGR-PMTIPLALGKM---------------------------YVPEGNL
+21== LDRYQVIVKGMAGR-PMTIPLALGKIAYIWFMSSIW-CLAPAFG-WS-----RYVPEGNL
+22== LDRYQVIVKGMAGR-PMTIPLALGKIAYIWFMSTIWCCLAPVFG-WS-----RYVPEGNL
+23== FDRYNVIVKGINGT-PMTIKTSIMKILFIWMMAVFW-TVMPLIG-WS-----AYVPEGNL
+24== FDRYNVIVKGINGT-PMTIKTSIMKIAFIWMMAVFW-TIMPLIG-WS-----SYVPEGNL
+25== FDRYNVIVKGLSGK-PLSINGALIRIIAIWLFSLGW-TIAPMFG-WN-----RYVPEGNM
+26== LDRYNVIVRGMAAA-PLTHKKATLLLLFVWIWSGGW-TILPFFG-WS-----RYVPEGNL
+27== IDRYNVIGRPMAASKKMSHRRAFLMIIFVWMWSIVW-SVGPVFN-WG-----AYVPEGIL
+28== IDRYNVIGRPMAASKKMSHRRAFIMIIFVWLWSVLW-AIGPIFG-WG-----AYTLEGVL
+29== IDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-WA-----QNVNDDKV
+30== IDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-WA-----QNVNDDKV
+31=p IDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-WA-----QNVNDDKV
+32== LDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP---EDRSDPDA
+33=p LDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP---EDRSDPDA
+34== MDRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGT
+35=p MDRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGT
+36== LDRYWAVTN-IDYIHASTAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLR
+ :*: : : .
+
+1== CSCGPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQESA---
+2== CSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESA---
+3== CSCGPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQESA---
+4=p CSCGPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQDSA---
+5=p CSCGPDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQDSA---
+6== CSCGPDWYTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITLKLAAKAQADSA---
+7== CSCGPDWYTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRALKAVAAQQQESA---
+8=opsin, TSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESE---
+9== TSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESE---
+10== TSCGPDVFSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAIRAVAKQQKESE---
+11== TSCGPDVFSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAIRAVAAQQKESE---
+12== TSCGPDVFSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAIRAVAAQQKESE---
+13== TSCGPNWYTGGS--NNNSYILSLFVTCFVLPLSLILFSYTNLLLTLRAAAAQQKEAD---
+14== TSCTFDYLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKKM
+15== TSCTFDYLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKKM
+16== TSCSFDYLSDN--FDTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKALREQAKKM
+17== TSCSFDYLSDN--FDTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKALREQAKKM
+18== DSCSYDYLTQD--FNTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQAKKM
+19== DSCSYDYFTRD--MNTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQAKKM
+20== TSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKM
+21== TSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKM
+22== TSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKM
+23== TACSIDYMTRM--WNPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKAMREQAKKM
+24== TACSIDYMTRQ--WNPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKAMRDQAKKM
+25== TACGTDYFNRG--LLSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQAKKM
+26== TSCTVDYLTKD--WSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQLREQAKKM
+27== TSCSFDYLSTD--PSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAKRL
+28== CNCSFDYISRD--STTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKRL
+29== CLISQDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPG---
+30== CLISQDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPG---
+31=p CLISQDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPG---
+32== CTISKDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKKVEKT
+33=p CTISKDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRKVEKK
+34== CIISQDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKA
+35=p CIISQDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKA
+36== CVVSQDV----------GYQIFATASSFYVPVLIILILYWRIYQTARKRIRRRRGATARG
+ : * : * :
+
+1== ------------------------------------------------------------
+2== ------------------------------------------------------------
+3== ------------------------------------------------------------
+4=p ------------------------------------------------------------
+5=p ------------------------------------------------------------
+6== ------------------------------------------------------------
+7== ------------------------------------------------------------
+8=opsin, ------------------------------------------------------------
+9== ------------------------------------------------------------
+10== ------------------------------------------------------------
+11== ------------------------------------------------------------
+12== ------------------------------------------------------------
+13== ------------------------------------------------------------
+14== NVE------------------SLRS-----------------------------------
+15== NVD------------------SLRS-----------------------------------
+16== NVE------------------SLRS-----------------------------------
+17== NVE------------------SLRS-----------------------------------
+18== NVS------------------TLRS-----------------------------------
+19== NVT------------------NLRS-----------------------------------
+20== NVK------------------SLRS-----------------------------------
+21== NVK------------------SLRS-----------------------------------
+22== NVK------------------SLRS-----------------------------------
+23== NVK------------------SLRS-----------------------------------
+24== NVK------------------SLRS-----------------------------------
+25== NVA------------------SLRS-----------------------------------
+26== NVA------------------SLRA-----------------------------------
+27== NAK------------------ELRK-----------------------------------
+28== NAK------------------ELRK-----------------------------------
+29== -----------FPRVEPDSVIALNG-----------------------------------
+30== -----------FPRVQPESVISLNG-----------------------------------
+31=p -----------FPRVQPESVISLNG-----------------------------------
+32== GADTRHGASPAPQPKK-----SVNGE--SGSRNWRLGVESKAGGALC-------------
+33=p GAGTSLGTSSAPPPKK-----SLNGQ--PGSGDWRRCAENRAVGTPC-------------
+34== RLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKK
+35=p RLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKK
+36== GVG---------------------------------------------PPP---------
+
+
+1== ------------------------------------------------------------
+2== ------------------------------------------------------------
+3== ------------------------------------------------------------
+4=p ------------------------------------------------------------
+5=p ------------------------------------------------------------
+6== ------------------------------------------------------------
+7== ------------------------------------------------------------
+8=opsin, ------------------------------------------------------------
+9== ------------------------------------------------------------
+10== ------------------------------------------------------------
+11== ------------------------------------------------------------
+12== ------------------------------------------------------------
+13== ------------------------------------------------------------
+14== ------------------------------------------------------------
+15== ------------------------------------------------------------
+16== ------------------------------------------------------------
+17== ------------------------------------------------------------
+18== ------------------------------------------------------------
+19== ------------------------------------------------------------
+20== ------------------------------------------------------------
+21== ------------------------------------------------------------
+22== ------------------------------------------------------------
+23== ------------------------------------------------------------
+24== ------------------------------------------------------------
+25== ------------------------------------------------------------
+26== ------------------------------------------------------------
+27== ------------------------------------------------------------
+28== ------------------------------------------------------------
+29== ---------------------------IVKLQKE-------------------VEECAN-
+30== ---------------------------VVKLQKE-------------------VEECAN-
+31=p ---------------------------VVKLQKE-------------------VEECAN-
+32== -----------------------------------ANGAVRQGDDGAALEVIEVHRVGNS
+33=p -----------------------------------TNGAVRQGDDEATLEVIEVHRVGNS
+34== LPENANGVNSNSSS----------SERLKQIQIETAEAFANGCAEEASIAML-ERQCNNG
+35=p LPENANGVNSNSSS----------SERLKQIQIETAEAFANGCAEEASIAML-ERQCNNG
+36== VPAGGALVAGGGSGGIAAAVVAVIGRPLPTISETTTTGFTNVSSNNTSPE---KQSCANG
+
+
+1== -----------------------------------STQKAEKEVTRMVVLMVIGFLVCWV
+2== -----------------------------------TTQKAEKEVTRMVIIMVIFFLICWL
+3== -----------------------------------TTQKAEKEVTRMVILMVLGFMLAWT
+4=p -----------------------------------STQKAEREVTKMVILMVFGFLIAWT
+5=p -----------------------------------STQKAEREVTKMVILMVLGFLVAWT
+6== -----------------------------------STQKAEREVTKMVVVMVLGFLVCWA
+7== -----------------------------------TTQKAEREVSRMVVVMVGSFCVCYV
+8=opsin, -----------------------------------STQKAEKEVTRMVVVMVLAFC----
+9== -----------------------------------STQKAEKEVTRMVVVMVLAFCFCWG
+10== -----------------------------------STQKAEKEVTRMVVVMIFAYCVCWG
+11== -----------------------------------STQKAEREVSRMVVVMIVAFCICWG
+12== -----------------------------------STQKAEKEVSRMVVVMIVAYCFCWG
+13== -----------------------------------TTQRAEREVTRMVIVMVMAFLLCWL
+14== -------------------------------NVDKNKETAEIRIAKAAITICFLFFCSWT
+15== -------------------------------NVDKSKEAAEIRIAKAAITICFLFFASWT
+16== -------------------------------NVDKSKETAEIRIAKAAITICFLFFVSWT
+17== -------------------------------NVDKSKETAEIRIAKAAITICFLFFVSWT
+18== --------------------------------NEADAQRAEIRIAKTALVNVSLWFICWT
+19== --------------------------------NEAETQRAEIRIAKTALVNVSLWFICWT
+20== --------------------------------SEDAEKSAEGKLAKVALVTITLWFMAWT
+21== --------------------------------SEDAEKSAEGKLAKVALVTITLWFMAWT
+22== --------------------------------SEDADKSAEGKLAKVALVTISLWFMAWT
+23== --------------------------------SEDCDKSAEGKLAKVALTTISLWFMAWT
+24== --------------------------------SEDCDKSAENKLAKVALTTISLWFMAWT
+25== --------------------------------SENQNTSAECKLAKVALMTISLWFMAWT
+26== -------------------------------NADQQKQSAECRLAKVAMMTVGLWFMAWT
+27== ---------------------------------AQAGASAEMKLAKISMVIITQFMLSWS
+28== ---------------------------------AQAGANAEMRLAKISIVIVSQFLLSWS
+29== -----------------LSRLLKH------ERKNISIFKREQKAATTLGIIVGAFTVCWL
+30== -----------------LSRLLKH------ERKNISIFKREQKAATTLGIIVGAFTVCWL
+31=p -----------------LSRLLKH------ERKNISIFKREQKAATTLGIIVGAFTVCWL
+32== KEHLPLPSEAG--PTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWL
+33=p KEHLPLPSESG--SNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWL
+34== KK-------------------ISSNDTPYSRTREKLELKRERKAARTLAIITGAFLICWL
+35=p KK-------------------ISSNDTPYSRTREKLELKRERKAARTLAIITGAFLICWL
+36== LEADPPTTGYGAVAAAYYPSLVRR------KPKEAADSKRERKAAKTLAIITGAFVACWL
+ * . :
+
+1== PYASVAFYIFT---HQGSD-FGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLC
+2== PYASVAMYIFT---HQGSN-FGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLC
+3== PYAVVAFWIFT---NKGAD-FTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTIC
+4=p PYATVAAWIFF---NKGAD-FSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIF
+5=p PYATVAAWIFF---NKGAA-FSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLF
+6== PYASFSLWIVS---HRGEE-FDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMMKMVC
+7== PYAAFAMYMVN---NRNHG-LDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIMKMVC
+8=opsin, ------------------------------------------------------------
+9== PYAFFACFAAA---NPGYP-FHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCILQLF-
+10== PYTFFACFAAA---NPGYA-FHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCILQLF-
+11== PYASFVSFAAA---NPGYA-FHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIMQLF-
+12== PYTFFACFAAA---NPGYA-FHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCILQLF-
+13== PYSTFALVVAT---HKGII-IQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLLEMLC
+14== PYGVMSLIGAF---GDKTL-LTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCP
+15== PYGVMSLIGAF---GDKTL-LTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCP
+16== PYGVMSLIGAF---GDKSL-LTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCP
+17== PYGVMSLIGAF---GDKSL-LTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCP
+18== PYALISLKGVM---GDTSG-ITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLP
+19== PYAAITIQGLL---GNAEG-ITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLP
+20== PYLVINCMGLF---KF-EG-LTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCP
+21== PYLVINCMGLF---KF-EG-LTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCP
+22== PYLVINCMGLF---KF-EG-LTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCP
+23== PYLVICYFGLF---KI-DG-LTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCP
+24== PYLIICYFGLF---KI-DG-LTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCP
+25== PYLVINFSGIF---NL-VK-ISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFP
+26== PYLIISWAGVF---SSGTR-LTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFP
+27== PYAIIALLAQF---GPAEW-VTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFP
+28== PYAVVALLAQF---GPLEW-VTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFP
+29== PFFLLSTARPFICGTSCSC-IPLWVERTFLWLGYANSLINPFIYAFFNRDLRTTYRSLLQ
+30== PFFLLSTARPFICGTSCSC-IPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQ
+31=p PFFLLSTARPFICGTSCSC-IPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQ
+32== PFFIVALVLPF---CESSCHMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIK
+33=p PFFIVALVLPF---CESSCHMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIK
+34== PFFIIALIGPF---VDPEG-IPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILF
+35=p PFFIIALIGPF---VDPEG-IPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILF
+36== PFFVLAILVPT---CDCE--VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLLC
+
+
+1== C-GKNPLGDDE-SGA-STSKTEVSSVS-TSPV----------------------------
+2== C-GKNPLGDDE-ASA-TASKTE------TSQV----------------------------
+3== C-GKNPFGDEDVSSTVSQSKTEVSSVS-SSQV----------------------------
+4=p C-GKNPLGDDE-SSTVSTSKTEVSS------V----------------------------
+5=p C-GKNPLGDEE-SSTVSTSKTEVSS------V----------------------------
+6== --GKN-IEEDE-AST-SSQVTQVSSVA-PEK-----------------------------
+7== --GKA-MTDES-DTC-SSQKTEVSTVS-STQV----------------------------
+8=opsin, ------------------------------------------------------------
+9== --GKK-VDDGS-ELS-SASKTEVSSV---SSV----------------------------
+10== --GKK-VDDGS-ELS-SASKTEVSSV---SSV----------------------------
+11== --GKK-VDDGS-EAS-TTSRTEVSSVS-NSSV----------------------------
+12== --GKK-VDDGS-EVS--TSRTEVSSVS-NSSV----------------------------
+13== C-GYQPQRTGKASPGTPGPHADVTAAGLRNKV----------------------------
+14== WLALNEKAPES-SAV-ASTST---TQE-PQQT----------------------------
+15== WLAISEKAPES-RAA-ISTST---TQE-QQQT----------------------------
+16== WLGVNEKSGEI-SSA-QSTTT---QEQ--QQT----------------------------
+17== WLGVNEKSGEA-SSA-QSTTT---QEQ-TQQT----------------------------
+18== WFCVHETETKS-NDD-SQSNS---TVA-Q-------------------------------
+19== WFCVHEKDPND-VEE-NQSSN---TQT-Q-------------------------------
+20== CCVFGKVDDGK-SSD-AQSQA-TASEA-E-------------------------------
+21== CCVFGKVDDGK-SSD-AQSQA-TASEA-E-------------------------------
+22== CCVFGKVDDGK-SSE-AQSQA-TTSEA-E-------------------------------
+23== MCVFGNTDEPK-PDA-PASDTETTSEA-D-------------------------------
+24== MCVCGTTDEPK-PDA-PPSDTETTSEA-E-------------------------------
+25== SLACA-AEPSS-DAV-STTSG-TTTVT-DNEK----------------------------
+26== SLACGSGESGS-DVK-SEASA-TTTME-EKPK----------------------------
+27== WLLTCCQFDEK-ECE-DANDAEEEVVA-SER---GGESRDAAQMKEMMAMMQKMQAQQAA
+28== WVLTCCQFDDK-ETE-DDKDAETEIPA-GESSD-AAPSADAAQMKEMMAMMQKMQQQQAA
+29== --CQYRNINRKLSAA-GMHEALKLAER-PERPEFVLQNADYCRKKG--------------
+30== --CQYRNINRKLSAA-GMHEALKLAER-PERSEFVLQNSDHCGKKG--------------
+31=p --CQYRNINRKLSAA-GMHEALKLAER-PERSEFVLQNSDHCGKKG--------------
+32== --CKFCR-----------------------------------------------------
+33=p --CKFCR-----------------------------------------------------
+34== --GKYRR-----------------------------------------------------
+35=p --GKYRR-----------------------------------------------------
+36== --GRRVR-RRR-------------------------------------------------
+
+
+1== ------------------------------------------------------------
+2== ------------------------------------------------------------
+3== ------------------------------------------------------------
+4=p ------------------------------------------------------------
+5=p ------------------------------------------------------------
+6== ------------------------------------------------------------
+7== ------------------------------------------------------------
+8=opsin, ------------------------------------------------------------
+9== ------------------------------------------------------------
+10== ------------------------------------------------------------
+11== ------------------------------------------------------------
+12== ------------------------------------------------------------
+13== ---------------------------------------MP-------------------
+14== ------------------------------------------------------------
+15== ------------------------------------------------------------
+16== ------------------------------------------------------------
+17== ------------------------------------------------------------
+18== ------------------------------------------------------------
+19== ------------------------------------------------------------
+20== ------------------------------------------------------------
+21== ------------------------------------------------------------
+22== ------------------------------------------------------------
+23== ------------------------------------------------------------
+24== ------------------------------------------------------------
+25== ------------------------------------------------------------
+26== ------------------------------------------------------------
+27== YQP---PPPPQGYPPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGA
+28== YPPQGYAPPPQGYPPQGYPPQGY--PPQGYPPQGYPP---PPQGAPPQGAPP------AA
+29== ------------------------------------------------------------
+30== ------------------------------------------------------------
+31=p ------------------------------------------------------------
+32== ------------------------------------------------------------
+33=p ------------------------------------------------------------
+34== ------------------------------------------------------------
+35=p ------------------------------------------------------------
+36== ------------------------------------------------------------
+
+
+1== ---------SPA
+2== ---------APA
+3== ---------SPA
+4=p ---------SPA
+5=p ---------SPA
+6== ------------
+7== ---------GPN
+8=opsin, ------------
+9== ---------SPA
+10== ---------SPA
+11== ---------APA
+12== ---------SPA
+13== --------AHPV
+14== ---------TAA
+15== ---------TAA
+16== ---------TAA
+17== ---------SAA
+18== ---------DKA
+19== ---------EKS
+20== ---------SKA
+21== ---------SKA
+22== ---------SKA
+23== ---------SKA
+24== ---------SKD
+25== ---------SNA
+26== --------IPEA
+27== PPQGVDNQAYQA
+28== PPQGVDNQAYQA
+29== ---------HDS
+30== ---------HDT
+31=p ---------HDT
+32== -----------Q
+33=p -----------R
+34== ---------GHR
+35=p ---------GHR
+36== ---------APQ
+
diff -r 6f28e90db932 -r bf28a8cff401 test-data/mafft_custom_parttree.aln
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mafft_custom_parttree.aln Wed Mar 20 07:34:52 2024 +0000
@@ -0,0 +1,504 @@
+> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91]
+MN-------------------GTE------GDNFYVPF----------------SNKTGL
+-----------ARSPYEYPQY-----YLAEPWK--------------------Y------
+------------------SALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNL
+AMANLFMVLFG-FTVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIER
+YIVICKPMGN-FRFGNTHAIMGVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSC
+GPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTV----KE-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---AAAAQQ------------------------------------ESASTQKAEKEVTRM
+VVLMVIGFLVCWVPYASVAFYIFT-HQGS--DFGATFMTLPAFFAKSSALYNPVIYILMN
+KQFRNCMITTLC----C---GKNPLGD-DE--SGASTSKTEV------------------
+------------------------------------------------------------
+---SSVS-------TSPVSP-A----------
+> 2== U22180 1 rat opsin [J.Mol.Neurosci.5(3),207-209'94]
+MN-------------------GTE------GPNFYVPF----------------SNITGV
+-----------VRSPFEQPQY-----YLAEPWQ--------------------F------
+------------------SMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNL
+AVADLFMVFGG-FTTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIER
+YVVVCKPMSN-FRFGENHAIMGVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSC
+GIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTV----KE-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---AAAQQQ------------------------------------ESATTQKAEKEVTRM
+VIIMVIFFLICWLPYASVAMYIFT-HQGS--NFGPIFMTLPAFFAKTASIYNPIIYIMMN
+KQFRNCMLTSLC----C---GKNPLGD-DE--ASATASKTE-------------------
+------------------------------------------------------------
+--------------TSQVAP-A----------
+> 3== M92038 1 chicken green sensitive cone opsin [PNAS89,5932-5936'9
+MN-------------------GTE------GINFYVPM----------------SNKTGV
+-----------VRSPFEYPQY-----YLAEPWK--------------------Y------
+------------------RLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNL
+AVADLFMACFG-FTVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIER
+YIVVCKPMGN-FRFSATHAMMGIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSC
+GPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKV----RE-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---AAAQQQ------------------------------------ESATTQKAEKEVTRM
+VILMVLGFMLAWTPYAVVAFWIFT-NKGA--DFTATLMAVPAFFSKSSSLYNPIIYVLMN
+KQFRNCMITTIC----C---GKNPFGD-EDVSSTVSQSKTEV------------------
+------------------------------------------------------------
+---SSVS-------SSQVSP-A----------
+> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish
+MN-------------------GTE------GKNFYVPM----------------SNRTGL
+-----------VRSPFEYPQY-----YLAEPWQ--------------------F------
+------------------KILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNL
+AVAGTIMVCFG-FTVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIER
+YIVVCKPMGS-FKFSSSHAFAGIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSC
+GPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTV----KA-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---AAAQQQ------------------------------------DSASTQKAEREVTKM
+VILMVFGFLIAWTPYATVAAWIFF-NKGA--DFSAKFMAIPAFFSKSSALYNPVIYVLLN
+KQFRNCMLTTIF----C---GKNPLGD-DE-SSTVSTSKTEV------------------
+------------------------------------------------------------
+---SS------------VSP-A----------
+> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish
+MN-------------------GTE------GNNFYVPL----------------SNRTGL
+-----------VRSPFEYPQY-----YLAEPWQ--------------------F------
+------------------KLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNL
+AVAGAIMVCFG-FTVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIER
+YIVVCKPMGS-FKFSSTHASAGIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSC
+GPDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTV----KA-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---AAAQQQ------------------------------------DSASTQKAEREVTKM
+VILMVLGFLVAWTPYATVAAWIFF-NKGA--AFSAQFMAIPAFFSKTSALYNPVIYVLLN
+KQFRSCMLTTLF----C---GKNPLGD-EE-SSTVSTSKTEV------------------
+------------------------------------------------------------
+---SS------------VSP-A----------
+> 6== L11864 1 Carassius auratus blue cone opsin [Biochemistry32,208-
+MK-------------------QVPE----FHEDFYIPIPL------------DINNLSAY
+-------------SPFLVPQD-----HLGNQGI--------------------F------
+------------------MAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNL
+SIANLFVAIFG-SPLSFYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFER
+WLVICKPLGN-FTFKTPHAIAGCILPWISALAA-SLPPLFG-W-----SRYIPEGLQCSC
+GPDWYTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITL----KL-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---AAKAQA------------------------------------DSASTQKAEREVTKM
+VVVMVLGFLVCWAPYASFSLWIVS-HRGE--EFDLRMATIPSCLSKASTVYNPVIYVLMN
+KQFRSCMM-KMV----C---GKN-IEE-DE--ASTSSQVTQV------------------
+------------------------------------------------------------
+---SS------------VAPEK----------
+> 7== M13299 1 human BCP <>[Science232(4747),193-202'86]
+MR-------------------KMS------EEEFYL-----------------FKNISSV
+-------------GPWDGPQY-----HIAPVWA--------------------F------
+------------------YLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNV
+SFGGFLLCIFS-VFPVFVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFER
+YIVICKPFGN-FRFSSKHALTVVLATWTIGIGV-SIPPFFG-W-----SRFIPEGLQCSC
+GPDWYTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRAL----KA-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---VAAQQQ------------------------------------ESATTQKAEREVSRM
+VVVMVGSFCVCYVPYAAFAMYMVN-NRNH--GLDLRLVTIPSFFSKSACIYNPIIYCFMN
+KQFQACIM-KMV----C---GKA-MTD-ES--DTCSSQKTEV------------------
+------------------------------------------------------------
+---STVS-------STQVGP-N----------
+> 8=opsin, greensensitive human (fragment) S07060
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+---DLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWER
+WLVVCKPFGN-VRFDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSC
+GPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RA-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---VAKQQK------------------------------------ESESTQKAEKEVTRM
+VVVMVLAFC---------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+--------------------------------
+> 9== K03494 1 human GCP <>[Science232(4747),193-202'86]
+MAQQWSLQRLAGRHPQDSYEDSTQ------SSIFTYTN----------------SNST--
+------------RGPFEGPNY-----HIAPRWV--------------------Y------
+------------------HLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNL
+AVADLAETVIA-STISVVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWER
+WMVVCKPFGN-VRFDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSC
+GPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RA-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---VAKQQK------------------------------------ESESTQKAEKEVTRM
+VVVMVLAFCFCWGPYAFFACFAAA-NPGY--PFHPLMAALPAFFAKSATIYNPVIYVFMN
+RQFRNCILQLF---------GKK-VDD-GS--ELSSASKTEV------------------
+------------------------------------------------------------
+---SSV---------SSVSP-A----------
+> 10== Z68193 1 human Red Opsin <>[]
+MAQQWSLQRLAGRHPQDSYEDSTQ------SSIFTYTN----------------SNST--
+------------RGPFEGPNY-----HIAPRWV--------------------Y------
+------------------HLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNL
+AVADLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWER
+WLVVCKPFGN-VRFDAKLAIVGIAFSWIWSAVW-TAPPIFG-W-----SRYWPHGLKTSC
+GPDVFSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAI----RA-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---VAKQQK------------------------------------ESESTQKAEKEVTRM
+VVVMIFAYCVCWGPYTFFACFAAA-NPGY--AFHPLMAALPAYFAKSATIYNPVIYVFMN
+RQFRNCILQLF---------GKK-VDD-GS--ELSSASKTEV------------------
+------------------------------------------------------------
+---SSV---------SSVSP-A----------
+> 11== M92036 1 Gecko gecko P521 [PNAS89,6841-6845'92]
+MTEAWNVAVFAARRSRDD-DDTTR------GSVFTYTN----------------TNNT--
+------------RGPFEGPNY-----HIAPRWV--------------------Y------
+------------------NLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNL
+AFVDLVETLVA-STISVFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWER
+WFVVCKPFGN-IKFDSKLAIIGIVFSWVWAWGW-SAPPIFG-W-----SRYWPHGLKTSC
+GPDVFSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAI----RA-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---VAAQQK------------------------------------ESESTQKAEREVSRM
+VVVMIVAFCICWGPYASFVSFAAA-NPGY--AFHPLAAALPAYFAKSATIYNPVIYVFMN
+RQFRNCIMQLF---------GKK-VDD-GS--EASTTSRTEV------------------
+------------------------------------------------------------
+---SSVS-------NSSVAP-A----------
+> 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90]
+MA-AWEAAFAARRRHEE--EDTTR------DSVFTYTN----------------SNNT--
+------------RGPFEGPNY-----HIAPRWV--------------------Y------
+------------------NLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNL
+AVADLGETVIA-STISVINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWER
+WFVVCKPFGN-IKFDGKLAVAGILFSWLWSCAW-TAPPIFG-W-----SRYWPHGLKTSC
+GPDVFSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAI----RA-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---VAAQQK------------------------------------ESESTQKAEKEVSRM
+VVVMIVAYCFCWGPYTFFACFAAA-NPGY--AFHPLAAALPAYFAKSATIYNPIIYVFMN
+RQFRNCILQLF---------GKK-VDD-GS--EVST-SRTEV------------------
+------------------------------------------------------------
+---SSVS-------NSSVSP-A----------
+> 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95]
+MS-----------------SNSSQ------AP----------------------PNGT--
+------------PGPFDGPQW----PYQAPQST--------------------Y------
+------------------VGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNL
+AVADLLVTLCG-SSVSLSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALER
+YVVVCKPLGD-FQFQRRHAVSGCAFTWGWALLW-SAPPLLG-W-----SSYVPEGLRTSC
+GPNWYTGGS--NNNSYILSLFVTCFVLPLSLILFSYTNLLLTL----RA-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---AAAQQK------------------------------------EADTTQRAEREVTRM
+VIVMVMAFLLCWLPYSTFALVVAT-HKGI--IIQPVLASLPSYFSKTATVYNPIIYVFMN
+KQFQSCLLEMLC----CGYQPQR-TGK-AS--PGTPGPHADV------------------
+------------------------------------------------------------
+---TAAG------LRNKVMP-AH-------PV
+> 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87]
+MESGNV---------------SSSLFGNVST-ALRPEARLSA---ETRLLGW--------
+------------NVPPEELR------HIPEHWL-----------TYPEPPESMN------
+------------------YLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINL
+AFCDFMMMVK--TPIFIYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDR
+FNVITRPMEG--KMTHGKAIAMIIFIYMYATPW-VVACYTETW-----GRFVPEGYLTSC
+TFDYLT--DNFDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKA-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---LRDQAKK----------MNVESL----------------RSNVDKNKETAEIRIAKA
+AITICFLFFCSWTPYGVMSLIGAF-GDKT--LLTPGATMIPACACKMVACIDPFVYAISH
+PRYRMELQKRCP----WLALNEKAPE--SS-AVASTSTTQEP------------------
+------------------------------------------------------------
+---------------QQTTA-A----------
+> 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92
+MEYHNV---------------SSVL-GNVSS-VLRPDARLSA---ESRLLGW--------
+------------NVPPDELR------HIPEHWL-----------IYPEPPESMN------
+------------------YLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINL
+AFCDFMMMIK--TPIFIYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDR
+YNVITRPMEG--KMTHGKAIAMIIFIYLYATPW-VVACYTESW-----GRFVPEGYLTSC
+TFDYLT--DNFDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKA-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---LRDQAKK----------MNVDSL----------------RSNVDKSKEAAEIRIAKA
+AITICFLFFASWTPYGVMSLIGAF-GDKT--LLTPGATMIPACTCKMVACIDPFVYAISH
+PRYRMELQKRCP----WLAISEKAPE--SR-AAISTSTTQEQ------------------
+------------------------------------------------------------
+---------------QQTTA-A----------
+> 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87]
+ME---------------------PL-CNASEPPLRPEAR-SSGNGDLQFLGW--------
+------------NVPPDQIQ------YIPEHWL-----------TQLEPPASMH------
+------------------YMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNL
+AVFDLIMCLK--APIF--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDR
+YNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSC
+SFDYLS--DNFDTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKA-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---LREQAKK----------MNVESL----------------RSNVDKSKETAEIRIAKA
+AITICFLFFVSWTPYGVMSLIGAF-GDKS--LLTQGATMIPACTCKLVACIDPFVYAISH
+PRYRLELQKRCP----WLGVNEKSGE--IS-SAQST-TTQEQ------------------
+------------------------------------------------------------
+---------------QQTTA-A----------
+> 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92
+MD---------------------AL-CNASEPPLRPEARMSSGSDELQFLGW--------
+------------NVPPDQIQ------YIPEHWL-----------TQLEPPASMH------
+------------------YMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNL
+AVFDLIMCLK--APIFIYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDR
+YNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSC
+SFDYLS--DNFDTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKA-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---LREQAKK----------MNVESL----------------RSNVDKSKETAEIRIAKA
+AITICFLFFVSWTPYGVMSLIGAF-GDKS--LLTPGATMIPACTCKLVACIEPFVYAISH
+PRYRMELQKRCP----WLGVNEKSGE--AS-SAQST-TTQEQ------------------
+------------------------------------------------------------
+--------------TQQTSA-A----------
+> 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 [J.Exp.Biol.1
+MT-------------------------NATGPQMAYYGAASMD------FGYP-EGVSIV
+-----------DFVRPEIKP------YVHQHWY-----------NYPPVNPMWH------
+------------------YLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNL
+ALSDLIMLTTN-VPFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDR
+YNIICNGFNG-PKLTTGKAVVFALISWVIAIGC-ALPPFFG-W-----GNYILEGILDSC
+SYDYLT--QDFNTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAA-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---MRAQAKK----------MNVSTL----------------RS-NEADAQRAEIRIAKT
+ALVNVSLWFICWTPYALISLKGVM-GDTS--GITPLVSTLPALLAKSCSCYNPFVYAISH
+PKYRLAITQHLP----WFCVHETETKS-ND-DSQSNSTVAQ-------------------
+------------------------------------------------------------
+------------------DK-A----------
+> 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 [J.Exp.Biol.1
+MA-------------------------NVTGPQMAFYGSGAAT------FGYP-EGMTVA
+-----------DFVPDRVKH------MVLDHWY-----------NYPPVNPMWH------
+------------------YLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNL
+ALSDLIMLTTN-FPPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDR
+YNIICNGFNG-PKLTQGKATFMCGLAWVISVGW-SLPPFFG-W-----GSYTLEGILDSC
+SYDYFT--RDMNTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAA-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---MRAQAKK----------MNVTNL----------------RS-NEAETQRAEIRIAKT
+ALVNVSLWFICWTPYAAITIQGLL-GNAE--GITPLLTTLPALLAKSCSCYNPFVYAISH
+PKFRLAITQHLP----WFCVHEKDPND-VE-ENQSSNTQTQ-------------------
+------------------------------------------------------------
+------------------EK-S----------
+> 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85]
+ME-------------------SFAVAAAQLGPHFAPLS-----------------NGSVV
+-----------DKVTPDMAH------LISPYWN-----------QFPAMDPIWA------
+------------------KILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINL
+AISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDR
+YQVIVKGMAG-RPMTIPLALGKM---------------------------YVPEGNLTSC
+GIDYLE--RDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---MREQAKK----------MNVKSL----------------RS-SEDAEKSAEGKLAKV
+ALVTITLWFMAWTPYLVINCMGLF-KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISH
+PKYRLALKEKCP----CCVFGKVDDGK-SS-DAQSQATASEA------------------
+------------------------------------------------------------
+---E--------------SK-A----------
+> 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85]
+ME-------------------SFAVAAAQLGPHFAPLS-----------------NGSVV
+-----------DKVTPDMAH------LISPYWN-----------QFPAMDPIWA------
+------------------KILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINL
+AISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDR
+YQVIVKGMAG-RPMTIPLALGKIAYIWFMSSIW-CLAPAFG-W-----SRYVPEGNLTSC
+GIDYLE--RDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---MREQAKK----------MNVKSL----------------RS-SEDAEKSAEGKLAKV
+ALVTITLWFMAWTPYLVINCMGLF-KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISH
+PKYRLALKEKCP----CCVFGKVDDGK-SS-DAQSQATASEA------------------
+------------------------------------------------------------
+---E--------------SK-A----------
+> 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204'
+MD-------------------SFAAVATQLGPQFAAPS-----------------NGSVV
+-----------DKVTPDMAH------LISPYWD-----------QFPAMDPIWA------
+------------------KILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINL
+AISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDR
+YQVIVKGMAG-RPMTIPLALGKIAYIWFMSTIWCCLAPVFG-W-----SRYVPEGNLTSC
+GIDYLE--RDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---MREQAKK----------MNVKSL----------------RS-SEDADKSAEGKLAKV
+ALVTISLWFMAWTPYLVINCMGLF-KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISH
+PKYRLALKEKCP----CCVFGKVDDGK-SS-EAQSQATTSEA------------------
+------------------------------------------------------------
+---E--------------SK-A----------
+> 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86]
+MERSH--------------LPETPFDLAHSGPRFQAQSSG---------------NGSVL
+-----------DNVLPDMAH------LVNPYWS-----------RFAPMDPMMS------
+------------------KILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNL
+AFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDR
+YNVIVKGING-TPMTIKTSIMKILFIWMMAVFW-TVMPLIG-W-----SAYVPEGNLTAC
+SIDYMT--RMWNPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKA-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---MREQAKK----------MNVKSL----------------RS-SEDCDKSAEGKLAKV
+ALTTISLWFMAWTPYLVICYFGLF-KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISH
+PKYRIVLKEKCP----MCVFGNTDEPKPDA-PASDTETTSEA------------------
+------------------------------------------------------------
+---D--------------SK-A----------
+> 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92
+MERSL--------------LPEPPLAMALLGPRFEAQTGG---------------NRSVL
+-----------DNVLPDMAP------LVNPHWS-----------RFAPMDPTMS------
+------------------KILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNL
+AFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDR
+YNVIVKGING-TPMTIKTSIMKIAFIWMMAVFW-TIMPLIG-W-----SSYVPEGNLTAC
+SIDYMT--RQWNPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKA-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---MRDQAKK----------MNVKSL----------------RS-SEDCDKSAENKLAKV
+ALTTISLWFMAWTPYLIICYFGLF-KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISH
+PNDRLVLKEKCP----MCVCGTTDEPKPDA-PPSDTETTSEA------------------
+------------------------------------------------------------
+---E--------------SK-D----------
+> 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[]
+MI-------------------------AVSGPSYEAFSYGGQA-----RF----NNQTVV
+-----------DKVPPDMLH------LIDANWY-----------QYPPLNPMWH------
+------------------GILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINL
+AISNFLMMFCM-SPPMVINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDR
+YNVIVKGLSG-KPLSINGALIRIIAIWLFSLGW-TIAPMFG-W-----NRYVPEGNMTAC
+GTDYFN--RGLLSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKN-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---MREQAKK----------MNVASL----------------RS-SENQNTSAECKLAKV
+ALMTISLWFMAWTPYLVINFSGIF-NL-V--KISPLFTIWGSLFAKANAVYNPIVYGISH
+PKYRAALFAKFP----SLAC-AAEPSS-DA-VSTTSGTTTVT------------------
+------------------------------------------------------------
+---DNEK-----------SN-A----------
+> 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93]
+M---------------------------ANQLSYSSLGWPYQP------------NASVV
+-----------DTMPKEMLY------MIHEHWY-----------AFPPMNPLWY------
+------------------SILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNL
+AFSDFCMMAFM-MPTMTSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDR
+YNVIVRGMAA-APLTHKKATLLLLFVWIWSGGW-TILPFFG-W-----SRYVPEGNLTSC
+TVDYLT--KDWSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQ-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---LREQAKK----------MNVASL----------------RANADQQKQSAECRLAKV
+AMMTVGLWFMAWTPYLIISWAGVF-SSGT--RLTPLATIWGSVFAKANSCYNPIVYGISH
+PRYKAALYQRFP----SLACGSGESGS-DV-KSEASATTTME------------------
+------------------------------------------------------------
+---EKPK----------IPE-A----------
+> 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88]
+MVESTTL------------VNQT--------------------------WWY---NPTVD
+---------------------------IHPHWA-----------KFDPIPDAVY------
+------------------YSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINL
+AMSDLSFSAINGFPLKTISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDR
+YNVIGRPMAASKKMSHRRAFLMIIFVWMWSIVW-SVGPVFN-W-----GAYVPEGILTSC
+SFDYLS--TDPSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKE-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---MAAMAKR----------LNAKEL----------------R--KAQAGASAEMKLAKI
+SMVIITQFMLSWSPYAIIALLAQF-GPAE--WVTPYAAELPVLFAKASAIHNPIVYSVSH
+PKFREAIQTTFPWLLTCCQFDEKECED-AN-DAEEEVVASER--GGESRDAAQMKEMMAM
+MQKMQAQQAAYQPPPPPQGY--PPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAP
+PQVEAPQGAPPQGVDNQAYQ-A----------
+> 28== X70498 1 Todarodes pacificus rhodopsin [FEBS317(1-2),5-11'93]
+MGRDLR-------------DNET--------------------------WWY---NPSIV
+---------------------------VHPHWR-----------EFDQVPDAVY------
+------------------YSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINL
+AFSDFTFSLVNGFPLMTISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDR
+YNVIGRPMAASKKMSHRRAFIMIIFVWLWSVLW-AIGPIFG-W-----GAYTLEGVLCNC
+SFDYIS--RDSTTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKE-----------
+------------------------------------------------------------
+------------------------------------------------------------
+---MAAMAKR----------LNAKEL----------------R--KAQAGANAEMRLAKI
+SIVIVSQFLLSWSPYAVVALLAQF-GPLE--WVTPYAAQLPVMFAKASAIHNPMIYSVSH
+PKFREAISQTFPWVLTCCQFDDKETED-DK-DAETEIPAGESSDAAPSADAAQMKEMMAM
+MQKMQQQQAAY----PPQGYAPPPQGYPPQGY--PPQGYPPQGYPPQGYPP---PPQGAP
+PQ-GAPPAAPPQGVDNQAYQ-A----------
+> 29== L21195 1 human serotonin 5-HT7 receptor protein 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93]
+M-----------------------------------------------------------
+--------------PHLLSGFLEVTASPAPTWD------------APPDNVSGC------
+-------GEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSL
+ALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDR
+YLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDK--VC
+LIS--------QDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF--------
+---------PGF---------------------------------PRVQPESVI------
+----------------SLNGVVK----------------------LQKEVEECAN-----
+---LSRLLKH--------------------------------ER-KNISIFKREQKAATT
+LGIIVGAFTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFN
+RDLRPTSRSLLQ--------CQYRNIN----RKLSAAGMHEA------------------
+-LKLA-------------------------------------------------------
+---ERPE------RSEFVLQNSDHCGKKGHDT
+> 31=p A47425 serotonin receptor 5HT-7 - rat
+M-----------------------------------------------------------
+--------------PHLLSGFLEVTASPAPTWD------------APPDNVSGC------
+-------GEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSL
+ALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDR
+YLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDK--VC
+LIS--------QDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF--------
+---------PGF---------------------------------PRVQPESVI------
+----------------SLNGVVK----------------------LQKEVEECAN-----
+---LSRLLKH--------------------------------ER-KNISIFKREQKAATT
+LGIIVGAFTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFN
+RDLRTTYRSLLQ--------CQYRNIN----RKLSAAGMHEA------------------
+-LKLA-------------------------------------------------------
+---ERPE------RSEFVLQNSDHCGKKGHDT
+> 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92]
+M-DVLS-------------PGQ--------GNNTTSPPAPFETG----------GNTTGI
+-------------------------SDVTVSYQ---------------------------
+------------------VITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSL
+AVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDR
+YWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP--EDRSDPD---AC
+TIS--------KDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK-----------
+----------------------------TVKKVEKTGADTRHGASPAPQPKKSVNGESGS
+RNWRLGVESKAGGAL-CANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAGPTPCAP-
+----ASFERK-----------NERNA----------------EA-KRKMALARERKTVKT
+LGIIMGTFILCWLPFFIVALVLPF-CESSC-HMPTLLGAIINWLGYSNSLLNPVIYAYFN
+KDFQNAFKKIIK--------CKFCR-----------------------------------
+------------------------------------------------------------
+-------------------------------Q
+> 33=p A35181 serotonin receptor class 1A - rat
+M-DVFS-------------FGQ--------GNNTTASQEPFGTG----------GNVTSI
+-------------------------SDVTFSYQ---------------------------
+------------------VITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSL
+AVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDR
+YWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP--EDRSDPD---AC
+TIS--------KDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK-----------
+----------------------------TVRKVEKKGAGTSLGTSSAPPPKKSLNGQPGS
+GDWRRCAENRAVGTP-CTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESGSNSYAP-
+----ACLERK-----------NERNA----------------EA-KRKMALARERKTVKT
+LGIIMGTFILCWLPFFIVALVLPF-CESSC-HMPALLGAIINWLGYSNSLLNPVIYAYFN
+KDFQNAFKKIIK--------CKFCR-----------------------------------
+------------------------------------------------------------
+-------------------------------R
+> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93]
+M-ANFT-------------FGDLALDVARMGGLASTPSGLRSTG----------LTTPGL
+SPTG------------------LVTSDFNDSYGLTGQFINGSHSSRSRDNASAN-DTSAT
+NMTDDRYWSLTVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSL
+AVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDR
+YWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NN--DPDKTGTC
+IIS--------QDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLK
+TEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKK--------
+--------NRAKKLPENANGVNSNSSSSERLKQIQIE-----------TAEAFANGCAEE
+AS-IAMLERQ-CNNGKKISSNDTPYS----------------RT-REKLELKRERKAART
+LAIITGAFLICWLPFFIIALIGPF-VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFS
+PEFRSAFQKILF--------GKYRRG----------------------------------
+------------------------------------------------------------
+------------------------------HR
+> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail
+M-ANFT-------------FGDLALDVARMGGLASTPSGLRSTG----------LTTPGL
+SPTG------------------LVTSDFNDSYGLTGQFINGSHSSRSRDNASAN-DTSAT
+NMTDDRYWSLTVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSL
+AVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDR
+YWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NN--DPDKTGTC
+IIS--------QDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLK
+TEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKK--------
+--------NRAKKLPENANGVNSNSSSSERLKQIQIE-----------TAEAFANGCAEE
+AS-IAMLERQ-CNNGKKISSNDTPYS----------------RT-REKLELKRERKAART
+LAIITGAFLICWLPFFIIALIGPF-VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFS
+PEFRSAFQKILF--------GKYRRG----------------------------------
+------------------------------------------------------------
+------------------------------HR
+> 36== X95604 1 Bombyx mori serotonin receptor [InsectBiochem.Mol.Bi
+M-EG-A-------------EGQEELD----WEALYLRLPLQNCS----------WNSTGW
+EPNW------------------NVTVVPNTTWW----------------QASAPFDTPAA
+LVRA--------------AAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSL
+AVADLLVACLV-MPLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDR
+YWAVTN-IDYIHASTAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDL--RC
+VVS--------QDVGYQIFATASSFYVPVLIILILYWRIYQTARKRIRR-----------
+----------------------------------RRGATARGGVGPPPVP----------
+-----------------AGGALVAGGGSGGIAAAVVAVIGRP---LPTISETTTTGFTNV
+SSNNTSPEKQSCANGLEA---DPPTTGYGAVAAAYYPSLVRRKP-KEAADSKRERKAAKT
+LAIITGAFVACWLPFFVLAILVPT-CDCE---VSPVLTSLSLWLGYFNSTLNPVIYTVFS
+PEFRHAFQRLLC--------GRRVRRR----R----------------------------
+------------------------------------------------------------
+-----------------------------APQ
diff -r 6f28e90db932 -r bf28a8cff401 test-data/mafft_custom_result.aln
--- a/test-data/mafft_custom_result.aln Tue Oct 31 15:48:53 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,458 +0,0 @@
-CLUSTAL format alignment by MAFFT F-INS-i (v7.455)
-
-
-1== MN------------------------GTE--GDNFYVPFS--------NKTGL-ARSPYE
-2== MN------------------------GTE--GPNFYVPFS--------NITGV-VRSPFE
-3== MN------------------------GTE--GINFYVPMS--------NKTGV-VRSPFE
-4=p MN------------------------GTE--GKNFYVPMS--------NRTGL-VRSPFE
-5=p MN------------------------GTE--GNNFYVPLS--------NRTGL-VRSPFE
-6== MK------------------------QVPEFHEDFYIPIP-------LDINNLSAYSPFL
-7== MR------------------------KMS--EEEFYL------------FKNISSVGPWD
-8=opsin, ------------------------------------------------------------
-9== MAQQWSLQ-RLAGRHPQDS----YEDSTQ--SSIFTYTNS--------NS----TRGPFE
-10== MAQQWSLQ-RLAGRHPQDS----YEDSTQ--SSIFTYTNS--------NS----TRGPFE
-11== MTEAWNVAVFAARRSRDD------DDTTR--GSVFTYTNT--------NN----TRGPFE
-12== MA-AWEAA-FAARRRHEE------EDTTR--DSVFTYTNS--------NN----TRGPFE
-13== MS----------------------SNSSQ--------------AP--PNG----TPGPFD
-14== MESG-NV-----------------------------------------SSSLFGNVSTAL
-15== MEYH-NV------------------------------------------SSVLGNVSSVL
-16== MEPLCNA------------------------------------------------SEPPL
-17== MDALCNA------------------------------------------------SEPPL
-18== MT-------------------------------------------------------NAT
-19== MA-------------------------------------------------------NVT
-20== ME-------------------------------------------------SFAVAAAQL
-21== ME-------------------------------------------------SFAVAAAQL
-22== MD-------------------------------------------------SFAAVATQL
-23== MERS--------------------------------------------HLPETPFDLAHS
-24== MERS--------------------------------------------LLPEPPLAMALL
-25== MI-------------------------------------------------------AVS
-26== M-----------------------------------------------------------
-27== MVESTT------------------------------------------------------
-28== MGRD--------------------------------------------------------
-29== MM-----DVNSSGRPDLYGH-----------LRSFLLPEVGRGLPDLSPDGGADPVAGSW
-30== M-----------------------------------------------------------
-31=p M-----------------------------------------------------------
-32== MD------VLSPGQ----------------------------------GNNTTSPPAPFE
-33=p MD------VFSFGQ----------------------------------GNNTTASQEPFG
-34== MA------NFTFGDLALDVARMGGLASTPS------------------GLRSTGLTTPGL
-35=p MA------NFTFGDLALDVARMGGLASTPS------------------GLRSTGLTTPGL
-36== ME-------GAEGQEELD-------------WEALYLRL---------PLQNCSWNSTGW
-
-
-1== YP----------------------------------------------------------
-2== QP----------------------------------------------------------
-3== YP----------------------------------------------------------
-4=p YP----------------------------------------------------------
-5=p YP----------------------------------------------------------
-6== VP----------------------------------------------------------
-7== GP----------------------------------------------------------
-8=opsin, ------------------------------------------------------------
-9== GP----------------------------------------------------------
-10== GP----------------------------------------------------------
-11== GP----------------------------------------------------------
-12== GP----------------------------------------------------------
-13== GP----------------------------------------------------------
-14== RPEARL----------------------SAETRLLGWNVPPEELRHIPEHWL--------
-15== RPDARL----------------------SAESRLLGWNVPPDELRHIPEHWL--------
-16== RPEAR-SSG-------------------NGDLQFLGWNVPPDQIQYIPEHWL--------
-17== RPEARMSSG-------------------SDELQFLGWNVPPDQIQYIPEHWL--------
-18== GPQMAYYGAASMDFG-------------YPEGVSIVDFVRPEIKPYVHQHWY--------
-19== GPQMAFYGSGAATFG-------------YPEGMTVADFVPDRVKHMVLDHWY--------
-20== GPHFAPLS-----------------------NGSVVDKVTPDMAHLISPYWN--------
-21== GPHFAPLS-----------------------NGSVVDKVTPDMAHLISPYWN--------
-22== GPQFAAPS-----------------------NGSVVDKVTPDMAHLISPYWD--------
-23== GPRFQAQSSG---------------------NGSVLDNVLPDMAHLVNPYWS--------
-24== GPRFEAQTGG---------------------NRSVLDNVLPDMAPLVNPHWS--------
-25== GPSYEAFSYG--GQA-------------RFNNQTVVDKVPPDMLHLIDANWY--------
-26== -ANQLSYSSL--GWP-------------YQPNASVVDTMPKEMLYMIHEHWY--------
-27== ----------LVNQT-------------WWYNPTV----------DIHPHWA--------
-28== ---------LRDNET-------------WWYNPSI----------VVHPHWR--------
-29== APHLLS-----------------------------------EVTASPAPTWDAPPDNASG
-30== -PHLLSGF--------------------------------LEVTASPAPTWDAPPDNVSG
-31=p -PHLLSGF--------------------------------LEVTASPAPTWDAPPDNVSG
-32== TG--------------------------------------GNTTGI--------------
-33=p TG--------------------------------------GNVTSI--------------
-34== SPTGLVTSDFNDSYGLTGQFINGSHSSRSRDNASANDTSATNMTD--DRYWSL-------
-35=p SPTGLVTSDFNDSYGLTGQFINGSHSSRSRDNASANDTSATNMTD--DRYWSL-------
-36== EPNW-------------------------------------NVTVVPNTTWWQ-------
-
-
-1== -----QY-YLAEPWKYSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAM
-2== -----QY-YLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAV
-3== -----QY-YLAEPWKYRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAV
-4=p -----QY-YLAEPWQFKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAV
-5=p -----QY-YLAEPWQFKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAV
-6== -----QD-HLGNQGIFMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSI
-7== -----QY-HIAPVWAFYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSF
-8=opsin, ------------------------------------------------------------
-9== -----NY-HIAPRWVYHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAV
-10== -----NY-HIAPRWVYHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAV
-11== -----NY-HIAPRWVYNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAF
-12== -----NY-HIAPRWVYNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAV
-13== -----QWPYQAPQSTYVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAV
-14== -----TY-PEPPESMNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAF
-15== -----IY-PEPPESMNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAF
-16== -----TQ-LEPPASMHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAV
-17== -----TQ-LEPPASMHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAV
-18== -----NY-PPVNPMWHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLAL
-19== -----NY-PPVNPMWHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLAL
-20== -----QF-PAMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAI
-21== -----QF-PAMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAI
-22== -----QF-PAMDPIWAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAI
-23== -----RF-APMDPMMSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAF
-24== -----RF-APMDPTMSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAF
-25== -----QY-PPLNPMWHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAI
-26== -----AF-PPMNPLWYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAF
-27== -----KF-DPIPDAVYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAM
-28== -----EF-DQVPDAVYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAF
-29== CGEQINY----GRVEKVVIGSILTLITLLTIAGNCLVVISVCFVKKLRQPSNYLIVSLAL
-30== CGEQINY----GRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLAL
-31=p CGEQINY----GRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLAL
-32== SDVTVSY--------QVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAV
-33=p SDVTFSY--------QVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAV
-34== TVYSHEH--------LVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAV
-35=p TVYSHEH--------LVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAV
-36== --ASAPFDTPAALVRAAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAV
-
-
-1== ANLFMVLFG-FTVTMYTSMNG-YFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYI
-2== ADLFMVFGG-FTTTLYTSLHG-YFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYV
-3== ADLFMACFG-FTVTFYTAWNG-YFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYI
-4=p AGTIMVCFG-FTVTFYTAING-YFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYI
-5=p AGAIMVCFG-FTVTFYTAING-YFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYI
-6== ANLFVAIFG-SPLSFYSFFNR-YFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWL
-7== GGFLLCIFS-VFPVFVASCNG-YFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYI
-8=opsin, -DLAETVIA-STISIVNQVSG-YFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWL
-9== ADLAETVIA-STISVVNQVYG-YFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWM
-10== ADLAETVIA-STISIVNQVSG-YFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWL
-11== VDLVETLVA-STISVFNQIFG-YFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWF
-12== ADLGETVIA-STISVINQISG-YFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWF
-13== ADLLVTLCG-SSVSLSNNING-FFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYV
-14== CDFMMMVK--TPIFIYNSFHQ-GYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFN
-15== CDFMMMIK--TPIFIYNSFHQ-GYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYN
-16== FDLIMCLK--APIF--NSFHR-GFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYN
-17== FDLIMCLK--APIFIYNSFHR-GFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYN
-18== SDLIMLTTN-VPFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYN
-19== SDLIMLTTN-FPPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYN
-20== SDFGIMITN-TPMMGINLYFE-TWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQ
-21== SDFGIMITN-TPMMGINLYFE-TWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQ
-22== SDFGIMITN-TPMMGINLYFE-TWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQ
-23== SDFCMMASQ-SPVMIINFYYE-TWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYN
-24== SDFCMMASQ-SPVMIINFYYE-TWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYN
-25== SNFLMMFCM-SPPMVINCYYE-TWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYN
-26== SDFCMMAFM-MPTMTSNCFAE-TWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYN
-27== SDLSFSAINGFPLKTISAFMK-KWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYN
-28== SDFTFSLVNGFPLMTISCFLK-KWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYN
-29== ADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYL
-30== ADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYL
-31=p ADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYL
-32== TDLMVSVLV-LPMAALYQVLN-KWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYW
-33=p TDLMVSVLV-LPMAALYQVLN-KWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYW
-34== ADLMVAVLV-MPLSVVSEISK-VWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYW
-35=p ADLMVAVLV-MPLSVVSEISK-VWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYW
-36== ADLLVACLV-MPLGAVYEVVQ-RWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYW
- . : : * : : :*:
-
-1== VICKPMGNFR-FGNTHAIMGVAFTWIMALACA-APPLV-GWSRYIPEGMQCSCGPDYYTL
-2== VVCKPMSNFR-FGENHAIMGVAFTWVMALACA-APPLV-GWSRYIPEGMQCSCGIDYYTL
-3== VVCKPMGNFR-FSATHAMMGIAFTWVMAFSCA-APPLF-GWSRYMPEGMQCSCGPDYYTH
-4=p VVCKPMGSFK-FSSSHAFAGIAFTWVMALACA-APPLF-GWSRYIPEGMQCSCGPDYYTL
-5=p VVCKPMGSFK-FSSTHASAGIAFTWVMAMACA-APPLV-GWSRYIPEGIQCSCGPDYYTL
-6== VICKPLGNFT-FKTPHAIAGCILPWISALAAS-LPPLF-GWSRYIPEGLQCSCGPDWYTT
-7== VICKPFGNFR-FSSKHALTVVLATWTIGIGVS-IPPFF-GWSRFIPEGLQCSCGPDWYTV
-8=opsin, VVCKPFGNVR-FDAKLAIVGIAFSWIWAAVWT-APPIF-GWSRYWPHGLKTSCGPDVFSG
-9== VVCKPFGNVR-FDAKLAIVGIAFSWIWAAVWT-APPIF-GWSRYWPHGLKTSCGPDVFSG
-10== VVCKPFGNVR-FDAKLAIVGIAFSWIWSAVWT-APPIF-GWSRYWPHGLKTSCGPDVFSG
-11== VVCKPFGNIK-FDSKLAIIGIVFSWVWAWGWS-APPIF-GWSRYWPHGLKTSCGPDVFSG
-12== VVCKPFGNIK-FDGKLAVAGILFSWLWSCAWT-APPIF-GWSRYWPHGLKTSCGPDVFSG
-13== VVCKPLGDFQ-FQRRHAVSGCAFTWGWALLWS-APPLL-GWSSYVPEGLRTSCGPNWYTG
-14== VITRPMEGK--MTHGKAIAMIIFIYMYATPWV-VACYTETWGRFVPEGYLTSCTFDYLTD
-15== VITRPMEGK--MTHGKAIAMIIFIYLYATPWV-VACYTESWGRFVPEGYLTSCTFDYLTD
-16== VITKPMNRN--MTFTKAVIMNIIIWLYCTPWV-VLPLTQFWDRFVPEGYLTSCSFDYLSD
-17== VITKPMNRN--MTFTKAVIMNIIIWLYCTPWV-VLPLTQFWDRFVPEGYLTSCSFDYLSD
-18== IICNGFNGPK-LTTGKAVVFALISWVIAIGCA-LPPFF-GWGNYILEGILDSCSYDYLTQ
-19== IICNGFNGPK-LTQGKATFMCGLAWVISVGWS-LPPFF-GWGSYTLEGILDSCSYDYFTR
-20== VIVKGMAGRP-MTIPLALGKM----------------------YVPEGNLTSCGIDYLER
-21== VIVKGMAGRP-MTIPLALGKIAYIWFMSSIWC-LAPAF-GWSRYVPEGNLTSCGIDYLER
-22== VIVKGMAGRP-MTIPLALGKIAYIWFMSTIWCCLAPVF-GWSRYVPEGNLTSCGIDYLER
-23== VIVKGINGTP-MTIKTSIMKILFIWMMAVFWT-VMPLI-GWSAYVPEGNLTACSIDYMTR
-24== VIVKGINGTP-MTIKTSIMKIAFIWMMAVFWT-IMPLI-GWSSYVPEGNLTACSIDYMTR
-25== VIVKGLSGKP-LSINGALIRIIAIWLFSLGWT-IAPMF-GWNRYVPEGNMTACGTDYFNR
-26== VIVRGMAAAP-LTHKKATLLLLFVWIWSGGWT-ILPFF-GWSRYVPEGNLTSCTVDYLTK
-27== VIGRPMAASKKMSHRRAFLMIIFVWMWSIVWS-VGPVF-NWGAYVPEGILTSCSFDYLST
-28== VIGRPMAASKKMSHRRAFIMIIFVWLWSVLWA-IGPIF-GWGAYTLEGVLCNCSFDYISR
-29== GITRPLTYPVRQNGKCMAKMILSVWLLSASIT-LPPLF-GWAQNVNDDKVC-----LISQ
-30== GITRPLTYPVRQNGKCMAKMILSVWLLSASIT-LPPLF-GWAQNVNDDKVC-----LISQ
-31=p GITRPLTYPVRQNGKCMAKMILSVWLLSASIT-LPPLF-GWAQNVNDDKVC-----LISQ
-32== AITDPIDYVNKRTPRRAAALISLTWLIGFLIS-IPPML-GWRTPEDRSDPDAC---TISK
-33=p AITDPIDYVNKRTPRRAAALISLTWLIGFLIS-IPPML-GWRTPEDRSDPDAC---TISK
-34== AVTS-IDYIRRRSARRILLMIMVVWIVALFIS-IPPLF-GWRD--PNNDPDKTGTCIISQ
-35=p AVTS-IDYIRRRSARRILLMIMVVWIVALFIS-IPPLF-GWRD--PNNDPDKTGTCIISQ
-36== AVTN-IDYIHASTAKRVGMMIACVWTVSFFVC-IAQLL-GWKDPDWNQRVSEDLRCVVSQ
- : :
-
-1== NPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQE---------------
-2== KPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQE---------------
-3== NPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQE---------------
-4=p NPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQD---------------
-5=p NPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQD---------------
-6== NNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITLKLAAKAQAD---------------
-7== GTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRALKAVAAQQQE---------------
-8=opsin, SSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKE---------------
-9== SSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKE---------------
-10== SSYPGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAIRAVAKQQKE---------------
-11== SVELGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAIRAVAAQQKE---------------
-12== SSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAIRAVAAQQKE---------------
-13== GS--NNNSYILSLFVTCFVLPLSLILFSYTNLLLTLRAAAAQQKE---------------
-14== NF--DTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKKMNVESLRSNV-
-15== NF--DTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKKMNVDSLRSNV-
-16== NF--DTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKALREQAKKMNVESLRSNV-
-17== NF--DTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKALREQAKKMNVESLRSNV-
-18== DF--NTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQAKKMNVSTLRSN--
-19== DM--NTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQAKKMNVTNLRSN--
-20== DW--NPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKSLRSS--
-21== DW--NPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKSLRSS--
-22== DW--NPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKSLRSS--
-23== MW--NPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKAMREQAKKMNVKSLRSS--
-24== QW--NPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKAMRDQAKKMNVKSLRSS--
-25== GL--LSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQAKKMNVASLRSS--
-26== DW--SSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQLREQAKKMNVASLRANA-
-27== DP--STRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAKRLNAKELRKA--
-28== DS--TTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKRLNAKELRKA--
-29== D-----FGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFP--------------
-30== D-----FGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFP--------------
-31=p D-----FGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFP--------------
-32== D-----HGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKKVEKTGADTRHGASP
-33=p D-----HGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRKVEKKGAGTSLGTSS
-34== D-----KGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLV
-35=p D-----KGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLV
-36== D-----VGYQIFATASSFYVPVLIILILYWRIYQTARKRIRRRRGA-------TARGGVG
- * : * :
-
-1== ------------------------------------------------------------
-2== ------------------------------------------------------------
-3== ------------------------------------------------------------
-4=p ------------------------------------------------------------
-5=p ------------------------------------------------------------
-6== ------------------------------------------------------------
-7== ------------------------------------------------------------
-8=opsin, ------------------------------------------------------------
-9== ------------------------------------------------------------
-10== ------------------------------------------------------------
-11== ------------------------------------------------------------
-12== ------------------------------------------------------------
-13== ------------------------------------------------------------
-14== ------------------------------------------------------------
-15== ------------------------------------------------------------
-16== ------------------------------------------------------------
-17== ------------------------------------------------------------
-18== ------------------------------------------------------------
-19== ------------------------------------------------------------
-20== ------------------------------------------------------------
-21== ------------------------------------------------------------
-22== ------------------------------------------------------------
-23== ------------------------------------------------------------
-24== ------------------------------------------------------------
-25== ------------------------------------------------------------
-26== ------------------------------------------------------------
-27== ------------------------------------------------------------
-28== ------------------------------------------------------------
-29== GFPRVE--------------------------------PDSVI----------ALNGIVK
-30== GFPRVQ--------------------------------PESVI----------SLNGVVK
-31=p GFPRVQ--------------------------------PESVI----------SLNGVVK
-32== A-PQPKKS-VNGESGSRNWRLGVESKAGGAL-----C-----------------ANGAVR
-33=p A-PPPKKS-LNGQPGSGDWRRCAENRAVGTP-----C-----------------TNGAVR
-34== ASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGV--
-35=p ASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGV--
-36== PPPVPAGGALVAGGGSGGIAAAVVAVIGR---------PLPTISET-------TTTGFTN
-
-
-1== ------------------------------------------------------------
-2== ------------------------------------------------------------
-3== ------------------------------------------------------------
-4=p ------------------------------------------------------------
-5=p ------------------------------------------------------------
-6== ------------------------------------------------------------
-7== ------------------------------------------------------------
-8=opsin, ------------------------------------------------------------
-9== ------------------------------------------------------------
-10== ------------------------------------------------------------
-11== ------------------------------------------------------------
-12== ------------------------------------------------------------
-13== ------------------------------------------------------------
-14== ------------------------------------------------------------
-15== ------------------------------------------------------------
-16== ------------------------------------------------------------
-17== ------------------------------------------------------------
-18== ------------------------------------------------------------
-19== ------------------------------------------------------------
-20== ------------------------------------------------------------
-21== ------------------------------------------------------------
-22== ------------------------------------------------------------
-23== ------------------------------------------------------------
-24== ------------------------------------------------------------
-25== ------------------------------------------------------------
-26== ------------------------------------------------------------
-27== ------------------------------------------------------------
-28== ------------------------------------------------------------
-29== -------------LQKEVEEC-AN------------------LSRLLKHER---------
-30== -------------LQKEVEEC-AN------------------LSRLLKHER---------
-31=p -------------LQKEVEEC-AN------------------LSRLLKHER---------
-32== QGDDGAALEV-----IEVHRV-GNSKEHLPLPSEAGP--TPCAPASFERKNERN------
-33=p QGDDEATLEV-----IEVHRV-GNSKEHLPLPSESGS--NSYAPACLERKNERN------
-34== -NSNSSSSERLKQIQIETAEAFANGC------------AEEASIAMLERQCNNGKKISSN
-35=p -NSNSSSSERLKQIQIETAEAFANGC------------AEEASIAMLERQCNNGKKISSN
-36== VSSNNTSP--------EKQSC-ANGLEADPPTTGYGAVAAAYYPSLVRRKP---------
-
-
-1== --------SASTQKAEKEVTRMVVLMVIGFLVCWVPYASVAFYIFT--HQGSDFGAT-FM
-2== --------SATTQKAEKEVTRMVIIMVIFFLICWLPYASVAMYIFT--HQGSNFGPI-FM
-3== --------SATTQKAEKEVTRMVILMVLGFMLAWTPYAVVAFWIFT--NKGADFTAT-LM
-4=p --------SASTQKAEREVTKMVILMVFGFLIAWTPYATVAAWIFF--NKGADFSAK-FM
-5=p --------SASTQKAEREVTKMVILMVLGFLVAWTPYATVAAWIFF--NKGAAFSAQ-FM
-6== --------SASTQKAEREVTKMVVVMVLGFLVCWAPYASFSLWIVS--HRGEEFDLR-MA
-7== --------SATTQKAEREVSRMVVVMVGSFCVCYVPYAAFAMYMVN--NRNHGLDLR-LV
-8=opsin, --------SESTQKAEKEVTRMVVVMVLAFC-----------------------------
-9== --------SESTQKAEKEVTRMVVVMVLAFCFCWGPYAFFACFAAA--NPGYPFHPL-MA
-10== --------SESTQKAEKEVTRMVVVMIFAYCVCWGPYTFFACFAAA--NPGYAFHPL-MA
-11== --------SESTQKAEREVSRMVVVMIVAFCICWGPYASFVSFAAA--NPGYAFHPL-AA
-12== --------SESTQKAEKEVSRMVVVMIVAYCFCWGPYTFFACFAAA--NPGYAFHPL-AA
-13== --------ADTTQRAEREVTRMVIVMVMAFLLCWLPYSTFALVVAT--HKGIIIQPV-LA
-14== --------DKNKETAEIRIAKAAITICFLFFCSWTPYGVMSLIGAF--GDKTLLTPG-AT
-15== --------DKSKEAAEIRIAKAAITICFLFFASWTPYGVMSLIGAF--GDKTLLTPG-AT
-16== --------DKSKETAEIRIAKAAITICFLFFVSWTPYGVMSLIGAF--GDKSLLTQG-AT
-17== --------DKSKETAEIRIAKAAITICFLFFVSWTPYGVMSLIGAF--GDKSLLTPG-AT
-18== --------EADAQRAEIRIAKTALVNVSLWFICWTPYALISLKGVM--GDTSGITPL-VS
-19== --------EAETQRAEIRIAKTALVNVSLWFICWTPYAAITIQGLL--GNAEGITPL-LT
-20== --------EDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGLF--KF-EGLTPL-NT
-21== --------EDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGLF--KF-EGLTPL-NT
-22== --------EDADKSAEGKLAKVALVTISLWFMAWTPYLVINCMGLF--KF-EGLTPL-NT
-23== --------EDCDKSAEGKLAKVALTTISLWFMAWTPYLVICYFGLF--KI-DGLTPL-TT
-24== --------EDCDKSAENKLAKVALTTISLWFMAWTPYLIICYFGLF--KI-DGLTPL-TT
-25== --------ENQNTSAECKLAKVALMTISLWFMAWTPYLVINFSGIF--NL-VKISPL-FT
-26== --------DQQKQSAECRLAKVAMMTVGLWFMAWTPYLIISWAGVF--SSGTRLTPL-AT
-27== --------Q-AGASAEMKLAKISMVIITQFMLSWSPYAIIALLAQF--GPAEWVTPY-AA
-28== --------Q-AGANAEMRLAKISIVIVSQFLLSWSPYAVVALLAQF--GPLEWVTPY-AA
-29== -------KNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCSCIPLWVE
-30== -------KNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCSCIPLWVE
-31=p -------KNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCSCIPLWVE
-32== ----AEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-CESSCHMPTL-LG
-33=p ----AEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-CESSCHMPAL-LG
-34== DTPYSRTREKLELKRERKAARTLAIITGAFLICWLPFFIIALIGPF--VDPEGIPPF-AR
-35=p DTPYSRTREKLELKRERKAARTLAIITGAFLICWLPFFIIALIGPF--VDPEGIPPF-AR
-36== -------KEAADSKRERKAAKTLAIITGAFVACWLPFFVLAILVPT-CD--CEVSPV-LT
- * . :
-
-1== TLPAFFAKSSALYNPVIYILMNKQFRNCMITTL-----CC-----GKNPLGDDE-SGA-S
-2== TLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSL-----CC-----GKNPLGDDE-ASA-T
-3== AVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTI-----CC-----GKNPFGDEDVSSTVS
-4=p AIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTI-----FC-----GKNPLGDDE-SSTVS
-5=p AIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTL-----FC-----GKNPLGDEE-SSTVS
-6== TIPSCLSKASTVYNPVIYVLMNKQFRSCMMK-M-----VC-----GKN-IEEDE-AST-S
-7== TIPSFFSKSACIYNPIIYCFMNKQFQACIMK-M-----VC-----GKA-MTDES-DTC-S
-8=opsin, ------------------------------------------------------------
-9== ALPAFFAKSATIYNPVIYVFMNRQFRNCILQ-L-----F------GKK-VDDGS-ELS-S
-10== ALPAYFAKSATIYNPVIYVFMNRQFRNCILQ-L-----F------GKK-VDDGS-ELS-S
-11== ALPAYFAKSATIYNPVIYVFMNRQFRNCIMQ-L-----F------GKK-VDDGS-EAS-T
-12== ALPAYFAKSATIYNPIIYVFMNRQFRNCILQ-L-----F------GKK-VDDGS-EVS-T
-13== SLPSYFSKTATVYNPIIYVFMNKQFQSCLLEML-----CC-----GYQPQRTGKASPGTP
-14== MIPACACKMVACIDPFVYAISHPRYRMELQKRCPW---LALN---EK----APE-SSA-V
-15== MIPACTCKMVACIDPFVYAISHPRYRMELQKRCPW---LAIS---EK----APE-SRA-A
-16== MIPACTCKLVACIDPFVYAISHPRYRLELQKRCPW---LGVN---EK----SGE-ISS-A
-17== MIPACTCKLVACIEPFVYAISHPRYRMELQKRCPW---LGVN---EK----SGE-ASS-A
-18== TLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPW---FCVH---ET----ETK-SND-D
-19== TLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPW---FCVH---EK----DPN-DVE-E
-20== IWGACFAKSAACYNPIVYGISHPKYRLALKEKCP----CCVF---GKV--DDGK-SSD-A
-21== IWGACFAKSAACYNPIVYGISHPKYRLALKEKCP----CCVF---GKV--DDGK-SSD-A
-22== IWGACFAKSAACYNPIVYGISHPKYRLALKEKCP----CCVF---GKV--DDGK-SSE-A
-23== IWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCP----MCVF---GNT--DEPK-PDA-P
-24== IWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCP----MCVC---GTT--DEPK-PDA-P
-25== IWGSLFAKANAVYNPIVYGISHPKYRAALFAKFP----SLAC---AA----EPS-SDA-V
-26== IWGSVFAKANSCYNPIVYGISHPRYKAALYQRFP----SLAC---GS---GESG-SDV-K
-27== ELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQF---DEK--ECED-AND-A
-28== QLPVMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQF---DDK--ETED-DKD-A
-29== RTFLWLGYANSLINPFIYAFFNRDLRTTYRSLL-----QCQYRNINRKL------SAAGM
-30== RTCLWLGYANSLINPFIYAFFNRDLRPTSRSLL-----QCQYRNINRKL------SAAGM
-31=p RTCLWLGYANSLINPFIYAFFNRDLRTTYRSLL-----QCQYRNINRKL------SAAGM
-32== AIINWLGYSNSLLNPVIYAYFNKDFQNAFKKII-----KCKF---CRQ------------
-33=p AIINWLGYSNSLLNPVIYAYFNKDFQNAFKKII-----KCKF---CRR------------
-34== SFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKIL-----FGKYR-RGHR------------
-35=p SFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKIL-----FGKYR-RGHR------------
-36== SLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLL------CGRRVRRRR------------
-
-
-1== TSKTEVSSVSTS------------------------------------------------
-2== ASKTETSQVA--------------------------------------------------
-3== QSKTEVSSVSSS------------------------------------------------
-4=p TSKTEVSSVS--------------------------------------------------
-5=p TSKTEVSSVS--------------------------------------------------
-6== SQVTQVSSVA--------------------------------------------------
-7== SQKTEVSTVSST------------------------------------------------
-8=opsin, ------------------------------------------------------------
-9== ASKTEVSSVS--------------------------------------------------
-10== ASKTEVSSVS--------------------------------------------------
-11== TSRTEVSSVSNS------------------------------------------------
-12== -SRTEVSSVSNS------------------------------------------------
-13== GPHADVTAAGLR------------------------------------------------
-14== ASTS-TTQEPQ-------------------------------------------------
-15== ISTS-TTQEQQ-------------------------------------------------
-16== QSTT-TQEQ-Q-------------------------------------------------
-17== QSTT-TQEQTQ-------------------------------------------------
-18== SQSNSTVAQDK-------------------------------------------------
-19== NQSSNTQTQEK-------------------------------------------------
-20== QSQA-TASEAE-------------------------------------------------
-21== QSQA-TASEAE-------------------------------------------------
-22== QSQA-TTSEAE-------------------------------------------------
-23== ASDTETTSEAD-------------------------------------------------
-24== PSDTETTSEAE-------------------------------------------------
-25== STTSGTTTVTDN------------------------------------------------
-26== SEASATTTMEEK------------------------------------------------
-27== EEEV-VASERG--GESRDAAQMKEMMAMMQKMQAQQAAYQPPPPPQGY--PPQGYPPQGA
-28== ETEI-PAGESSDAAPSADAAQMKEMMAMMQKMQQQQAAY----PPQGYAPPPQGYPPQG-
-29== HEALKLAERPERP-----------------EFVLQNA-----------------------
-30== HEALKLAERPERS-----------------EFVLQNS-----------------------
-31=p HEALKLAERPERS-----------------EFVLQNS-----------------------
-32== ------------------------------------------------------------
-33=p ------------------------------------------------------------
-34== ------------------------------------------------------------
-35=p ------------------------------------------------------------
-36== ------------------------------------------------------------
-
-
-1== --------------------------------------------PVSP-A---
-2== -----------------------------------------------P-A---
-3== --------------------------------------------QVSP-A---
-4=p -----------------------------------------------P-A---
-5=p -----------------------------------------------P-A---
-6== -----------------------------------------------PEK---
-7== --------------------------------------------QVGP-N---
-8=opsin, -----------------------------------------------------
-9== --------------------------------------------SVSP-A---
-10== --------------------------------------------SVSP-A---
-11== --------------------------------------------SVAP-A---
-12== --------------------------------------------SVSP-A---
-13== -------------------------------------------NKVMP-AHPV
-14== --------------------------------------------QTTA-A---
-15== --------------------------------------------QTTA-A---
-16== --------------------------------------------QTTA-A---
-17== --------------------------------------------QTSA-A---
-18== -------------------------------------------------A---
-19== -------------------------------------------------S---
-20== ----------------------------------------------SK-A---
-21== ----------------------------------------------SK-A---
-22== ----------------------------------------------SK-A---
-23== ----------------------------------------------SK-A---
-24== ----------------------------------------------SK-D---
-25== -------------------------------------------EK-SN-A---
-26== -------------------------------------------PKIPE-A---
-27== YPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQAYQ-A---
-28== YPP-QGYPPQGYPPQGYPP---PPQGAPPQ-GAPPAAPPQGVDNQAYQ-A---
-29== -------------------------------------------DYCRKKGHDS
-30== -------------------------------------------DHCGKKGHDT
-31=p -------------------------------------------DHCGKKGHDT
-32== -----------------------------------------------------
-33=p -----------------------------------------------------
-34== -----------------------------------------------------
-35=p -----------------------------------------------------
-36== -------------------------------------------------A-PQ
-
diff -r 6f28e90db932 -r bf28a8cff401 test-data/mafft_default.aln
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mafft_default.aln Wed Mar 20 07:34:52 2024 +0000
@@ -0,0 +1,468 @@
+> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91]
+----------------------------------------MNG----------------T
+E--G--DNFYVP----FSNKTGLARSPYEYPQY-------YLAEPWK---------YSAL
+AAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLFG-FTVTMYTSM
+N-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGN-FRFGNTHAIM
+GVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSCGPDYYTLNPNFNNESYVVYMF
+VVHFLVPFVIIFFCYGRLLCTV----KEAAAAQQ--------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------ESASTQK------AEKEVTRMVVLMVIGFLVCWVPYASVAFYIFT-H
+QGS--DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLCC--------GKNPL
+GDDE--SGASTSKTEVSSVS-TSPVSPA--------------------------------
+---------------------------------------------------------
+> 2== U22180 1 rat opsin [J.Mol.Neurosci.5(3),207-209'94]
+----------------------------------------MNG----------------T
+E--G--PNFYVP----FSNITGVVRSPFEQPQY-------YLAEPWQ---------FSML
+AAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGG-FTTTLYTSL
+H-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSN-FRFGENHAIM
+GVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMF
+VVHFTIPMIVIFFCYGQLVFTV----KEAAAQQQ--------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------ESATTQK------AEKEVTRMVIIMVIFFLICWLPYASVAMYIFT-H
+QGS--NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLCC--------GKNPL
+GDDE--ASATASKTE------TSQVAPA--------------------------------
+---------------------------------------------------------
+> 3== M92038 1 chicken green sensitive cone opsin [PNAS89,5932-5936'9
+----------------------------------------MNG----------------T
+E--G--INFYVP----MSNKTGVVRSPFEYPQY-------YLAEPWK---------YRLV
+CCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACFG-FTVTFYTAW
+N-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGN-FRFSATHAMM
+GIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSCGPDYYTHNPDYHNESYVLYMF
+VIHFIIPVVVIFFSYGRLICKV----REAAAQQQ--------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------ESATTQK------AEKEVTRMVILMVLGFMLAWTPYAVVAFWIFT-N
+KGA--DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTICC--------GKNPF
+GDEDVSSTVSQSKTEVSSVS-SSQVSPA--------------------------------
+---------------------------------------------------------
+> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish
+----------------------------------------MNG----------------T
+E--G--KNFYVP----MSNRTGLVRSPFEYPQY-------YLAEPWQ---------FKIL
+ALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCFG-FTVTFYTAI
+N-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FKFSSSHAFA
+GIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSCGPDYYTLNPDYNNESYVIYMF
+VCHFILPVAVIFFTYGRLVCTV----KAAAAQQQ--------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------DSASTQK------AEREVTKMVILMVFGFLIAWTPYATVAAWIFF-N
+KGA--DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIFC--------GKNPL
+GDDE-SSTVSTSKTEVSS------VSPA--------------------------------
+---------------------------------------------------------
+> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish
+----------------------------------------MNG----------------T
+E--G--NNFYVP----LSNRTGLVRSPFEYPQY-------YLAEPWQ---------FKLL
+AVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCFG-FTVTFYTAI
+N-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FKFSSTHASA
+GIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSCGPDYYTLNPEYNNESYVLYMF
+ICHFILPVTIIFFTYGRLVCTV----KAAAAQQQ--------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------DSASTQK------AEREVTKMVILMVLGFLVAWTPYATVAAWIFF-N
+KGA--AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLFC--------GKNPL
+GDEE-SSTVSTSKTEVSS------VSPA--------------------------------
+---------------------------------------------------------
+> 6== L11864 1 Carassius auratus blue cone opsin [Biochemistry32,208-
+----------------------------------------MKQ----------------V
+PEFH--EDFYIPIPLDINNLS--AYSPFLVPQD-------HLGNQGI---------FMAM
+SVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIANLFVAIFG-SPLSFYSFF
+N-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGN-FTFKTPHAIA
+GCILPWISALAA-SLPPLFG-W-----SRYIPEGLQCSCGPDWYTTNNKYNNESYVMFLF
+CFCFAVPFGTIVFCYGQLLITL----KLAAKAQA--------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------DSASTQK------AEREVTKMVVVMVLGFLVCWAPYASFSLWIVS-H
+RGE--EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMM-KMVC--------GKN-I
+EEDE--ASTSSQVTQVSS------VAPEK-------------------------------
+---------------------------------------------------------
+> 7== M13299 1 human BCP <>[Science232(4747),193-202'86]
+----------------------------------------MRK----------------M
+S--E--EEFYL-----FKNIS--SVGPWDGPQY-------HIAPVWA---------FYLQ
+AAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIFS-VFPVFVASC
+N-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGN-FRFSSKHALT
+VVLATWTIGIGV-SIPPFFG-W-----SRFIPEGLQCSCGPDWYTVGTKYRSESYTWFLF
+IFCFIVPLSLICFSYTQLLRAL----KAVAAQQQ--------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------ESATTQK------AEREVSRMVVVMVGSFCVCYVPYAAFAMYMVN-N
+RNH--GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIM-KMVC--------GKA-M
+TDES--DTCSSQKTEVSTVS-STQVGPN--------------------------------
+---------------------------------------------------------
+> 8=opsin, greensensitive human (fragment) S07060
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------DLAETVIA-STISIVNQV
+S-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VRFDAKLAIV
+GIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLM
+VTCCITPLSIIVLCYLQVWLAI----RAVAKQQK--------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------ESESTQK------AEKEVTRMVVVMVLAFC-----------------
+------------------------------------------------------------
+------------------------------------------------------------
+---------------------------------------------------------
+> 9== K03494 1 human GCP <>[Science232(4747),193-202'86]
+----------------------------------------MAQQWSLQRLAGRHPQDSYE
+DSTQ--SSIFT-----YTNSNS-TRGPFEGPNY-------HIAPRWV---------YHLT
+SVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISVVNQV
+Y-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGN-VRFDAKLAIV
+GIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLM
+VTCCITPLSIIVLCYLQVWLAI----RAVAKQQK--------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------ESESTQK------AEKEVTRMVVVMVLAFCFCWGPYAFFACFAAA-N
+PGY--PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCIL-QLF---------GKK-V
+DDGS--ELSSASKTEVSSV---SSVSPA--------------------------------
+---------------------------------------------------------
+> 10== Z68193 1 human Red Opsin <>[]
+----------------------------------------MAQQWSLQRLAGRHPQDSYE
+DSTQ--SSIFT-----YTNSNS-TRGPFEGPNY-------HIAPRWV---------YHLT
+SVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISIVNQV
+S-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VRFDAKLAIV
+GIAFSWIWSAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLM
+VTCCIIPLAIIMLCYLQVWLAI----RAVAKQQK--------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------ESESTQK------AEKEVTRMVVVMIFAYCVCWGPYTFFACFAAA-N
+PGY--AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCIL-QLF---------GKK-V
+DDGS--ELSSASKTEVSSV---SSVSPA--------------------------------
+---------------------------------------------------------
+> 11== M92036 1 Gecko gecko P521 [PNAS89,6841-6845'92]
+----------------------------------------MTEAWNVAVFAARRSRDD-D
+DTTR--GSVFT-----YTNTNN-TRGPFEGPNY-------HIAPRWV---------YNLV
+SFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVA-STISVFNQI
+F-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGN-IKFDSKLAII
+GIVFSWVWAWGW-SAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSVELGCQSFMLTLM
+ITCCFLPLFIIIVCYLQVWMAI----RAVAAQQK--------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------ESESTQK------AEREVSRMVVVMIVAFCICWGPYASFVSFAAA-N
+PGY--AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIM-QLF---------GKK-V
+DDGS--EASTTSRTEVSSVS-NSSVAPA--------------------------------
+---------------------------------------------------------
+> 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90]
+----------------------------------------MAA-WEAAFAARRRHEE--E
+DTTR--DSVFT-----YTNSNN-TRGPFEGPNY-------HIAPRWV---------YNLT
+SVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIA-STISVINQI
+S-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGN-IKFDGKLAVA
+GILFSWLWSCAW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSDPGVQSYMVVLM
+VTCCFFPLAIIILCYLQVWLAI----RAVAAQQK--------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------ESESTQK------AEKEVSRMVVVMIVAYCFCWGPYTFFACFAAA-N
+PGY--AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCIL-QLF---------GKK-V
+DDGS--EVST-SRTEVSSVS-NSSVSPA--------------------------------
+---------------------------------------------------------
+> 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95]
+----------------------------------------MS-----------------S
+NSSQ--AP-----------PNG-TPGPFDGPQW------PYQAPQST---------YVGV
+AVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLCG-SSVSLSNNI
+N-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGD-FQFQRRHAVS
+GCAFTWGWALLW-SAPPLLG-W-----SSYVPEGLRTSCGPNWYTGGSNN--NSYILSLF
+VTCFVLPLSLILFSYTNLLLTL----RAAAAQQK--------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------EADTTQR------AEREVTRMVIVMVMAFLLCWLPYSTFALVVAT-H
+KGI--IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLL-EMLCCGY----QPQR-T
+GKAS--PGTPGPHADVTAAGLRNKVMPAHPV-----------------------------
+---------------------------------------------------------
+> 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87]
+----------MESGNVSS------------SLFGNVST-ALRP----------------E
+ARLS--A----------ETRLLGWNVPPEELR--------HIPEHWLTYPEPPESMNYLL
+GTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVK--TPIFIYNSF
+H-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG--KMTHGKAIA
+MIIFIYMYATPW-VVACYTETW-----GRFVPEGYLTSCTFDYLT--DNFDTRLFVACIF
+FFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKK-------------------------
+--------------------------------------------------MN--------
+--VESL------------------------------------------------------
+----------RSNVDKNKET------AEIRIAKAAITICFLFFCSWTPYGVMSLIGAF-G
+DKT--LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWLAL---------N
+EKAP--ESSAVASTSTTQEP--QQTTAA--------------------------------
+---------------------------------------------------------
+> 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92
+----------MEYHNVSS------------VL-GNVSS-VLRP----------------D
+ARLS--A----------ESRLLGWNVPPDELR--------HIPEHWLIYPEPPESMNYLL
+GTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIK--TPIFIYNSF
+H-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG--KMTHGKAIA
+MIIFIYLYATPW-VVACYTESW-----GRFVPEGYLTSCTFDYLT--DNFDTRLFVACIF
+FFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKK-------------------------
+--------------------------------------------------MN--------
+--VDSL------------------------------------------------------
+----------RSNVDKSKEA------AEIRIAKAAITICFLFFASWTPYGVMSLIGAF-G
+DKT--LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWLAI---------S
+EKAP--ESRAAISTSTTQEQ--QQTTAA--------------------------------
+---------------------------------------------------------
+> 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87]
+----------ME------------------PL-CNASEPPLRP----------------E
+AR-S--SG---N----GDLQFLGWNVPPDQIQ--------YIPEHWLTQLEPPASMHYML
+GVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIF--NSF
+H-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKAVI
+MNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDTRLFVGTIF
+FFSFVCPTLMILYYYSQIVGHVFSHEKALREQAKK-------------------------
+--------------------------------------------------MN--------
+--VESL------------------------------------------------------
+----------RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-G
+DKS--LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWLGV---------N
+EKSG--EISSAQST-TTQEQ--QQTTAA--------------------------------
+---------------------------------------------------------
+> 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92
+----------MD------------------AL-CNASEPPLRP----------------E
+ARMS--SG---S----DELQFLGWNVPPDQIQ--------YIPEHWLTQLEPPASMHYML
+GVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIFIYNSF
+H-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKAVI
+MNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDTRLFVGTIF
+LFSFVVPTLMILYYYSQIVGHVFNHEKALREQAKK-------------------------
+--------------------------------------------------MN--------
+--VESL------------------------------------------------------
+----------RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-G
+DKS--LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWLGV---------N
+EKSG--EASSAQST-TTQEQ-TQQTSAA--------------------------------
+---------------------------------------------------------
+> 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 [J.Exp.Biol.1
+--------------------------------MTNATGPQMAY----------------Y
+GAAS--MD-FGY----PEGVSIVDFVRPEIKP--------YVHQHWYNYPPVNPMWHYLL
+GVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTN-VPFFTYNCF
+SGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNG-PKLTTGKAVV
+FALISWVIAIGC-ALPPFFG-W-----GNYILEGILDSCSYDYLT--QDFNTFSYNIFIF
+VFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQAKK-------------------------
+--------------------------------------------------MN--------
+--VSTL------------------------------------------------------
+----------RS-NEADAQR------AEIRIAKTALVNVSLWFICWTPYALISLKGVM-G
+DTS--GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWFCV------HE-T
+ETKS--NDDSQSNSTVAQDK-A--------------------------------------
+---------------------------------------------------------
+> 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 [J.Exp.Biol.1
+--------------------------------MANVTGPQMAF----------------Y
+GSGA--AT-FGY----PEGMTVADFVPDRVKH--------MVLDHWYNYPPVNPMWHYLL
+GVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTN-FPPFCYNCF
+SGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNG-PKLTQGKATF
+MCGLAWVISVGW-SLPPFFG-W-----GSYTLEGILDSCSYDYFT--RDMNTITYNICIF
+IFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQAKK-------------------------
+--------------------------------------------------MN--------
+--VTNL------------------------------------------------------
+----------RS-NEAETQR------AEIRIAKTALVNVSLWFICWTPYAAITIQGLL-G
+NAE--GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWFCV------HE-K
+DPND--VEENQSSNTQTQEK-S--------------------------------------
+---------------------------------------------------------
+> 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85]
+----------ME----SF------------AVAAAQLGPHFAP----------------L
+S-----------------NGSVVDKVTPDMAH--------LISPYWNQFPAMDPIWAKIL
+TAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLY
+F-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLALG
+KM---------------------------YVPEGNLTSCGIDYLE--RDWNPRSYLIFYS
+IFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK-------------------------
+--------------------------------------------------MN--------
+--VKSL------------------------------------------------------
+----------RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMGLF-K
+F-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GK-V
+DDGK--SSDAQSQA-TASEA-ESKA-----------------------------------
+---------------------------------------------------------
+> 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85]
+----------ME----SF------------AVAAAQLGPHFAP----------------L
+S-----------------NGSVVDKVTPDMAH--------LISPYWNQFPAMDPIWAKIL
+TAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLY
+F-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLALG
+KIAYIWFMSSIW-CLAPAFG-W-----SRYVPEGNLTSCGIDYLE--RDWNPRSYLIFYS
+IFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK-------------------------
+--------------------------------------------------MN--------
+--VKSL------------------------------------------------------
+----------RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMGLF-K
+F-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GK-V
+DDGK--SSDAQSQA-TASEA-ESKA-----------------------------------
+---------------------------------------------------------
+> 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204'
+----------MD----SF------------AAVATQLGPQFAA----------------P
+S-----------------NGSVVDKVTPDMAH--------LISPYWDQFPAMDPIWAKIL
+TAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLY
+F-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLALG
+KIAYIWFMSTIWCCLAPVFG-W-----SRYVPEGNLTSCGIDYLE--RDWNPRSYLIFYS
+IFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK-------------------------
+--------------------------------------------------MN--------
+--VKSL------------------------------------------------------
+----------RS-SEDADKS------AEGKLAKVALVTISLWFMAWTPYLVINCMGLF-K
+F-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GK-V
+DDGK--SSEAQSQA-TTSEA-ESKA-----------------------------------
+---------------------------------------------------------
+> 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86]
+-----MERSHLP----ET------------PFDLAHSGPRFQA----------------Q
+SSG---------------NGSVLDNVLPDMAH--------LVNPYWSRFAPMDPMMSKIL
+GLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINFY
+Y-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGING-TPMTIKTSIM
+KILFIWMMAVFW-TVMPLIG-W-----SAYVPEGNLTACSIDYMT--RMWNPRSYLITYS
+LFVYYTPLFLICYSYWFIIAAVAAHEKAMREQAKK-------------------------
+--------------------------------------------------MN--------
+--VKSL------------------------------------------------------
+----------RS-SEDCDKS------AEGKLAKVALTTISLWFMAWTPYLVICYFGLF-K
+I-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMCVF------GN-T
+DEPK--PDAPASDTETTSEA-DSKA-----------------------------------
+---------------------------------------------------------
+> 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92
+-----MERSLLP----EP------------PLAMALLGPRFEA----------------Q
+TGG---------------NRSVLDNVLPDMAP--------LVNPHWSRFAPMDPTMSKIL
+GLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINFY
+Y-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGING-TPMTIKTSIM
+KIAFIWMMAVFW-TIMPLIG-W-----SSYVPEGNLTACSIDYMT--RQWNPRSYLITYS
+LFVYYTPLFMICYSYWFIIATVAAHEKAMRDQAKK-------------------------
+--------------------------------------------------MN--------
+--VKSL------------------------------------------------------
+----------RS-SEDCDKS------AENKLAKVALTTISLWFMAWTPYLIICYFGLF-K
+I-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMCVC------GT-T
+DEPK--PDAPPSDTETTSEA-ESKD-----------------------------------
+---------------------------------------------------------
+> 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[]
+--------------------------------MIAVSGPSYEA----------------F
+SYGG--QARF-------NNQTVVDKVPPDMLH--------LIDANWYQYPPLNPMWHGIL
+GFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCM-SPPMVINCY
+Y-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSG-KPLSINGALI
+RIIAIWLFSLGW-TIAPMFG-W-----NRYVPEGNMTACGTDYFN--RGLLSASYLVCYG
+IWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQAKK-------------------------
+--------------------------------------------------MN--------
+--VASL------------------------------------------------------
+----------RS-SENQNTS------AECKLAKVALMTISLWFMAWTPYLVINFSGIF-N
+L-V--KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSLAC-------A-A
+EPSS--DAVSTTSGTTTVTD-NEKSNA---------------------------------
+---------------------------------------------------------
+> 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93]
+----------------------------------MANQLSYSS----------------L
+GWPY--QP----------NASVVDTMPKEMLY--------MIHEHWYAFPPMNPLWYSIL
+GVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFM-MPTMTSNCF
+A-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAA-APLTHKKATL
+LLLFVWIWSGGW-TILPFFG-W-----SRYVPEGNLTSCTVDYLT--KDWSSASYVVIYG
+LAVYFLPLITMIYCYFFIVHAVAEHEKQLREQAKK-------------------------
+--------------------------------------------------MN--------
+--VASL------------------------------------------------------
+----------RANADQQKQS------AECRLAKVAMMTVGLWFMAWTPYLIISWAGVF-S
+SGT--RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSLAC------GS-G
+ESGS--DVKSEASATTTMEE-KPKIPEA--------------------------------
+---------------------------------------------------------
+> 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88]
+---------------------------------------MVES----------------T
+TLVN--QT-WWY------NPTVD------------------IHPHWAKFDPIPDAVYYSV
+GIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAINGFPLKTISAF
+M-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAASKKMSHRRAFL
+MIIFVWMWSIVW-SVGPVFN-W-----GAYVPEGILTSCSFDYLS--TDPSTRSFILCMY
+FCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAKR-------------------------
+--------------------------------------------------LN--------
+--AKEL------------------------------------------------------
+----------R--KAQAGAS------AEMKLAKISMVIITQFMLSWSPYAIIALLAQF-G
+PAE--WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQFDEKE-C
+EDAN--DAEEEVVASER--G-GESRDAAQMKEMMAMMQKMQAQQAAYQPPPPPQGY--PP
+QGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQAYQA
+> 28== X70498 1 Todarodes pacificus rhodopsin [FEBS317(1-2),5-11'93]
+----------------------------------------MGR----------------D
+LRDN--ET-WWY------NPSIV------------------VHPHWREFDQVPDAVYYSL
+GIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLVNGFPLMTISCF
+L-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAASKKMSHRRAFI
+MIIFVWLWSVLW-AIGPIFG-W-----GAYTLEGVLCNCSFDYIS--RDSTTRSNILCMF
+ILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKR-------------------------
+--------------------------------------------------LN--------
+--AKEL------------------------------------------------------
+----------R--KAQAGAN------AEMRLAKISIVIVSQFLLSWSPYAVVALLAQF-G
+PLE--WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQFDDKE-T
+EDDK--DAETEIPAGESSDA-APSADAAQMKEMMAMMQKMQQQQAAY----PPQGYAPPP
+QGYPPQGY--PPQGYPPQGYPPQGYPP---PPQGAPPQ-GAPPAAPPQGVDNQAYQA
+> 29== L21195 1 human serotonin 5-HT7 receptor protein 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93]
+------------------------------------------------------------
+------MPHLLS---GFLEVTASPAPTWDAPPDNVSGCGEQIN--------YGRVEKVVI
+GSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDL
+IGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAK
+MILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GYTIYST
+AVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF----------------------
+---------------------------------PGFPRV----QPESVISLNG-------
+--VVKL----------------------QK---------EVEECAN--------------
+----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFIC
+GTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQCQYR----NINR--
+------KLSAAGMHEALKLAERPERSEF------------VLQNSDH-------------
+--------------------------------------------CGKKGHDT-----
+> 31=p A47425 serotonin receptor 5HT-7 - rat
+------------------------------------------------------------
+------MPHLLS---GFLEVTASPAPTWDAPPDNVSGCGEQIN--------YGRVEKVVI
+GSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDL
+IGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAK
+MILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GYTIYST
+AVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF----------------------
+---------------------------------PGFPRV----QPESVISLNG-------
+--VVKL----------------------QK---------EVEECAN--------------
+----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFIC
+GTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYR----NINR--
+------KLSAAGMHEALKLAERPERSEF------------VLQNSDH-------------
+--------------------------------------------CGKKGHDT-----
+> 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92]
+----------MDVLSP--------------------------------------------
+---------------GQGNNTTSPPAPFET-GGNTTGISDVT---------VSYQ--VIT
+SLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQV
+L-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAA
+LISLTWLIGFLI-SIPPMLG-WRTPEDRSDPD---ACTISKDH----------GYTIYST
+FGAFYIPLLLMLVLYGRIF-------RAARFRIRK-------------------------
+--------------TVKKVEKTGADTRHGASPAPQPKKS-----------VNGESGSR--
+-------NWRLGVESKAGGALCANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAG--
+PTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-C
+ESSC-HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC----RQ----
+------------------------------------------------------------
+---------------------------------------------------------
+> 33=p A35181 serotonin receptor class 1A - rat
+----------MDVFSF--------------------------------------------
+---------------GQGNNTTASQEPFGT-GGNVTSISDVT---------FSYQ--VIT
+SLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQV
+L-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAA
+LISLTWLIGFLI-SIPPMLG-WRTPEDRSDPD---ACTISKDH----------GYTIYST
+FGAFYIPLLLMLVLYGRIF-------RAARFRIRK-------------------------
+--------------TVRKVEKKGAGTSLGTSSAPPPKKS-----------LNGQPGSG--
+-------DWRRCAENRAVGTPCTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESG--
+SNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-C
+ESSC-HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC----RR----
+------------------------------------------------------------
+---------------------------------------------------------
+> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93]
+MANFTFGDLALDVARMG-----GLASTPSGLRSTGLTTPGLSPTGLV------------T
+SDFN--DSYGLT---GQFINGSHSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLT
+SVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEI
+S-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILL
+MIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK----------GYTIFST
+VGAFYLPMLVMMIIYIRIW-------LVARSRIRKDKFQMTKARLKTEETTLVASPKTEY
+SVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS---
+-------SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS--
+-----------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF-V
+DPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR----RGHR--
+------------------------------------------------------------
+---------------------------------------------------------
+> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail
+MANFTFGDLALDVARMG-----GLASTPSGLRSTGLTTPGLSPTGLV------------T
+SDFN--DSYGLT---GQFINGSHSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLT
+SVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEI
+S-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILL
+MIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK----------GYTIFST
+VGAFYLPMLVMMIIYIRIW-------LVARSRIRKDKFQMTKARLKTEETTLVASPKTEY
+SVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS---
+-------SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS--
+-----------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF-V
+DPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR----RGHR--
+------------------------------------------------------------
+---------------------------------------------------------
+> 36== X95604 1 Bombyx mori serotonin receptor [InsectBiochem.Mol.Bi
+-MEGAEGQEELDWEAL-------YLRLP--LQNCSWNSTGWEPNWNV------------T
+VVPN--TTWW------------QASAPFDTPAALVRAAAK--------------------
+AVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLV-MPLGAVYEV
+V-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDYIHASTAKRVGM
+MIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCVVSQDV----------GYQIFAT
+ASSFYVPVLIILILYWRIY-------QTARKRIR--------------------------
+-------------------RRRGATARGGVGPPP---------VPAGGALVAGGGSGGIA
+AAVVAVIGRPLPTISETTTTGFTNVSS----NNTSP---EKQSCANGLEADPPTTGYGAV
+AAAYYPSLVRRKPKEAADSK------RERKAAKTLAIITGAFVACWLPFFVLAILVPT-C
+DCE---VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLLCGRRV----RRRR--
+------A---------------PQ------------------------------------
+---------------------------------------------------------
diff -r 6f28e90db932 -r bf28a8cff401 test-data/mafft_explicit_amino_blosum80.clustal.aln
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mafft_explicit_amino_blosum80.clustal.aln Wed Mar 20 07:34:52 2024 +0000
@@ -0,0 +1,458 @@
+CLUSTAL format alignment by MAFFT FFT-NS-2 (v7.520)
+
+
+1== ----------------------------------------MN------------------
+2== ----------------------------------------MN------------------
+3== ----------------------------------------MN------------------
+4=p ----------------------------------------MN------------------
+5=p ----------------------------------------MN------------------
+6== ----------------------------------------MK------------------
+7== ----------------------------------------MR------------------
+8=opsin, ------------------------------------------------------------
+9== ----------------------------------------MAQQWSLQRLAGRHPQDS--
+10== ----------------------------------------MAQQWSLQRLAGRHPQDS--
+11== ----------------------------------------MTEAWNVAVFAARRSRDD--
+12== ----------------------------------------MA-AWEAAFAARRRHEE---
+13== ----------------------------------------MS------------------
+14== ----------MESGNVS-------------SSLFGNVSTALR------------------
+15== ----------MEYHNVS-------------SVL-GNVSSVLR------------------
+16== ----------ME-------------------PLCNASEPPLR------------------
+17== ----------MD-------------------ALCNASEPPLR------------------
+18== --------------------------------MTNATGPQMA------------------
+19== --------------------------------MANVTGPQMA------------------
+20== ----------ME---SF-------------AVAAAQLGPHFA------------------
+21== ----------ME---SF-------------AVAAAQLGPHFA------------------
+22== ----------MD---SF-------------AAVATQLGPQFA------------------
+23== -----MERSHLP---ET-------------PFDLAHSGPRFQ------------------
+24== -----MERSLLP---EP-------------PLAMALLGPRFE------------------
+25== --------------------------------MIAVSGPSYE------------------
+26== ----------------------------------MANQLSYS------------------
+27== ---------------------------------------MVE------------------
+28== ----------------------------------------MG------------------
+29== ---------MMDVNSSGRPDLYGHLRSFL-LPEVGRGLPDLSPDGGADPVAG-SWAPHLL
+30== -------------------------------------------------------MPHLL
+31=p -------------------------------------------------------MPHLL
+32== ----------MDVLSP--------------------------------------------
+33=p ----------MDVFSF--------------------------------------------
+34== MANFTFGDLALDVARMG-----GLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYG---L
+35=p MANFTFGDLALDVARMG-----GLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYG---L
+36== -MEGAEGQEELDWEAL-------YLRLP--LQNCSWNSTGWEPNWNVTVVPNTTW-----
+
+
+1== -----GTE--GDN-FYVP----FSNKTGLARSPYEYPQY-YLAEPWK-----------YS
+2== -----GTE--GPN-FYVP----FSNITGVVRSPFEQPQY-YLAEPWQ-----------FS
+3== -----GTE--GIN-FYVP----MSNKTGVVRSPFEYPQY-YLAEPWK-----------YR
+4=p -----GTE--GKN-FYVP----MSNRTGLVRSPFEYPQY-YLAEPWQ-----------FK
+5=p -----GTE--GNN-FYVP----LSNRTGLVRSPFEYPQY-YLAEPWQ-----------FK
+6== -----QVPEFHED-FYIPIPLDINNLSAY--SPFLVPQD-HLGNQGI-----------FM
+7== -----KMS--EEE-FYL-----FKNISSV--GPWDGPQY-HIAPVWA-----------FY
+8=opsin, ------------------------------------------------------------
+9== ----YEDSTQSSI-FT------YTNSNST-RGPFEGPNY-HIAPRWV-----------YH
+10== ----YEDSTQSSI-FT------YTNSNST-RGPFEGPNY-HIAPRWV-----------YH
+11== -----DDTTRGSV-FT------YTNTNNT-RGPFEGPNY-HIAPRWV-----------YN
+12== -----EDTTRDSV-FT------YTNSNNT-RGPFEGPNY-HIAPRWV-----------YN
+13== -----SNSSQAP-------------PNGT-PGPFDGPQWPYQAPQST-----------YV
+14== -----PEARLSA-----------ETRLLGWNVPPEELR--HIPEHWLT--YPEPPESMNY
+15== -----PDARLSA-----------ESRLLGWNVPPDELR--HIPEHWLI--YPEPPESMNY
+16== -----PEAR-SSG---N-----GDLQFLGWNVPPDQIQ--YIPEHWLT--QLEPPASMHY
+17== -----PEARMSSG---S-----DELQFLGWNVPPDQIQ--YIPEHWLT--QLEPPASMHY
+18== -----YYGAASMD-FGY-----PEGVSIVDFVRPEIKP--YVHQHWYN--YPPVNPMWHY
+19== -----FYGSGAAT-FGY-----PEGMTVADFVPDRVKH--MVLDHWYN--YPPVNPMWHY
+20== -----PLS----------------NGSVVDKVTPDMAH--LISPYWNQ--FPAMDPIWAK
+21== -----PLS----------------NGSVVDKVTPDMAH--LISPYWNQ--FPAMDPIWAK
+22== -----APS----------------NGSVVDKVTPDMAH--LISPYWDQ--FPAMDPIWAK
+23== -----AQSSG--------------NGSVLDNVLPDMAH--LVNPYWSR--FAPMDPMMSK
+24== -----AQTGG--------------NRSVLDNVLPDMAP--LVNPHWSR--FAPMDPTMSK
+25== -----AFSYGGQARF--------NNQTVVDKVPPDMLH--LIDANWYQ--YPPLNPMWHG
+26== -----SLGWPYQP-----------NASVVDTMPKEMLY--MIHEHWYA--FPPMNPLWYS
+27== -----STTLVNQT-WWY-------NPTVD------------IHPHWAK--FDPIPDAVYY
+28== -----RDLRDNET-WWY-------NPSIV------------VHPHWRE--FDQVPDAVYY
+29== S---EVTASPAPT-WDAP----PDNASGCGEQIN----------------YGRVE---KV
+30== SGFLEVTASPAPT-WDAP----PDNVSGCGEQIN----------------YGRVE---KV
+31=p SGFLEVTASPAPT-WDAP----PDNVSGCGEQIN----------------YGRVE---KV
+32== -GQGNNTTSPPAP-FET-----GGNTTGISDVT-----------------VSYQ-----V
+33=p -GQGNNTTASQEP-FGT-----GGNVTSISDVT-----------------FSYQ-----V
+34== TGQFINGSHSSRS-RDNA----SANDTSATNMTDD--------RYWSLTVYSHEH---LV
+35=p TGQFINGSHSSRS-RDNA----SANDTSATNMTDD--------RYWSLTVYSHEH---LV
+36== -------WQASAP-FDTP----AALVRAAAK-----------------------------
+
+
+1== ALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLFG-FTVTMYT
+2== MLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGG-FTTTLYT
+3== LVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACFG-FTVTFYT
+4=p ILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCFG-FTVTFYT
+5=p LLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCFG-FTVTFYT
+6== AMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIANLFVAIFG-SPLSFYS
+7== LQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIFS-VFPVFVA
+8=opsin, --------------------------------------------DLAETVIA-STISIVN
+9== LTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISVVN
+10== LTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISIVN
+11== LVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVA-STISVFN
+12== LTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIA-STISVIN
+13== GVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLCG-SSVSLSN
+14== LLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVK--TPIFIYN
+15== LLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIK--TPIFIYN
+16== MLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIF--N
+17== MLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIFIYN
+18== LLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTN-VPFFTYN
+19== LLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTN-FPPFCYN
+20== ILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGIN
+21== ILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGIN
+22== ILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGIN
+23== ILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIIN
+24== ILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIIN
+25== ILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCM-SPPMVIN
+26== ILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFM-MPTMTSN
+27== SVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAINGFPLKTIS
+28== SLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLVNGFPLMTIS
+29== VIGSILTLITLLTIAGNCLVVISVCFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVT
+30== VIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVT
+31=p VIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVT
+32== ITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALY
+33=p ITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALY
+34== LTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVS
+35=p LTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVS
+36== --AVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLV-MPLGAVY
+ .
+
+1== SMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGN-FRFGNTHA
+2== SLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSN-FRFGENHA
+3== AWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGN-FRFSATHA
+4=p AIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FKFSSSHA
+5=p AIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FKFSSTHA
+6== FFN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGN-FTFKTPHA
+7== SCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGN-FRFSSKHA
+8=opsin, QVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VRFDAKLA
+9== QVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGN-VRFDAKLA
+10== QVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VRFDAKLA
+11== QIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGN-IKFDSKLA
+12== QIS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGN-IKFDGKLA
+13== NIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGD-FQFQRRHA
+14== SFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG--KMTHGKA
+15== SFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG--KMTHGKA
+16== SFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKA
+17== SFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKA
+18== CFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNG-PKLTTGKA
+19== CFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNG-PKLTQGKA
+20== LYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLA
+21== LYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLA
+22== LYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLA
+23== FYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGING-TPMTIKTS
+24== FYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGING-TPMTIKTS
+25== CYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSG-KPLSINGA
+26== CFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAA-APLTHKKA
+27== AFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAASKKMSHRRA
+28== CFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAASKKMSHRRA
+29== DLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCM
+30== DLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCM
+31=p DLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCM
+32== QVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRA
+33=p QVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRA
+34== EIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRI
+35=p EIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRI
+36== EVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDYIHASTAKRV
+ : : * : : :*: : :
+
+1== IMGVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSCGPDYYTLNPNFNNESYVVY
+2== IMGVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSCGIDYYTLKPEVNNESFVIY
+3== MMGIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSCGPDYYTHNPDYHNESYVLY
+4=p FAGIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSCGPDYYTLNPDYNNESYVIY
+5=p SAGIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSCGPDYYTLNPEYNNESYVLY
+6== IAGCILPWISALAA-SLPPLFG-W-----SRYIPEGLQCSCGPDWYTTNNKYNNESYVMF
+7== LTVVLATWTIGIGV-SIPPFFG-W-----SRFIPEGLQCSCGPDWYTVGTKYRSESYTWF
+8=opsin, IVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYMIV
+9== IVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYMIV
+10== IVGIAFSWIWSAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYMIV
+11== IIGIVFSWVWAWGW-SAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSVELGCQSFMLT
+12== VAGILFSWLWSCAW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSDPGVQSYMVV
+13== VSGCAFTWGWALLW-SAPPLLG-W-----SSYVPEGLRTSCGPNWYTGGSNN--NSYILS
+14== IAMIIFIYMYATPW-VVACYTETW-----GRFVPEGYLTSCTFDYLT--DNFDTRLFVAC
+15== IAMIIFIYLYATPW-VVACYTESW-----GRFVPEGYLTSCTFDYLT--DNFDTRLFVAC
+16== VIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDTRLFVGT
+17== VIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDTRLFVGT
+18== VVFALISWVIAIGC-ALPPFFG-W-----GNYILEGILDSCSYDYLT--QDFNTFSYNIF
+19== TFMCGLAWVISVGW-SLPPFFG-W-----GSYTLEGILDSCSYDYFT--RDMNTITYNIC
+20== LGKM---------------------------YVPEGNLTSCGIDYLE--RDWNPRSYLIF
+21== LGKIAYIWFMSSIW-CLAPAFG-W-----SRYVPEGNLTSCGIDYLE--RDWNPRSYLIF
+22== LGKIAYIWFMSTIWCCLAPVFG-W-----SRYVPEGNLTSCGIDYLE--RDWNPRSYLIF
+23== IMKILFIWMMAVFW-TVMPLIG-W-----SAYVPEGNLTACSIDYMT--RMWNPRSYLIT
+24== IMKIAFIWMMAVFW-TIMPLIG-W-----SSYVPEGNLTACSIDYMT--RQWNPRSYLIT
+25== LIRIIAIWLFSLGW-TIAPMFG-W-----NRYVPEGNMTACGTDYFN--RGLLSASYLVC
+26== TLLLLFVWIWSGGW-TILPFFG-W-----SRYVPEGNLTSCTVDYLT--KDWSSASYVVI
+27== FLMIIFVWMWSIVW-SVGPVFN-W-----GAYVPEGILTSCSFDYLS--TDPSTRSFILC
+28== FIMIIFVWLWSVLW-AIGPIFG-W-----GAYTLEGVLCNCSFDYIS--RDSTTRSNILC
+29== AKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GYTIY
+30== AKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GYTIY
+31=p AKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GYTIY
+32== AALISLTWLIGFLI-SIPPMLG-WRTPEDRSDPD---ACTISKDH----------GYTIY
+33=p AALISLTWLIGFLI-SIPPMLG-WRTPEDRSDPD---ACTISKDH----------GYTIY
+34== LLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK----------GYTIF
+35=p LLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK----------GYTIF
+36== GMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCVVSQDV----------GYQIF
+ :
+
+1== MFVVHFLVPFVIIFFCYGRLLCTV----KE------------------------------
+2== MFVVHFTIPMIVIFFCYGQLVFTV----KE------------------------------
+3== MFVIHFIIPVVVIFFSYGRLICKV----RE------------------------------
+4=p MFVCHFILPVAVIFFTYGRLVCTV----KA------------------------------
+5=p MFICHFILPVTIIFFTYGRLVCTV----KA------------------------------
+6== LFCFCFAVPFGTIVFCYGQLLITL----KL------------------------------
+7== LFIFCFIVPLSLICFSYTQLLRAL----KA------------------------------
+8=opsin, LMVTCCITPLSIIVLCYLQVWLAI----RA------------------------------
+9== LMVTCCITPLSIIVLCYLQVWLAI----RA------------------------------
+10== LMVTCCIIPLAIIMLCYLQVWLAI----RA------------------------------
+11== LMITCCFLPLFIIIVCYLQVWMAI----RA------------------------------
+12== LMVTCCFFPLAIIILCYLQVWLAI----RA------------------------------
+13== LFVTCFVLPLSLILFSYTNLLLTL----RA------------------------------
+14== IFFFSFVCPTTMITYYYSQIVGHVFSHEKA------------------------------
+15== IFFFSFVCPTTMITYYYSQIVGHVFSHEKA------------------------------
+16== IFFFSFVCPTLMILYYYSQIVGHVFSHEKA------------------------------
+17== IFLFSFVVPTLMILYYYSQIVGHVFNHEKA------------------------------
+18== IFVFDYFLPAAIIVFSYVFIVKAIFAHEAA------------------------------
+19== IFIFDFFLPASVIVFSYVFIVKAIFAHEAA------------------------------
+20== YSIFVYYIPLFLICYSYWFIIAAVSAHEKA------------------------------
+21== YSIFVYYIPLFLICYSYWFIIAAVSAHEKA------------------------------
+22== YSIFVYYIPLFLICYSYWFIIAAVSAHEKA------------------------------
+23== YSLFVYYTPLFLICYSYWFIIAAVAAHEKA------------------------------
+24== YSLFVYYTPLFMICYSYWFIIATVAAHEKA------------------------------
+25== YGIWVYFVPLFLIIYSYWFIIQAVAAHEKN------------------------------
+26== YGLAVYFLPLITMIYCYFFIVHAVAEHEKQ------------------------------
+27== MYFCGFMLPIIIIAFCYFNIVMSVSNHEKE------------------------------
+28== MFILGFFGPILIIFFCYFNIVMSVSNHEKE------------------------------
+29== STAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF-----------------PGF-------
+30== STAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF-----------------PGF-------
+31=p STAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF-----------------PGF-------
+32== STFGAFYIPLLLMLVLYGRIFRAARFRIRK------------------------------
+33=p STFGAFYIPLLLMLVLYGRIFRAARFRIRK------------------------------
+34== STVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTEYSVVSD
+35=p STVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTEYSVVSD
+36== ATASSFYVPVLIILILYWRIYQTARKRIR-------------------------------
+ * : * :
+
+1== --------------------------AAAAQQ----------------------------
+2== --------------------------AAAQQQ----------------------------
+3== --------------------------AAAQQQ----------------------------
+4=p --------------------------AAAQQQ----------------------------
+5=p --------------------------AAAQQQ----------------------------
+6== --------------------------AAKAQA----------------------------
+7== --------------------------VAAQQQ----------------------------
+8=opsin, --------------------------VAKQQK----------------------------
+9== --------------------------VAKQQK----------------------------
+10== --------------------------VAKQQK----------------------------
+11== --------------------------VAAQQK----------------------------
+12== --------------------------VAAQQK----------------------------
+13== --------------------------AAAQQK----------------------------
+14== --------------------------LRDQAKKM----------------------NVES
+15== --------------------------LRDQAKKM----------------------NVDS
+16== --------------------------LREQAKKM----------------------NVES
+17== --------------------------LREQAKKM----------------------NVES
+18== --------------------------MRAQAKKM----------------------NVST
+19== --------------------------MRAQAKKM----------------------NVTN
+20== --------------------------MREQAKKM----------------------NVKS
+21== --------------------------MREQAKKM----------------------NVKS
+22== --------------------------MREQAKKM----------------------NVKS
+23== --------------------------MREQAKKM----------------------NVKS
+24== --------------------------MRDQAKKM----------------------NVKS
+25== --------------------------MREQAKKM----------------------NVAS
+26== --------------------------LREQAKKM----------------------NVAS
+27== --------------------------MAAMAKRL----------------------NAKE
+28== --------------------------MAAMAKRL----------------------NAKE
+29== --------------------------PRVEPD------------------------SVIA
+30== --------------------------PRVQPE------------------------SVIS
+31=p --------------------------PRVQPE------------------------SVIS
+32== ---------TVKKVEKTGADTRHGASPAPQPKKS-----------VNGESGSR-------
+33=p ---------TVRKVEKKGAGTSLGTSSAPPPKKS-----------LNGQPGSG-------
+34== CNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS--------
+35=p CNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS--------
+36== --------------RRRGATARGGVGPPP---------VPAGGALVAGGGSGGIAAAVVA
+
+
+1== ------------------------------------------------------------
+2== ------------------------------------------------------------
+3== ------------------------------------------------------------
+4=p ------------------------------------------------------------
+5=p ------------------------------------------------------------
+6== ------------------------------------------------------------
+7== ------------------------------------------------------------
+8=opsin, ------------------------------------------------------------
+9== ------------------------------------------------------------
+10== ------------------------------------------------------------
+11== ------------------------------------------------------------
+12== ------------------------------------------------------------
+13== ------------------------------------------------------------
+14== L-----------------------------------------------------------
+15== L-----------------------------------------------------------
+16== L-----------------------------------------------------------
+17== L-----------------------------------------------------------
+18== L-----------------------------------------------------------
+19== L-----------------------------------------------------------
+20== L-----------------------------------------------------------
+21== L-----------------------------------------------------------
+22== L-----------------------------------------------------------
+23== L-----------------------------------------------------------
+24== L-----------------------------------------------------------
+25== L-----------------------------------------------------------
+26== L-----------------------------------------------------------
+27== L-----------------------------------------------------------
+28== L-----------------------------------------------------------
+29== L-----------------NGIVKLQ--------KEVEECAN------------------L
+30== L-----------------NGVVKLQ--------KEVEECAN------------------L
+31=p L-----------------NGVVKLQ--------KEVEECAN------------------L
+32== --NWRLGVESKAGGALCANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAGPTPCA--
+33=p --DWRRCAENRAVGTPCTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESGSNSYA--
+34== --SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS-------
+35=p --SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS-------
+36== VIGRPLPTISETTTTGFTNVSS----NNTS---PEKQSCANGLEADPPTTGYGAVAAAYY
+
+
+1== --------ESASTQK------AEKEVTRMVVLMVIGFLVCWVPYASVAFYIFT-HQGS--
+2== --------ESATTQK------AEKEVTRMVIIMVIFFLICWLPYASVAMYIFT-HQGS--
+3== --------ESATTQK------AEKEVTRMVILMVLGFMLAWTPYAVVAFWIFT-NKGA--
+4=p --------DSASTQK------AEREVTKMVILMVFGFLIAWTPYATVAAWIFF-NKGA--
+5=p --------DSASTQK------AEREVTKMVILMVLGFLVAWTPYATVAAWIFF-NKGA--
+6== --------DSASTQK------AEREVTKMVVVMVLGFLVCWAPYASFSLWIVS-HRGE--
+7== --------ESATTQK------AEREVSRMVVVMVGSFCVCYVPYAAFAMYMVN-NRNH--
+8=opsin, --------ESESTQK------AEKEVTRMVVVMVLAFC----------------------
+9== --------ESESTQK------AEKEVTRMVVVMVLAFCFCWGPYAFFACFAAA-NPGY--
+10== --------ESESTQK------AEKEVTRMVVVMIFAYCVCWGPYTFFACFAAA-NPGY--
+11== --------ESESTQK------AEREVSRMVVVMIVAFCICWGPYASFVSFAAA-NPGY--
+12== --------ESESTQK------AEKEVSRMVVVMIVAYCFCWGPYTFFACFAAA-NPGY--
+13== --------EADTTQR------AEREVTRMVIVMVMAFLLCWLPYSTFALVVAT-HKGI--
+14== -----RSNVDKNKET------AEIRIAKAAITICFLFFCSWTPYGVMSLIGAF-GDKT--
+15== -----RSNVDKSKEA------AEIRIAKAAITICFLFFASWTPYGVMSLIGAF-GDKT--
+16== -----RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-GDKS--
+17== -----RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-GDKS--
+18== -----RS-NEADAQR------AEIRIAKTALVNVSLWFICWTPYALISLKGVM-GDTS--
+19== -----RS-NEAETQR------AEIRIAKTALVNVSLWFICWTPYAAITIQGLL-GNAE--
+20== -----RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMGLF-KF-E--
+21== -----RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMGLF-KF-E--
+22== -----RS-SEDADKS------AEGKLAKVALVTISLWFMAWTPYLVINCMGLF-KF-E--
+23== -----RS-SEDCDKS------AEGKLAKVALTTISLWFMAWTPYLVICYFGLF-KI-D--
+24== -----RS-SEDCDKS------AENKLAKVALTTISLWFMAWTPYLIICYFGLF-KI-D--
+25== -----RS-SENQNTS------AECKLAKVALMTISLWFMAWTPYLVINFSGIF-NL-V--
+26== -----RANADQQKQS------AECRLAKVAMMTVGLWFMAWTPYLIISWAGVF-SSGT--
+27== -----R--KAQAGAS------AEMKLAKISMVIITQFMLSWSPYAIIALLAQF-GPAE--
+28== -----R--KAQAGAN------AEMRLAKISIVIVSQFLLSWSPYAVVALLAQF-GPLE--
+29== SRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCS
+30== SRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCS
+31=p SRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCS
+32== PASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-CESSC-
+33=p PACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-CESSC-
+34== ------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF-VDPE--
+35=p ------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF-VDPE--
+36== PSLVRRKPKEAADSK------RERKAAKTLAIITGAFVACWLPFFVLAILVPT-CDCE--
+ * . :
+
+1== DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLCC--------GKNPLGDDE-
+2== NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLCC--------GKNPLGDDE-
+3== DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTICC--------GKNPFGDEDV
+4=p DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIFC--------GKNPLGDDE-
+5=p AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLFC--------GKNPLGDEE-
+6== EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMMKMV-C--------GKN-IEEDE-
+7== GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIMKMV-C--------GKA-MTDES-
+8=opsin, ------------------------------------------------------------
+9== PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCILQLF----------GKK-VDDGS-
+10== AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCILQLF----------GKK-VDDGS-
+11== AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIMQLF----------GKK-VDDGS-
+12== AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCILQLF----------GKK-VDDGS-
+13== IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLLEMLCCG-----YQPQR-TGKAS-
+14== LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWLAL---------NEKAP-
+15== LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWLAI---------SEKAP-
+16== LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWLGV---------NEKSG-
+17== LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWLGV---------NEKSG-
+18== GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWFCV------HE-TETKS-
+19== GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWFCV------HE-KDPND-
+20== GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GK-VDDGK-
+21== GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GK-VDDGK-
+22== GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GK-VDDGK-
+23== GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMCVF------GN-TDEPK-
+24== GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMCVC------GT-TDEPK-
+25== KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSLAC-------A-AEPSS-
+26== RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSLAC------GS-GESGS-
+27== WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQFDEKE-CEDAN-
+28== WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQFDDKE-TEDDK-
+29== CIPLWVERTFLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYR---------------
+30== CIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQCQYR---------------
+31=p CIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYR---------------
+32== HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC---------------
+33=p HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC---------------
+34== GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR---------------
+35=p GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR---------------
+36== -VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLLCGRRV---------------
+
+
+1== -SGASTSKTEVSSVS-TSPVSPA-------------------------------------
+2== -ASATASKTE------TSQVAPA-------------------------------------
+3== SSTVSQSKTEVSSVS-SSQVSPA-------------------------------------
+4=p SSTVSTSKTEVSS------VSPA-------------------------------------
+5=p SSTVSTSKTEVSS------VSPA-------------------------------------
+6== -ASTSSQVTQVSS------VAPEK------------------------------------
+7== -DTCSSQKTEVSTVS-STQVGPN-------------------------------------
+8=opsin, ------------------------------------------------------------
+9== -ELSSASKTEVSSV---SSVSPA-------------------------------------
+10== -ELSSASKTEVSSV---SSVSPA-------------------------------------
+11== -EASTTSRTEVSSVS-NSSVAPA-------------------------------------
+12== -EVST-SRTEVSSVS-NSSVSPA-------------------------------------
+13== -PGTPGPHADVTAAGLRNKVMPAHPV----------------------------------
+14== -ESSAVASTSTTQEP--QQTTAA-------------------------------------
+15== -ESRAAISTSTTQEQ--QQTTAA-------------------------------------
+16== -EISSAQST-TTQEQ--QQTTAA-------------------------------------
+17== -EASSAQST-TTQEQ-TQQTSAA-------------------------------------
+18== -NDDSQSNSTVAQDK-A-------------------------------------------
+19== -VEENQSSNTQTQEK-S-------------------------------------------
+20== -SSDAQSQA-TASEA-ESKA----------------------------------------
+21== -SSDAQSQA-TASEA-ESKA----------------------------------------
+22== -SSEAQSQA-TTSEA-ESKA----------------------------------------
+23== -PDAPASDTETTSEA-DSKA----------------------------------------
+24== -PDAPPSDTETTSEA-ESKD----------------------------------------
+25== -DAVSTTSGTTTVTD-NEKSNA--------------------------------------
+26== -DVKSEASATTTMEE-KPKIPEA-------------------------------------
+27== -DAEEEVVASER--G-GESRDAAQMKEMMAMMQKMQAQQAAYQPPPPPQGY--PPQGYPP
+28== -DAETEIPAGESSDA-APSADAAQMKEMMAMMQKMQQQQAAY----PPQGYAPPPQGYPP
+29== -NINRKLSAAGMHEALKLAERPERPEFVL--------QNADY------------------
+30== -NINRKLSAAGMHEALKLAERPERSEFVL--------QNSDH------------------
+31=p -NINRKLSAAGMHEALKLAERPERSEFVL--------QNSDH------------------
+32== -RQ---------------------------------------------------------
+33=p -RR---------------------------------------------------------
+34== -RGHR-------------------------------------------------------
+35=p -RGHR-------------------------------------------------------
+36== -RRRRA---------------PQ-------------------------------------
+
+
+1== ----------------------------------------------------
+2== ----------------------------------------------------
+3== ----------------------------------------------------
+4=p ----------------------------------------------------
+5=p ----------------------------------------------------
+6== ----------------------------------------------------
+7== ----------------------------------------------------
+8=opsin, ----------------------------------------------------
+9== ----------------------------------------------------
+10== ----------------------------------------------------
+11== ----------------------------------------------------
+12== ----------------------------------------------------
+13== ----------------------------------------------------
+14== ----------------------------------------------------
+15== ----------------------------------------------------
+16== ----------------------------------------------------
+17== ----------------------------------------------------
+18== ----------------------------------------------------
+19== ----------------------------------------------------
+20== ----------------------------------------------------
+21== ----------------------------------------------------
+22== ----------------------------------------------------
+23== ----------------------------------------------------
+24== ----------------------------------------------------
+25== ----------------------------------------------------
+26== ----------------------------------------------------
+27== QGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQAYQA
+28== QGY--PPQGYPPQGYPPQGYPP---PPQGAPPQ-GAPPAAPPQGVDNQAYQA
+29== ---------------------------------------CRKKGHDS-----
+30== ---------------------------------------CGKKGHDT-----
+31=p ---------------------------------------CGKKGHDT-----
+32== ----------------------------------------------------
+33=p ----------------------------------------------------
+34== ----------------------------------------------------
+35=p ----------------------------------------------------
+36== ----------------------------------------------------
+
diff -r 6f28e90db932 -r bf28a8cff401 test-data/mafft_fftns_result.aln
--- a/test-data/mafft_fftns_result.aln Tue Oct 31 15:48:53 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,468 +0,0 @@
-> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91]
-----------------------------------------MNG----------------T
-E--G--DNFYVP----FSNKTGLARSPYEYPQY-------YLAEPWK---------YSAL
-AAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLFG-FTVTMYTSM
-N-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGN-FRFGNTHAIM
-GVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSCGPDYYTLNPNFNNESYVVYMF
-VVHFLVPFVIIFFCYGRLLCTV----KEAAAAQQ--------------------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------ESASTQK------AEKEVTRMVVLMVIGFLVCWVPYASVAFYIFT-H
-QGS--DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLCC--------GKNPL
-GDDE--SGASTSKTEVSSVS-TSPVSPA--------------------------------
----------------------------------------------------------
-> 2== U22180 1 rat opsin [J.Mol.Neurosci.5(3),207-209'94]
-----------------------------------------MNG----------------T
-E--G--PNFYVP----FSNITGVVRSPFEQPQY-------YLAEPWQ---------FSML
-AAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGG-FTTTLYTSL
-H-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSN-FRFGENHAIM
-GVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMF
-VVHFTIPMIVIFFCYGQLVFTV----KEAAAQQQ--------------------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------ESATTQK------AEKEVTRMVIIMVIFFLICWLPYASVAMYIFT-H
-QGS--NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLCC--------GKNPL
-GDDE--ASATASKTE------TSQVAPA--------------------------------
----------------------------------------------------------
-> 3== M92038 1 chicken green sensitive cone opsin [PNAS89,5932-5936'9
-----------------------------------------MNG----------------T
-E--G--INFYVP----MSNKTGVVRSPFEYPQY-------YLAEPWK---------YRLV
-CCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACFG-FTVTFYTAW
-N-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGN-FRFSATHAMM
-GIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSCGPDYYTHNPDYHNESYVLYMF
-VIHFIIPVVVIFFSYGRLICKV----REAAAQQQ--------------------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------ESATTQK------AEKEVTRMVILMVLGFMLAWTPYAVVAFWIFT-N
-KGA--DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTICC--------GKNPF
-GDEDVSSTVSQSKTEVSSVS-SSQVSPA--------------------------------
----------------------------------------------------------
-> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish
-----------------------------------------MNG----------------T
-E--G--KNFYVP----MSNRTGLVRSPFEYPQY-------YLAEPWQ---------FKIL
-ALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCFG-FTVTFYTAI
-N-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FKFSSSHAFA
-GIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSCGPDYYTLNPDYNNESYVIYMF
-VCHFILPVAVIFFTYGRLVCTV----KAAAAQQQ--------------------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------DSASTQK------AEREVTKMVILMVFGFLIAWTPYATVAAWIFF-N
-KGA--DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIFC--------GKNPL
-GDDE-SSTVSTSKTEVSS------VSPA--------------------------------
----------------------------------------------------------
-> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish
-----------------------------------------MNG----------------T
-E--G--NNFYVP----LSNRTGLVRSPFEYPQY-------YLAEPWQ---------FKLL
-AVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCFG-FTVTFYTAI
-N-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FKFSSTHASA
-GIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSCGPDYYTLNPEYNNESYVLYMF
-ICHFILPVTIIFFTYGRLVCTV----KAAAAQQQ--------------------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------DSASTQK------AEREVTKMVILMVLGFLVAWTPYATVAAWIFF-N
-KGA--AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLFC--------GKNPL
-GDEE-SSTVSTSKTEVSS------VSPA--------------------------------
----------------------------------------------------------
-> 6== L11864 1 Carassius auratus blue cone opsin [Biochemistry32,208-
-----------------------------------------MKQ----------------V
-PEFH--EDFYIPIPLDINNLS--AYSPFLVPQD-------HLGNQGI---------FMAM
-SVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIANLFVAIFG-SPLSFYSFF
-N-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGN-FTFKTPHAIA
-GCILPWISALAA-SLPPLFG-W-----SRYIPEGLQCSCGPDWYTTNNKYNNESYVMFLF
-CFCFAVPFGTIVFCYGQLLITL----KLAAKAQA--------------------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------DSASTQK------AEREVTKMVVVMVLGFLVCWAPYASFSLWIVS-H
-RGE--EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMM-KMVC--------GKN-I
-EEDE--ASTSSQVTQVSS------VAPEK-------------------------------
----------------------------------------------------------
-> 7== M13299 1 human BCP <>[Science232(4747),193-202'86]
-----------------------------------------MRK----------------M
-S--E--EEFYL-----FKNIS--SVGPWDGPQY-------HIAPVWA---------FYLQ
-AAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIFS-VFPVFVASC
-N-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGN-FRFSSKHALT
-VVLATWTIGIGV-SIPPFFG-W-----SRFIPEGLQCSCGPDWYTVGTKYRSESYTWFLF
-IFCFIVPLSLICFSYTQLLRAL----KAVAAQQQ--------------------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------ESATTQK------AEREVSRMVVVMVGSFCVCYVPYAAFAMYMVN-N
-RNH--GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIM-KMVC--------GKA-M
-TDES--DTCSSQKTEVSTVS-STQVGPN--------------------------------
----------------------------------------------------------
-> 8=opsin, greensensitive human (fragment) S07060
-------------------------------------------------------------
-------------------------------------------------------------
-------------------------------------------DLAETVIA-STISIVNQV
-S-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VRFDAKLAIV
-GIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLM
-VTCCITPLSIIVLCYLQVWLAI----RAVAKQQK--------------------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------ESESTQK------AEKEVTRMVVVMVLAFC-----------------
-------------------------------------------------------------
-------------------------------------------------------------
----------------------------------------------------------
-> 9== K03494 1 human GCP <>[Science232(4747),193-202'86]
-----------------------------------------MAQQWSLQRLAGRHPQDSYE
-DSTQ--SSIFT-----YTNSNS-TRGPFEGPNY-------HIAPRWV---------YHLT
-SVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISVVNQV
-Y-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGN-VRFDAKLAIV
-GIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLM
-VTCCITPLSIIVLCYLQVWLAI----RAVAKQQK--------------------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------ESESTQK------AEKEVTRMVVVMVLAFCFCWGPYAFFACFAAA-N
-PGY--PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCIL-QLF---------GKK-V
-DDGS--ELSSASKTEVSSV---SSVSPA--------------------------------
----------------------------------------------------------
-> 10== Z68193 1 human Red Opsin <>[]
-----------------------------------------MAQQWSLQRLAGRHPQDSYE
-DSTQ--SSIFT-----YTNSNS-TRGPFEGPNY-------HIAPRWV---------YHLT
-SVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISIVNQV
-S-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VRFDAKLAIV
-GIAFSWIWSAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLM
-VTCCIIPLAIIMLCYLQVWLAI----RAVAKQQK--------------------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------ESESTQK------AEKEVTRMVVVMIFAYCVCWGPYTFFACFAAA-N
-PGY--AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCIL-QLF---------GKK-V
-DDGS--ELSSASKTEVSSV---SSVSPA--------------------------------
----------------------------------------------------------
-> 11== M92036 1 Gecko gecko P521 [PNAS89,6841-6845'92]
-----------------------------------------MTEAWNVAVFAARRSRDD-D
-DTTR--GSVFT-----YTNTNN-TRGPFEGPNY-------HIAPRWV---------YNLV
-SFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVA-STISVFNQI
-F-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGN-IKFDSKLAII
-GIVFSWVWAWGW-SAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSVELGCQSFMLTLM
-ITCCFLPLFIIIVCYLQVWMAI----RAVAAQQK--------------------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------ESESTQK------AEREVSRMVVVMIVAFCICWGPYASFVSFAAA-N
-PGY--AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIM-QLF---------GKK-V
-DDGS--EASTTSRTEVSSVS-NSSVAPA--------------------------------
----------------------------------------------------------
-> 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90]
-----------------------------------------MAA-WEAAFAARRRHEE--E
-DTTR--DSVFT-----YTNSNN-TRGPFEGPNY-------HIAPRWV---------YNLT
-SVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIA-STISVINQI
-S-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGN-IKFDGKLAVA
-GILFSWLWSCAW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSDPGVQSYMVVLM
-VTCCFFPLAIIILCYLQVWLAI----RAVAAQQK--------------------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------ESESTQK------AEKEVSRMVVVMIVAYCFCWGPYTFFACFAAA-N
-PGY--AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCIL-QLF---------GKK-V
-DDGS--EVST-SRTEVSSVS-NSSVSPA--------------------------------
----------------------------------------------------------
-> 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95]
-----------------------------------------MS-----------------S
-NSSQ--AP-----------PNG-TPGPFDGPQW------PYQAPQST---------YVGV
-AVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLCG-SSVSLSNNI
-N-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGD-FQFQRRHAVS
-GCAFTWGWALLW-SAPPLLG-W-----SSYVPEGLRTSCGPNWYTGGSNN--NSYILSLF
-VTCFVLPLSLILFSYTNLLLTL----RAAAAQQK--------------------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------EADTTQR------AEREVTRMVIVMVMAFLLCWLPYSTFALVVAT-H
-KGI--IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLL-EMLCCGY----QPQR-T
-GKAS--PGTPGPHADVTAAGLRNKVMPAHPV-----------------------------
----------------------------------------------------------
-> 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87]
-----------MESGNVSS------------SLFGNVST-ALRP----------------E
-ARLS--A----------ETRLLGWNVPPEELR--------HIPEHWLTYPEPPESMNYLL
-GTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVK--TPIFIYNSF
-H-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG--KMTHGKAIA
-MIIFIYMYATPW-VVACYTETW-----GRFVPEGYLTSCTFDYLT--DNFDTRLFVACIF
-FFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKK-------------------------
---------------------------------------------------MN--------
---VESL------------------------------------------------------
-----------RSNVDKNKET------AEIRIAKAAITICFLFFCSWTPYGVMSLIGAF-G
-DKT--LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWLAL---------N
-EKAP--ESSAVASTSTTQEP--QQTTAA--------------------------------
----------------------------------------------------------
-> 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92
-----------MEYHNVSS------------VL-GNVSS-VLRP----------------D
-ARLS--A----------ESRLLGWNVPPDELR--------HIPEHWLIYPEPPESMNYLL
-GTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIK--TPIFIYNSF
-H-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG--KMTHGKAIA
-MIIFIYLYATPW-VVACYTESW-----GRFVPEGYLTSCTFDYLT--DNFDTRLFVACIF
-FFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKK-------------------------
---------------------------------------------------MN--------
---VDSL------------------------------------------------------
-----------RSNVDKSKEA------AEIRIAKAAITICFLFFASWTPYGVMSLIGAF-G
-DKT--LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWLAI---------S
-EKAP--ESRAAISTSTTQEQ--QQTTAA--------------------------------
----------------------------------------------------------
-> 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87]
-----------ME------------------PL-CNASEPPLRP----------------E
-AR-S--SG---N----GDLQFLGWNVPPDQIQ--------YIPEHWLTQLEPPASMHYML
-GVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIF--NSF
-H-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKAVI
-MNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDTRLFVGTIF
-FFSFVCPTLMILYYYSQIVGHVFSHEKALREQAKK-------------------------
---------------------------------------------------MN--------
---VESL------------------------------------------------------
-----------RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-G
-DKS--LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWLGV---------N
-EKSG--EISSAQST-TTQEQ--QQTTAA--------------------------------
----------------------------------------------------------
-> 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92
-----------MD------------------AL-CNASEPPLRP----------------E
-ARMS--SG---S----DELQFLGWNVPPDQIQ--------YIPEHWLTQLEPPASMHYML
-GVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIFIYNSF
-H-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKAVI
-MNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDTRLFVGTIF
-LFSFVVPTLMILYYYSQIVGHVFNHEKALREQAKK-------------------------
---------------------------------------------------MN--------
---VESL------------------------------------------------------
-----------RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-G
-DKS--LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWLGV---------N
-EKSG--EASSAQST-TTQEQ-TQQTSAA--------------------------------
----------------------------------------------------------
-> 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 [J.Exp.Biol.1
---------------------------------MTNATGPQMAY----------------Y
-GAAS--MD-FGY----PEGVSIVDFVRPEIKP--------YVHQHWYNYPPVNPMWHYLL
-GVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTN-VPFFTYNCF
-SGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNG-PKLTTGKAVV
-FALISWVIAIGC-ALPPFFG-W-----GNYILEGILDSCSYDYLT--QDFNTFSYNIFIF
-VFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQAKK-------------------------
---------------------------------------------------MN--------
---VSTL------------------------------------------------------
-----------RS-NEADAQR------AEIRIAKTALVNVSLWFICWTPYALISLKGVM-G
-DTS--GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWFCV------HE-T
-ETKS--NDDSQSNSTVAQDK-A--------------------------------------
----------------------------------------------------------
-> 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 [J.Exp.Biol.1
---------------------------------MANVTGPQMAF----------------Y
-GSGA--AT-FGY----PEGMTVADFVPDRVKH--------MVLDHWYNYPPVNPMWHYLL
-GVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTN-FPPFCYNCF
-SGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNG-PKLTQGKATF
-MCGLAWVISVGW-SLPPFFG-W-----GSYTLEGILDSCSYDYFT--RDMNTITYNICIF
-IFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQAKK-------------------------
---------------------------------------------------MN--------
---VTNL------------------------------------------------------
-----------RS-NEAETQR------AEIRIAKTALVNVSLWFICWTPYAAITIQGLL-G
-NAE--GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWFCV------HE-K
-DPND--VEENQSSNTQTQEK-S--------------------------------------
----------------------------------------------------------
-> 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85]
-----------ME----SF------------AVAAAQLGPHFAP----------------L
-S-----------------NGSVVDKVTPDMAH--------LISPYWNQFPAMDPIWAKIL
-TAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLY
-F-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLALG
-KM---------------------------YVPEGNLTSCGIDYLE--RDWNPRSYLIFYS
-IFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK-------------------------
---------------------------------------------------MN--------
---VKSL------------------------------------------------------
-----------RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMGLF-K
-F-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GK-V
-DDGK--SSDAQSQA-TASEA-ESKA-----------------------------------
----------------------------------------------------------
-> 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85]
-----------ME----SF------------AVAAAQLGPHFAP----------------L
-S-----------------NGSVVDKVTPDMAH--------LISPYWNQFPAMDPIWAKIL
-TAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLY
-F-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLALG
-KIAYIWFMSSIW-CLAPAFG-W-----SRYVPEGNLTSCGIDYLE--RDWNPRSYLIFYS
-IFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK-------------------------
---------------------------------------------------MN--------
---VKSL------------------------------------------------------
-----------RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMGLF-K
-F-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GK-V
-DDGK--SSDAQSQA-TASEA-ESKA-----------------------------------
----------------------------------------------------------
-> 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204'
-----------MD----SF------------AAVATQLGPQFAA----------------P
-S-----------------NGSVVDKVTPDMAH--------LISPYWDQFPAMDPIWAKIL
-TAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLY
-F-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLALG
-KIAYIWFMSTIWCCLAPVFG-W-----SRYVPEGNLTSCGIDYLE--RDWNPRSYLIFYS
-IFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK-------------------------
---------------------------------------------------MN--------
---VKSL------------------------------------------------------
-----------RS-SEDADKS------AEGKLAKVALVTISLWFMAWTPYLVINCMGLF-K
-F-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GK-V
-DDGK--SSEAQSQA-TTSEA-ESKA-----------------------------------
----------------------------------------------------------
-> 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86]
------MERSHLP----ET------------PFDLAHSGPRFQA----------------Q
-SSG---------------NGSVLDNVLPDMAH--------LVNPYWSRFAPMDPMMSKIL
-GLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINFY
-Y-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGING-TPMTIKTSIM
-KILFIWMMAVFW-TVMPLIG-W-----SAYVPEGNLTACSIDYMT--RMWNPRSYLITYS
-LFVYYTPLFLICYSYWFIIAAVAAHEKAMREQAKK-------------------------
---------------------------------------------------MN--------
---VKSL------------------------------------------------------
-----------RS-SEDCDKS------AEGKLAKVALTTISLWFMAWTPYLVICYFGLF-K
-I-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMCVF------GN-T
-DEPK--PDAPASDTETTSEA-DSKA-----------------------------------
----------------------------------------------------------
-> 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92
------MERSLLP----EP------------PLAMALLGPRFEA----------------Q
-TGG---------------NRSVLDNVLPDMAP--------LVNPHWSRFAPMDPTMSKIL
-GLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINFY
-Y-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGING-TPMTIKTSIM
-KIAFIWMMAVFW-TIMPLIG-W-----SSYVPEGNLTACSIDYMT--RQWNPRSYLITYS
-LFVYYTPLFMICYSYWFIIATVAAHEKAMRDQAKK-------------------------
---------------------------------------------------MN--------
---VKSL------------------------------------------------------
-----------RS-SEDCDKS------AENKLAKVALTTISLWFMAWTPYLIICYFGLF-K
-I-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMCVC------GT-T
-DEPK--PDAPPSDTETTSEA-ESKD-----------------------------------
----------------------------------------------------------
-> 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[]
---------------------------------MIAVSGPSYEA----------------F
-SYGG--QARF-------NNQTVVDKVPPDMLH--------LIDANWYQYPPLNPMWHGIL
-GFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCM-SPPMVINCY
-Y-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSG-KPLSINGALI
-RIIAIWLFSLGW-TIAPMFG-W-----NRYVPEGNMTACGTDYFN--RGLLSASYLVCYG
-IWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQAKK-------------------------
---------------------------------------------------MN--------
---VASL------------------------------------------------------
-----------RS-SENQNTS------AECKLAKVALMTISLWFMAWTPYLVINFSGIF-N
-L-V--KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSLAC-------A-A
-EPSS--DAVSTTSGTTTVTD-NEKSNA---------------------------------
----------------------------------------------------------
-> 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93]
-----------------------------------MANQLSYSS----------------L
-GWPY--QP----------NASVVDTMPKEMLY--------MIHEHWYAFPPMNPLWYSIL
-GVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFM-MPTMTSNCF
-A-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAA-APLTHKKATL
-LLLFVWIWSGGW-TILPFFG-W-----SRYVPEGNLTSCTVDYLT--KDWSSASYVVIYG
-LAVYFLPLITMIYCYFFIVHAVAEHEKQLREQAKK-------------------------
---------------------------------------------------MN--------
---VASL------------------------------------------------------
-----------RANADQQKQS------AECRLAKVAMMTVGLWFMAWTPYLIISWAGVF-S
-SGT--RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSLAC------GS-G
-ESGS--DVKSEASATTTMEE-KPKIPEA--------------------------------
----------------------------------------------------------
-> 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88]
----------------------------------------MVES----------------T
-TLVN--QT-WWY------NPTVD------------------IHPHWAKFDPIPDAVYYSV
-GIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAINGFPLKTISAF
-M-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAASKKMSHRRAFL
-MIIFVWMWSIVW-SVGPVFN-W-----GAYVPEGILTSCSFDYLS--TDPSTRSFILCMY
-FCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAKR-------------------------
---------------------------------------------------LN--------
---AKEL------------------------------------------------------
-----------R--KAQAGAS------AEMKLAKISMVIITQFMLSWSPYAIIALLAQF-G
-PAE--WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQFDEKE-C
-EDAN--DAEEEVVASER--G-GESRDAAQMKEMMAMMQKMQAQQAAYQPPPPPQGY--PP
-QGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQAYQA
-> 28== X70498 1 Todarodes pacificus rhodopsin [FEBS317(1-2),5-11'93]
-----------------------------------------MGR----------------D
-LRDN--ET-WWY------NPSIV------------------VHPHWREFDQVPDAVYYSL
-GIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLVNGFPLMTISCF
-L-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAASKKMSHRRAFI
-MIIFVWLWSVLW-AIGPIFG-W-----GAYTLEGVLCNCSFDYIS--RDSTTRSNILCMF
-ILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKR-------------------------
---------------------------------------------------LN--------
---AKEL------------------------------------------------------
-----------R--KAQAGAN------AEMRLAKISIVIVSQFLLSWSPYAVVALLAQF-G
-PLE--WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQFDDKE-T
-EDDK--DAETEIPAGESSDA-APSADAAQMKEMMAMMQKMQQQQAAY----PPQGYAPPP
-QGYPPQGY--PPQGYPPQGYPPQGYPP---PPQGAPPQ-GAPPAAPPQGVDNQAYQA
-> 29== L21195 1 human serotonin 5-HT7 receptor protein 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93]
-------------------------------------------------------------
-------MPHLLS---GFLEVTASPAPTWDAPPDNVSGCGEQIN--------YGRVEKVVI
-GSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDL
-IGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAK
-MILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GYTIYST
-AVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF----------------------
----------------------------------PGFPRV----QPESVISLNG-------
---VVKL----------------------QK---------EVEECAN--------------
-----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFIC
-GTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQCQYR----NINR--
-------KLSAAGMHEALKLAERPERSEF------------VLQNSDH-------------
---------------------------------------------CGKKGHDT-----
-> 31=p A47425 serotonin receptor 5HT-7 - rat
-------------------------------------------------------------
-------MPHLLS---GFLEVTASPAPTWDAPPDNVSGCGEQIN--------YGRVEKVVI
-GSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDL
-IGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAK
-MILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GYTIYST
-AVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF----------------------
----------------------------------PGFPRV----QPESVISLNG-------
---VVKL----------------------QK---------EVEECAN--------------
-----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFIC
-GTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYR----NINR--
-------KLSAAGMHEALKLAERPERSEF------------VLQNSDH-------------
---------------------------------------------CGKKGHDT-----
-> 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92]
-----------MDVLSP--------------------------------------------
----------------GQGNNTTSPPAPFET-GGNTTGISDVT---------VSYQ--VIT
-SLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQV
-L-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAA
-LISLTWLIGFLI-SIPPMLG-WRTPEDRSDPD---ACTISKDH----------GYTIYST
-FGAFYIPLLLMLVLYGRIF-------RAARFRIRK-------------------------
---------------TVKKVEKTGADTRHGASPAPQPKKS-----------VNGESGSR--
--------NWRLGVESKAGGALCANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAG--
-PTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-C
-ESSC-HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC----RQ----
-------------------------------------------------------------
----------------------------------------------------------
-> 33=p A35181 serotonin receptor class 1A - rat
-----------MDVFSF--------------------------------------------
----------------GQGNNTTASQEPFGT-GGNVTSISDVT---------FSYQ--VIT
-SLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQV
-L-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAA
-LISLTWLIGFLI-SIPPMLG-WRTPEDRSDPD---ACTISKDH----------GYTIYST
-FGAFYIPLLLMLVLYGRIF-------RAARFRIRK-------------------------
---------------TVRKVEKKGAGTSLGTSSAPPPKKS-----------LNGQPGSG--
--------DWRRCAENRAVGTPCTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESG--
-SNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-C
-ESSC-HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC----RR----
-------------------------------------------------------------
----------------------------------------------------------
-> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93]
-MANFTFGDLALDVARMG-----GLASTPSGLRSTGLTTPGLSPTGLV------------T
-SDFN--DSYGLT---GQFINGSHSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLT
-SVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEI
-S-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILL
-MIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK----------GYTIFST
-VGAFYLPMLVMMIIYIRIW-------LVARSRIRKDKFQMTKARLKTEETTLVASPKTEY
-SVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS---
--------SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS--
------------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF-V
-DPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR----RGHR--
-------------------------------------------------------------
----------------------------------------------------------
-> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail
-MANFTFGDLALDVARMG-----GLASTPSGLRSTGLTTPGLSPTGLV------------T
-SDFN--DSYGLT---GQFINGSHSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLT
-SVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEI
-S-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILL
-MIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK----------GYTIFST
-VGAFYLPMLVMMIIYIRIW-------LVARSRIRKDKFQMTKARLKTEETTLVASPKTEY
-SVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS---
--------SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS--
------------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF-V
-DPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR----RGHR--
-------------------------------------------------------------
----------------------------------------------------------
-> 36== X95604 1 Bombyx mori serotonin receptor [InsectBiochem.Mol.Bi
--MEGAEGQEELDWEAL-------YLRLP--LQNCSWNSTGWEPNWNV------------T
-VVPN--TTWW------------QASAPFDTPAALVRAAAK--------------------
-AVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLV-MPLGAVYEV
-V-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDYIHASTAKRVGM
-MIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCVVSQDV----------GYQIFAT
-ASSFYVPVLIILILYWRIY-------QTARKRIR--------------------------
--------------------RRRGATARGGVGPPP---------VPAGGALVAGGGSGGIA
-AAVVAVIGRPLPTISETTTTGFTNVSS----NNTSP---EKQSCANGLEADPPTTGYGAV
-AAAYYPSLVRRKPKEAADSK------RERKAAKTLAIITGAFVACWLPFFVLAILVPT-C
-DCE---VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLLCGRRV----RRRR--
-------A---------------PQ------------------------------------
----------------------------------------------------------
diff -r 6f28e90db932 -r bf28a8cff401 test-data/mafft_kimura40.phylip.aln
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mafft_kimura40.phylip.aln Wed Mar 20 07:34:52 2024 +0000
@@ -0,0 +1,77 @@
+ 3 948
+MZ681498.1 atgcatgtat aagtataacc tgccagacag ggaaactgcg gacggctcat
+MZ681497.1 ---------- ---------- ---------- -----ctgcg gacggctcat
+ON855043.1 ---------- ---------- -ggccgtca- -----ctccg gggggacaac
+
+ tacaacagcc ttaatttact tgaccttgac aacctacttg gataactgcg
+ tacaacagcc ttaatttact tgaccttgac aacctacttg gataactgcg
+ acaaacgccc tgagggcttt actcgttggg gtgcaaactg ga--------
+
+ gtaattctgg agctaataca tgcaccaaag ctccgatccc tcgtggagag
+ gtaattctgg agctaataca tgcaccaaag ctccgatccc tcgtggagag
+ ---------- ---------- ---------- ---------- ---tcgagtg
+
+ gagcgcattt gttcgccaca aaaccaaacg ccccacaagg gcgttcagtg
+ gagcgcattt gttcgccaca aaaccaaacg ccccacaagg gcgttcagtg
+ gcgcacacat cccttccacg caa--agacc tgctgaagag gtcggaggcg
+
+ ttgactcaga ataact---- ---------- -------aag ctgatcgcac
+ ttgactcaga ataact---- ---------- -------aag ctgatcgcac
+ atgagtccga gcaaccccac aagcaaccag gttggggaag ctg--cacac
+
+ ggtctt---- ---------- ----gcaccg gcgacgtgtc tttcaagtgt
+ ggtctt---- ---------- ----gcaccg gcgacgtgtc tttcaagtgt
+ gatactggga tgcacgcccc cagggcacct aacggctgcc gctggcgtct
+
+ ctgccttatc aactttcgat ggtagtgtat ctgcctacca tggttgtgac
+ ctgccttatc aactttcgat ggtagtgtat ctgcctacca tggttgtgac
+ gtgcgtcgtt ga---gcagt tgttgcgcac ttgctt---- ttgtcggagc
+
+ gggtaacgga ggataagggt tcgactccgg agaaggggcc tgagaaatgg
+ gggtaacgga ggataagggt tcgactccgg agaaggggcc tgagaaatgg
+ tgtactcgga gcatgctggc atggacccac acaaaag--- ----------
+
+ ccactacgtc taaggatggc agcaggcgcg caaattaccc actctcaaca
+ ccactacgtc taaggatggc agcaggcgcg caaattaccc actctcaaca
+ ---------- --tgtgtggc agcggccaca ca------cc cctgtccatg
+
+ cgctgaggag gtagtgaaga gaaataacga gaccgttctc acatgaggcc
+ cgctgaggag gtagtgaaga gaaataacga gaccgttctc acatgaggcc
+ tcctacgga- ---------- -ccgtagcta gggcgtgct- ----------
+
+ ggtcatcgga atgggtacaa cttaaaccct ttaacgagta tctatgagag
+ ggtcatcgga atgggtacaa cttaaaccct ttaacgagta tctatgagag
+ ---------- ---------- ---------- ---------- ----------
+
+ ggcaagtctg gtgccagcag ccgcggtaat tccagctctc aaaatgcata
+ ggcaagtctg gtgccagcag ccgcggtaat tccagctctc aaaatgcata
+ ---------- ---------- -ctgggtttc ttcggctggc agtgttgcta
+
+ gaattattgc tgcggttaaa aagctcgtag ttggatctgt gctggccgcc
+ gaattattgc tgcggttaaa aagctcgtag ttggatctgt gctggccgcc
+ cgtccgtggc tgtgatgaga cgacgcg--- ---------- ----------
+
+ cggtctgctc gctgagcacg cactggtgcg gtcggctttc ctgcccggta
+ cggtctgctc gctgagcacg cactggtgcg gtcggctttc ctgcccggta
+ ---------- -----gtagg gccttgtgcg atgcgcct-- ----------
+
+ cctccccggc gttggccttc accggtcggc gtcggtggcc gggcgagttt
+ cctccccggc gttggccttc accggtcggc gtcggtggcc gggcgagttt
+ --------gc acttggctta a--------- ------gact tgatgagctc
+
+ actttgaaca aatcagag-- -----tgctt caaacaggcg tttcgcttga
+ actttgaaca aatcagag-- -----tgctt caaacaggcg tttcgcttga
+ actgcgaaga gccgccagca accttttttt catatacatt ttttacaggc
+
+ atgttcgtgc atggaataat agaagaggat ttcggtccga ttttgttggt
+ atgttcgtgc atggaataat agaagaggat ttcggtccga ttttgttggt
+ acacttgtgt gctgatgaac aaaa------ ---------- ----------
+
+ tttgctgacc gagataatgg ttaacagaga caaacggggc cattcgtatt
+ tttgctgacc gagataatgg ttaacagaga caaacggggc cattcgtatt
+ ---------- ---------- ---------- ---------- -------att
+
+ gctacgtgag aggtg----- ---------- ---------- --------
+ gctacgtgag aggtgaaatt cttggaccgt agcaagacgg actacagc
+ ctagccttat cggtggatca ctcggctcgt aggtcgatg- --------
+
diff -r 6f28e90db932 -r bf28a8cff401 test-data/mafft_nwns_result.aln
--- a/test-data/mafft_nwns_result.aln Tue Oct 31 15:48:53 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,458 +0,0 @@
-CLUSTAL format alignment by MAFFT NW-NS-2 (v7.455)
-
-
-1== ----------------------------------------MNG----------------T
-2== ----------------------------------------MNG----------------T
-3== ----------------------------------------MNG----------------T
-4=p ----------------------------------------MNG----------------T
-5=p ----------------------------------------MNG----------------T
-6== ----------------------------------------MKQ----------------V
-7== ----------------------------------------MRK----------------M
-8=opsin, ------------------------------------------------------------
-9== ----------------------------------------MAQQWSLQRLAGRHPQDSYE
-10== ----------------------------------------MAQQWSLQRLAGRHPQDSYE
-11== ----------------------------------------MTEAWNVAVFAARRSRDD-D
-12== ----------------------------------------MAA-WEAAFAARRRHEE--E
-13== ----------------------------------------MS-----------------S
-14== ----------MESGNVSS------------SLFGNVST-ALRP----------------E
-15== ----------MEYHNVSS------------VL-GNVSS-VLRP----------------D
-16== ----------ME------------------PL-CNASEPPLRP----------------E
-17== ----------MD------------------AL-CNASEPPLRP----------------E
-18== --------------------------------MTNATGPQMAY----------------Y
-19== --------------------------------MANVTGPQMAF----------------Y
-20== ----------ME----SF------------AVAAAQLGPHFAP----------------L
-21== ----------ME----SF------------AVAAAQLGPHFAP----------------L
-22== ----------MD----SF------------AAVATQLGPQFAA----------------P
-23== -----MERSHLP----ET------------PFDLAHSGPRFQA----------------Q
-24== -----MERSLLP----EP------------PLAMALLGPRFEA----------------Q
-25== --------------------------------MIAVSGPSYEA----------------F
-26== ----------------------------------MANQLSYSS----------------L
-27== ---------------------------------------MVES----------------T
-28== ----------------------------------------MGR----------------D
-29== ---------MMDVNSSGRPDLYGHLRSFL-LPEVGRGLPDLSPDGGA------------D
-30== ------------------------------------------------------------
-31=p ------------------------------------------------------------
-32== ----------MDVLSP--------------------------------------------
-33=p ----------MDVFSF--------------------------------------------
-34== MANFTFGDLALDVARMG-----GLASTPSGLRSTGLTTPGLSPTGLV------------T
-35=p MANFTFGDLALDVARMG-----GLASTPSGLRSTGLTTPGLSPTGLV------------T
-36== -MEGAEGQEELDWEAL-------YLRLP--LQNCSWNSTGWEPNWNV------------T
-
-
-1== E--G--DNFYVP----FSNKTGLARSPYEYPQY-------YLAEPWK---------YSAL
-2== E--G--PNFYVP----FSNITGVVRSPFEQPQY-------YLAEPWQ---------FSML
-3== E--G--INFYVP----MSNKTGVVRSPFEYPQY-------YLAEPWK---------YRLV
-4=p E--G--KNFYVP----MSNRTGLVRSPFEYPQY-------YLAEPWQ---------FKIL
-5=p E--G--NNFYVP----LSNRTGLVRSPFEYPQY-------YLAEPWQ---------FKLL
-6== PEFH--EDFYIPIPLDINNLS--AYSPFLVPQD-------HLGNQGI---------FMAM
-7== S--E--EEFYL-----FKNIS--SVGPWDGPQY-------HIAPVWA---------FYLQ
-8=opsin, ------------------------------------------------------------
-9== DSTQ--SSIFT-----YTNSNS-TRGPFEGPNY-------HIAPRWV---------YHLT
-10== DSTQ--SSIFT-----YTNSNS-TRGPFEGPNY-------HIAPRWV---------YHLT
-11== DTTR--GSVFT-----YTNTNN-TRGPFEGPNY-------HIAPRWV---------YNLV
-12== DTTR--DSVFT-----YTNSNN-TRGPFEGPNY-------HIAPRWV---------YNLT
-13== NSSQ--AP-----------PNG-TPGPFDGPQW------PYQAPQST---------YVGV
-14== ARLS--A----------ETRLLGWNVPPEELR--------HIPEHWLTYPEPPESMNYLL
-15== ARLS--A----------ESRLLGWNVPPDELR--------HIPEHWLIYPEPPESMNYLL
-16== AR-S--SG---N----GDLQFLGWNVPPDQIQ--------YIPEHWLTQLEPPASMHYML
-17== ARMS--SG---S----DELQFLGWNVPPDQIQ--------YIPEHWLTQLEPPASMHYML
-18== GAAS--MD-FGY----PEGVSIVDFVRPEIKP--------YVHQHWYNYPPVNPMWHYLL
-19== GSGA--AT-FGY----PEGMTVADFVPDRVKH--------MVLDHWYNYPPVNPMWHYLL
-20== S-----------------NGSVVDKVTPDMAH--------LISPYWNQFPAMDPIWAKIL
-21== S-----------------NGSVVDKVTPDMAH--------LISPYWNQFPAMDPIWAKIL
-22== S-----------------NGSVVDKVTPDMAH--------LISPYWDQFPAMDPIWAKIL
-23== SSG---------------NGSVLDNVLPDMAH--------LVNPYWSRFAPMDPMMSKIL
-24== TGG---------------NRSVLDNVLPDMAP--------LVNPHWSRFAPMDPTMSKIL
-25== SYGG--QARF-------NNQTVVDKVPPDMLH--------LIDANWYQYPPLNPMWHGIL
-26== GWPY--QP----------NASVVDTMPKEMLY--------MIHEHWYAFPPMNPLWYSIL
-27== TLVN--QT-WWY------NPTVD------------------IHPHWAKFDPIPDAVYYSV
-28== LRDN--ET-WWY------NPSIV------------------VHPHWREFDQVPDAVYYSL
-29== PVAGSWAPHLLS------EVTASPAPTWDAPPDNASGCGEQIN--------YGRVEKVVI
-30== ------MPHLLS---GFLEVTASPAPTWDAPPDNVSGCGEQIN--------YGRVEKVVI
-31=p ------MPHLLS---GFLEVTASPAPTWDAPPDNVSGCGEQIN--------YGRVEKVVI
-32== ---------------GQGNNTTSPPAPFET-GGNTTGISDVT---------VSYQ--VIT
-33=p ---------------GQGNNTTASQEPFGT-GGNVTSISDVT---------FSYQ--VIT
-34== SDFN--DSYGLT---GQFINGSHSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLT
-35=p SDFN--DSYGLT---GQFINGSHSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLT
-36== VVPN--TTWW------------QASAPFDTPAALVRAAAK--------------------
-
-
-1== AAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLFG-FTVTMYTSM
-2== AAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGG-FTTTLYTSL
-3== CCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACFG-FTVTFYTAW
-4=p ALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCFG-FTVTFYTAI
-5=p AVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCFG-FTVTFYTAI
-6== SVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIANLFVAIFG-SPLSFYSFF
-7== AAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIFS-VFPVFVASC
-8=opsin, ------------------------------------------DLAETVIA-STISIVNQV
-9== SVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISVVNQV
-10== SVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISIVNQV
-11== SFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVA-STISVFNQI
-12== SVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIA-STISVINQI
-13== AVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLCG-SSVSLSNNI
-14== GTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVK--TPIFIYNSF
-15== GTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIK--TPIFIYNSF
-16== GVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIF--NSF
-17== GVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIFIYNSF
-18== GVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTN-VPFFTYNCF
-19== GVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTN-FPPFCYNCF
-20== TAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLY
-21== TAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLY
-22== TAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLY
-23== GLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINFY
-24== GLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINFY
-25== GFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCM-SPPMVINCY
-26== GVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFM-MPTMTSNCF
-27== GIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAINGFPLKTISAF
-28== GIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLVNGFPLMTISCF
-29== GSILTLITLLTIAGNCLVVISVCFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDL
-30== GSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDL
-31=p GSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDL
-32== SLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQV
-33=p SLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQV
-34== SVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEI
-35=p SVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEI
-36== AVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLV-MPLGAVYEV
- .
-
-1== N-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGN-FRFGNTHAIM
-2== H-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSN-FRFGENHAIM
-3== N-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGN-FRFSATHAMM
-4=p N-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FKFSSSHAFA
-5=p N-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FKFSSTHASA
-6== N-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGN-FTFKTPHAIA
-7== N-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGN-FRFSSKHALT
-8=opsin, S-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VRFDAKLAIV
-9== Y-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGN-VRFDAKLAIV
-10== S-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VRFDAKLAIV
-11== F-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGN-IKFDSKLAII
-12== S-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGN-IKFDGKLAVA
-13== N-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGD-FQFQRRHAVS
-14== H-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG--KMTHGKAIA
-15== H-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG--KMTHGKAIA
-16== H-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKAVI
-17== H-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKAVI
-18== SGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNG-PKLTTGKAVV
-19== SGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNG-PKLTQGKATF
-20== F-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLALG
-21== F-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLALG
-22== F-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLALG
-23== Y-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGING-TPMTIKTSIM
-24== Y-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGING-TPMTIKTSIM
-25== Y-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSG-KPLSINGALI
-26== A-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAA-APLTHKKATL
-27== M-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAASKKMSHRRAFL
-28== L-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAASKKMSHRRAFI
-29== IGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAK
-30== IGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAK
-31=p IGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAK
-32== L-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAA
-33=p L-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAA
-34== S-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILL
-35=p S-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILL
-36== V-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDYIHASTAKRVGM
- : : * : : :*: : :
-
-1== GVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSCGPDYYTLNPNFNNESYVVYMF
-2== GVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMF
-3== GIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSCGPDYYTHNPDYHNESYVLYMF
-4=p GIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSCGPDYYTLNPDYNNESYVIYMF
-5=p GIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSCGPDYYTLNPEYNNESYVLYMF
-6== GCILPWISALAA-SLPPLFG-W-----SRYIPEGLQCSCGPDWYTTNNKYNNESYVMFLF
-7== VVLATWTIGIGV-SIPPFFG-W-----SRFIPEGLQCSCGPDWYTVGTKYRSESYTWFLF
-8=opsin, GIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLM
-9== GIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLM
-10== GIAFSWIWSAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLM
-11== GIVFSWVWAWGW-SAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSVELGCQSFMLTLM
-12== GILFSWLWSCAW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSDPGVQSYMVVLM
-13== GCAFTWGWALLW-SAPPLLG-W-----SSYVPEGLRTSCGPNWYTGGSNN--NSYILSLF
-14== MIIFIYMYATPW-VVACYTETW-----GRFVPEGYLTSCTFDYLT--DNFDTRLFVACIF
-15== MIIFIYLYATPW-VVACYTESW-----GRFVPEGYLTSCTFDYLT--DNFDTRLFVACIF
-16== MNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDTRLFVGTIF
-17== MNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDTRLFVGTIF
-18== FALISWVIAIGC-ALPPFFG-W-----GNYILEGILDSCSYDYLT--QDFNTFSYNIFIF
-19== MCGLAWVISVGW-SLPPFFG-W-----GSYTLEGILDSCSYDYFT--RDMNTITYNICIF
-20== KM---------------------------YVPEGNLTSCGIDYLE--RDWNPRSYLIFYS
-21== KIAYIWFMSSIW-CLAPAFG-W-----SRYVPEGNLTSCGIDYLE--RDWNPRSYLIFYS
-22== KIAYIWFMSTIWCCLAPVFG-W-----SRYVPEGNLTSCGIDYLE--RDWNPRSYLIFYS
-23== KILFIWMMAVFW-TVMPLIG-W-----SAYVPEGNLTACSIDYMT--RMWNPRSYLITYS
-24== KIAFIWMMAVFW-TIMPLIG-W-----SSYVPEGNLTACSIDYMT--RQWNPRSYLITYS
-25== RIIAIWLFSLGW-TIAPMFG-W-----NRYVPEGNMTACGTDYFN--RGLLSASYLVCYG
-26== LLLFVWIWSGGW-TILPFFG-W-----SRYVPEGNLTSCTVDYLT--KDWSSASYVVIYG
-27== MIIFVWMWSIVW-SVGPVFN-W-----GAYVPEGILTSCSFDYLS--TDPSTRSFILCMY
-28== MIIFVWLWSVLW-AIGPIFG-W-----GAYTLEGVLCNCSFDYIS--RDSTTRSNILCMF
-29== MILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GYTIYST
-30== MILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GYTIYST
-31=p MILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GYTIYST
-32== LISLTWLIGFLI-SIPPMLG-WRTPEDRSDPD---ACTISKDH----------GYTIYST
-33=p LISLTWLIGFLI-SIPPMLG-WRTPEDRSDPD---ACTISKDH----------GYTIYST
-34== MIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK----------GYTIFST
-35=p MIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK----------GYTIFST
-36== MIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCVVSQDV----------GYQIFAT
- :
-
-1== VVHFLVPFVIIFFCYGRLLCTV----KEAAAAQQ--------------------------
-2== VVHFTIPMIVIFFCYGQLVFTV----KEAAAQQQ--------------------------
-3== VIHFIIPVVVIFFSYGRLICKV----REAAAQQQ--------------------------
-4=p VCHFILPVAVIFFTYGRLVCTV----KAAAAQQQ--------------------------
-5=p ICHFILPVTIIFFTYGRLVCTV----KAAAAQQQ--------------------------
-6== CFCFAVPFGTIVFCYGQLLITL----KLAAKAQA--------------------------
-7== IFCFIVPLSLICFSYTQLLRAL----KAVAAQQQ--------------------------
-8=opsin, VTCCITPLSIIVLCYLQVWLAI----RAVAKQQK--------------------------
-9== VTCCITPLSIIVLCYLQVWLAI----RAVAKQQK--------------------------
-10== VTCCIIPLAIIMLCYLQVWLAI----RAVAKQQK--------------------------
-11== ITCCFLPLFIIIVCYLQVWMAI----RAVAAQQK--------------------------
-12== VTCCFFPLAIIILCYLQVWLAI----RAVAAQQK--------------------------
-13== VTCFVLPLSLILFSYTNLLLTL----RAAAAQQK--------------------------
-14== FFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKK-------------------------
-15== FFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKK-------------------------
-16== FFSFVCPTLMILYYYSQIVGHVFSHEKALREQAKK-------------------------
-17== LFSFVVPTLMILYYYSQIVGHVFNHEKALREQAKK-------------------------
-18== VFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQAKK-------------------------
-19== IFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQAKK-------------------------
-20== IFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK-------------------------
-21== IFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK-------------------------
-22== IFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK-------------------------
-23== LFVYYTPLFLICYSYWFIIAAVAAHEKAMREQAKK-------------------------
-24== LFVYYTPLFMICYSYWFIIATVAAHEKAMRDQAKK-------------------------
-25== IWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQAKK-------------------------
-26== LAVYFLPLITMIYCYFFIVHAVAEHEKQLREQAKK-------------------------
-27== FCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAKR-------------------------
-28== ILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKR-------------------------
-29== AVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF----------------------
-30== AVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF----------------------
-31=p AVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF----------------------
-32== FGAFYIPLLLMLVLYGRIF-------RAARFRIRK-------------------------
-33=p FGAFYIPLLLMLVLYGRIF-------RAARFRIRK-------------------------
-34== VGAFYLPMLVMMIIYIRIW-------LVARSRIRKDKFQMTKARLKTEETTLVASPKTEY
-35=p VGAFYLPMLVMMIIYIRIW-------LVARSRIRKDKFQMTKARLKTEETTLVASPKTEY
-36== ASSFYVPVLIILILYWRIY-------QTARKRIR--------------------------
- * : * :
-
-1== ------------------------------------------------------------
-2== ------------------------------------------------------------
-3== ------------------------------------------------------------
-4=p ------------------------------------------------------------
-5=p ------------------------------------------------------------
-6== ------------------------------------------------------------
-7== ------------------------------------------------------------
-8=opsin, ------------------------------------------------------------
-9== ------------------------------------------------------------
-10== ------------------------------------------------------------
-11== ------------------------------------------------------------
-12== ------------------------------------------------------------
-13== ------------------------------------------------------------
-14== --------------------------------------------------MN--------
-15== --------------------------------------------------MN--------
-16== --------------------------------------------------MN--------
-17== --------------------------------------------------MN--------
-18== --------------------------------------------------MN--------
-19== --------------------------------------------------MN--------
-20== --------------------------------------------------MN--------
-21== --------------------------------------------------MN--------
-22== --------------------------------------------------MN--------
-23== --------------------------------------------------MN--------
-24== --------------------------------------------------MN--------
-25== --------------------------------------------------MN--------
-26== --------------------------------------------------MN--------
-27== --------------------------------------------------LN--------
-28== --------------------------------------------------LN--------
-29== ---------------------------------PGFPRV----EPDSVIALNG-------
-30== ---------------------------------PGFPRV----QPESVISLNG-------
-31=p ---------------------------------PGFPRV----QPESVISLNG-------
-32== --------------TVKKVEKTGADTRHGASPAPQPKKS-----------VNGESGSR--
-33=p --------------TVRKVEKKGAGTSLGTSSAPPPKKS-----------LNGQPGSG--
-34== SVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS---
-35=p SVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS---
-36== -------------------RRRGATARGGVGPPP---------VPAGGALVAGGGSGGIA
-
-
-1== ------------------------------------------------------------
-2== ------------------------------------------------------------
-3== ------------------------------------------------------------
-4=p ------------------------------------------------------------
-5=p ------------------------------------------------------------
-6== ------------------------------------------------------------
-7== ------------------------------------------------------------
-8=opsin, ------------------------------------------------------------
-9== ------------------------------------------------------------
-10== ------------------------------------------------------------
-11== ------------------------------------------------------------
-12== ------------------------------------------------------------
-13== ------------------------------------------------------------
-14== --VESL------------------------------------------------------
-15== --VDSL------------------------------------------------------
-16== --VESL------------------------------------------------------
-17== --VESL------------------------------------------------------
-18== --VSTL------------------------------------------------------
-19== --VTNL------------------------------------------------------
-20== --VKSL------------------------------------------------------
-21== --VKSL------------------------------------------------------
-22== --VKSL------------------------------------------------------
-23== --VKSL------------------------------------------------------
-24== --VKSL------------------------------------------------------
-25== --VASL------------------------------------------------------
-26== --VASL------------------------------------------------------
-27== --AKEL------------------------------------------------------
-28== --AKEL------------------------------------------------------
-29== --IVKL----------------------QK---------EVEECAN--------------
-30== --VVKL----------------------QK---------EVEECAN--------------
-31=p --VVKL----------------------QK---------EVEECAN--------------
-32== -------NWRLGVESKAGGALCANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAG--
-33=p -------DWRRCAENRAVGTPCTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESG--
-34== -------SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS--
-35=p -------SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS--
-36== AAVVAVIGRPLPTISETTTTGFTNVSS----NNTSP---EKQSCANGLEADPPTTGYGAV
-
-
-1== -------------ESASTQK------AEKEVTRMVVLMVIGFLVCWVPYASVAFYIFT-H
-2== -------------ESATTQK------AEKEVTRMVIIMVIFFLICWLPYASVAMYIFT-H
-3== -------------ESATTQK------AEKEVTRMVILMVLGFMLAWTPYAVVAFWIFT-N
-4=p -------------DSASTQK------AEREVTKMVILMVFGFLIAWTPYATVAAWIFF-N
-5=p -------------DSASTQK------AEREVTKMVILMVLGFLVAWTPYATVAAWIFF-N
-6== -------------DSASTQK------AEREVTKMVVVMVLGFLVCWAPYASFSLWIVS-H
-7== -------------ESATTQK------AEREVSRMVVVMVGSFCVCYVPYAAFAMYMVN-N
-8=opsin, -------------ESESTQK------AEKEVTRMVVVMVLAFC-----------------
-9== -------------ESESTQK------AEKEVTRMVVVMVLAFCFCWGPYAFFACFAAA-N
-10== -------------ESESTQK------AEKEVTRMVVVMIFAYCVCWGPYTFFACFAAA-N
-11== -------------ESESTQK------AEREVSRMVVVMIVAFCICWGPYASFVSFAAA-N
-12== -------------ESESTQK------AEKEVSRMVVVMIVAYCFCWGPYTFFACFAAA-N
-13== -------------EADTTQR------AEREVTRMVIVMVMAFLLCWLPYSTFALVVAT-H
-14== ----------RSNVDKNKET------AEIRIAKAAITICFLFFCSWTPYGVMSLIGAF-G
-15== ----------RSNVDKSKEA------AEIRIAKAAITICFLFFASWTPYGVMSLIGAF-G
-16== ----------RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-G
-17== ----------RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-G
-18== ----------RS-NEADAQR------AEIRIAKTALVNVSLWFICWTPYALISLKGVM-G
-19== ----------RS-NEAETQR------AEIRIAKTALVNVSLWFICWTPYAAITIQGLL-G
-20== ----------RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMGLF-K
-21== ----------RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMGLF-K
-22== ----------RS-SEDADKS------AEGKLAKVALVTISLWFMAWTPYLVINCMGLF-K
-23== ----------RS-SEDCDKS------AEGKLAKVALTTISLWFMAWTPYLVICYFGLF-K
-24== ----------RS-SEDCDKS------AENKLAKVALTTISLWFMAWTPYLIICYFGLF-K
-25== ----------RS-SENQNTS------AECKLAKVALMTISLWFMAWTPYLVINFSGIF-N
-26== ----------RANADQQKQS------AECRLAKVAMMTVGLWFMAWTPYLIISWAGVF-S
-27== ----------R--KAQAGAS------AEMKLAKISMVIITQFMLSWSPYAIIALLAQF-G
-28== ----------R--KAQAGAN------AEMRLAKISIVIVSQFLLSWSPYAVVALLAQF-G
-29== ----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFIC
-30== ----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFIC
-31=p ----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFIC
-32== PTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-C
-33=p SNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-C
-34== -----------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF-V
-35=p -----------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF-V
-36== AAAYYPSLVRRKPKEAADSK------RERKAAKTLAIITGAFVACWLPFFVLAILVPT-C
- * . :
-
-1== QGS--DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLCC--------GKNPL
-2== QGS--NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLCC--------GKNPL
-3== KGA--DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTICC--------GKNPF
-4=p KGA--DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIFC--------GKNPL
-5=p KGA--AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLFC--------GKNPL
-6== RGE--EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMM-KMVC--------GKN-I
-7== RNH--GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIM-KMVC--------GKA-M
-8=opsin, ------------------------------------------------------------
-9== PGY--PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCIL-QLF---------GKK-V
-10== PGY--AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCIL-QLF---------GKK-V
-11== PGY--AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIM-QLF---------GKK-V
-12== PGY--AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCIL-QLF---------GKK-V
-13== KGI--IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLL-EMLCCGY----QPQR-T
-14== DKT--LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWLAL---------N
-15== DKT--LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWLAI---------S
-16== DKS--LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWLGV---------N
-17== DKS--LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWLGV---------N
-18== DTS--GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWFCV------HE-T
-19== NAE--GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWFCV------HE-K
-20== F-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GK-V
-21== F-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GK-V
-22== F-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GK-V
-23== I-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMCVF------GN-T
-24== I-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMCVC------GT-T
-25== L-V--KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSLAC-------A-A
-26== SGT--RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSLAC------GS-G
-27== PAE--WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQFDEKE-C
-28== PLE--WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQFDDKE-T
-29== GTSCSCIPLWVERTFLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYR----NINR--
-30== GTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQCQYR----NINR--
-31=p GTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYR----NINR--
-32== ESSC-HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC----RQ----
-33=p ESSC-HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC----RR----
-34== DPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR----RGHR--
-35=p DPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR----RGHR--
-36== DCE---VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLLCGRRV----RRRR--
-
-
-1== GDDE--SGASTSKTEVSSVS-TSPVSPA--------------------------------
-2== GDDE--ASATASKTE------TSQVAPA--------------------------------
-3== GDEDVSSTVSQSKTEVSSVS-SSQVSPA--------------------------------
-4=p GDDE-SSTVSTSKTEVSS------VSPA--------------------------------
-5=p GDEE-SSTVSTSKTEVSS------VSPA--------------------------------
-6== EEDE--ASTSSQVTQVSS------VAPEK-------------------------------
-7== TDES--DTCSSQKTEVSTVS-STQVGPN--------------------------------
-8=opsin, ------------------------------------------------------------
-9== DDGS--ELSSASKTEVSSV---SSVSPA--------------------------------
-10== DDGS--ELSSASKTEVSSV---SSVSPA--------------------------------
-11== DDGS--EASTTSRTEVSSVS-NSSVAPA--------------------------------
-12== DDGS--EVST-SRTEVSSVS-NSSVSPA--------------------------------
-13== GKAS--PGTPGPHADVTAAGLRNKVMPAHPV-----------------------------
-14== EKAP--ESSAVASTSTTQEP--QQTTAA--------------------------------
-15== EKAP--ESRAAISTSTTQEQ--QQTTAA--------------------------------
-16== EKSG--EISSAQST-TTQEQ--QQTTAA--------------------------------
-17== EKSG--EASSAQST-TTQEQ-TQQTSAA--------------------------------
-18== ETKS--NDDSQSNSTVAQDK-A--------------------------------------
-19== DPND--VEENQSSNTQTQEK-S--------------------------------------
-20== DDGK--SSDAQSQA-TASEA-ESKA-----------------------------------
-21== DDGK--SSDAQSQA-TASEA-ESKA-----------------------------------
-22== DDGK--SSEAQSQA-TTSEA-ESKA-----------------------------------
-23== DEPK--PDAPASDTETTSEA-DSKA-----------------------------------
-24== DEPK--PDAPPSDTETTSEA-ESKD-----------------------------------
-25== EPSS--DAVSTTSGTTTVTD-NEKSNA---------------------------------
-26== ESGS--DVKSEASATTTMEE-KPKIPEA--------------------------------
-27== EDAN--DAEEEVVASER--G-GESRDAAQMKEMMAMMQKMQAQQAAYQPPPPPQGY--PP
-28== EDDK--DAETEIPAGESSDA-APSADAAQMKEMMAMMQKMQQQQAAY----PPQGYAPPP
-29== ------KLSAAGMHEALKLAERPERPEF------------VLQNADY-------------
-30== ------KLSAAGMHEALKLAERPERSEF------------VLQNSDH-------------
-31=p ------KLSAAGMHEALKLAERPERSEF------------VLQNSDH-------------
-32== ------------------------------------------------------------
-33=p ------------------------------------------------------------
-34== ------------------------------------------------------------
-35=p ------------------------------------------------------------
-36== ------A---------------PQ------------------------------------
-
-
-1== ---------------------------------------------------------
-2== ---------------------------------------------------------
-3== ---------------------------------------------------------
-4=p ---------------------------------------------------------
-5=p ---------------------------------------------------------
-6== ---------------------------------------------------------
-7== ---------------------------------------------------------
-8=opsin, ---------------------------------------------------------
-9== ---------------------------------------------------------
-10== ---------------------------------------------------------
-11== ---------------------------------------------------------
-12== ---------------------------------------------------------
-13== ---------------------------------------------------------
-14== ---------------------------------------------------------
-15== ---------------------------------------------------------
-16== ---------------------------------------------------------
-17== ---------------------------------------------------------
-18== ---------------------------------------------------------
-19== ---------------------------------------------------------
-20== ---------------------------------------------------------
-21== ---------------------------------------------------------
-22== ---------------------------------------------------------
-23== ---------------------------------------------------------
-24== ---------------------------------------------------------
-25== ---------------------------------------------------------
-26== ---------------------------------------------------------
-27== QGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQAYQA
-28== QGYPPQGY--PPQGYPPQGYPPQGYPP---PPQGAPPQ-GAPPAAPPQGVDNQAYQA
-29== --------------------------------------------CRKKGHDS-----
-30== --------------------------------------------CGKKGHDT-----
-31=p --------------------------------------------CGKKGHDT-----
-32== ---------------------------------------------------------
-33=p ---------------------------------------------------------
-34== ---------------------------------------------------------
-35=p ---------------------------------------------------------
-36== ---------------------------------------------------------
-
diff -r 6f28e90db932 -r bf28a8cff401 test-data/sample.fa
--- a/test-data/sample.fa Tue Oct 31 15:48:53 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,285 +0,0 @@
-> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91]
-MNGTEGDNFYVPFSNKTGLARSPYEYPQYYLAEPWKYSALAAYMFFLILVGFPVNFLTLF
-VTVQHKKLRTPLNYILLNLAMANLFMVLFGFTVTMYTSMNGYFVFGPTMCSIEGFFATLG
-GEVALWSLVVLAIERYIVICKPMGNFRFGNTHAIMGVAFTWIMALACAAPPLVGWSRYIP
-EGMQCSCGPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQES
-ASTQKAEKEVTRMVVLMVIGFLVCWVPYASVAFYIFTHQGSDFGATFMTLPAFFAKSSAL
-YNPVIYILMNKQFRNCMITTLCCGKNPLGDDESGASTSKTEVSSVSTSPVSPA
-> 2== U22180 1 rat opsin [J.Mol.Neurosci.5(3),207-209'94]
-MNGTEGPNFYVPFSNITGVVRSPFEQPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY
-VTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLG
-GEIGLWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIP
-EGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQES
-ATTQKAEKEVTRMVIIMVIFFLICWLPYASVAMYIFTHQGSNFGPIFMTLPAFFAKTASI
-YNPIIYIMMNKQFRNCMLTSLCCGKNPLGDDEASATASKTETSQVAPA
-> 3== M92038 1 chicken green sensitive cone opsin [PNAS89,5932-5936'9
-MNGTEGINFYVPMSNKTGVVRSPFEYPQYYLAEPWKYRLVCCYIFFLISTGLPINLLTLL
-VTFKHKKLRQPLNYILVNLAVADLFMACFGFTVTFYTAWNGYFVFGPVGCAVEGFFATLG
-GQVALWSLVVLAIERYIVVCKPMGNFRFSATHAMMGIAFTWVMAFSCAAPPLFGWSRYMP
-EGMQCSCGPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQES
-ATTQKAEKEVTRMVILMVLGFMLAWTPYAVVAFWIFTNKGADFTATLMAVPAFFSKSSSL
-YNPIIYVLMNKQFRNCMITTICCGKNPFGDEDVSSTVSQSKTEVSSVSSSQVSPA
-> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish
-MNGTEGKNFYVPMSNRTGLVRSPFEYPQYYLAEPWQFKILALYLFFLMSMGLPINGLTLV
-VTAQHKKLRQPLNFILVNLAVAGTIMVCFGFTVTFYTAINGYFVLGPTGCAVEGFMATLG
-GEVALWSLVVLAIERYIVVCKPMGSFKFSSSHAFAGIAFTWVMALACAAPPLFGWSRYIP
-EGMQCSCGPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQDS
-ASTQKAEREVTKMVILMVFGFLIAWTPYATVAAWIFFNKGADFSAKFMAIPAFFSKSSAL
-YNPVIYVLLNKQFRNCMLTTIFCGKNPLGDDESSTVSTSKTEVSSVSPA
-> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish
-MNGTEGNNFYVPLSNRTGLVRSPFEYPQYYLAEPWQFKLLAVYMFFLICLGLPINGLTLI
-CTAQHKKLRQPLNFILVNLAVAGAIMVCFGFTVTFYTAINGYFALGPTGCAVEGFMATLG
-GEVALWSLVVLAIERYIVVCKPMGSFKFSSTHASAGIAFTWVMAMACAAPPLVGWSRYIP
-EGIQCSCGPDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQDS
-ASTQKAEREVTKMVILMVLGFLVAWTPYATVAAWIFFNKGAAFSAQFMAIPAFFSKTSAL
-YNPVIYVLLNKQFRSCMLTTLFCGKNPLGDEESSTVSTSKTEVSSVSPA
-> 6== L11864 1 Carassius auratus blue cone opsin [Biochemistry32,208-
-MKQVPEFHEDFYIPIPLDINNLSAYSPFLVPQDHLGNQGIFMAMSVFMFFIFIGGASINI
-LTILCTIQFKKLRSHLNYILVNLSIANLFVAIFGSPLSFYSFFNRYFIFGATACKIEGFL
-ATLGGMVGLWSLAVVAFERWLVICKPLGNFTFKTPHAIAGCILPWISALAASLPPLFGWS
-RYIPEGLQCSCGPDWYTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITLKLAAKA
-QADSASTQKAEREVTKMVVVMVLGFLVCWAPYASFSLWIVSHRGEEFDLRMATIPSCLSK
-ASTVYNPVIYVLMNKQFRSCMMKMVCGKNIEEDEASTSSQVTQVSSVAPEK
-> 7== M13299 1 human BCP <>[Science232(4747),193-202'86]
-MRKMSEEEFYLFKNISSVGPWDGPQYHIAPVWAFYLQAAFMGTVFLIGFPLNAMVLVATL
-RYKKLRQPLNYILVNVSFGGFLLCIFSVFPVFVASCNGYFVFGRHVCALEGFLGTVAGLV
-TGWSLAFLAFERYIVICKPFGNFRFSSKHALTVVLATWTIGIGVSIPPFFGWSRFIPEGL
-QCSCGPDWYTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRALKAVAAQQQESATT
-QKAEREVSRMVVVMVGSFCVCYVPYAAFAMYMVNNRNHGLDLRLVTIPSFFSKSACIYNP
-IIYCFMNKQFQACIMKMVCGKAMTDESDTCSSQKTEVSTVSSTQVGPN
-> 8=opsin, greensensitive human (fragment) S07060
-DLAETVIASTISIVNQVSGYFVLGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKP
-FGNVRFDAKLAIVGIAFSWIWAAVWTAPPIFGWSRYWPHGLKTSCGPDVFSGSSYPGVQS
-YMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESESTQKAEKEVTRMVVVMVLAFC
-> 9== K03494 1 human GCP <>[Science232(4747),193-202'86]
-MAQQWSLQRLAGRHPQDSYEDSTQSSIFTYTNSNSTRGPFEGPNYHIAPRWVYHLTSVWM
-IFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIASTISVVNQVYGYFV
-LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGNVRFDAKLAIVGIAFSWIWA
-AVWTAPPIFGWSRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYL
-QVWLAIRAVAKQQKESESTQKAEKEVTRMVVVMVLAFCFCWGPYAFFACFAAANPGYPFH
-PLMAALPAFFAKSATIYNPVIYVFMNRQFRNCILQLFGKKVDDGSELSSASKTEVSSVSS
-VSPA
-> 10== Z68193 1 human Red Opsin <>[]
-MAQQWSLQRLAGRHPQDSYEDSTQSSIFTYTNSNSTRGPFEGPNYHIAPRWVYHLTSVWM
-IFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIASTISIVNQVSGYFV
-LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGNVRFDAKLAIVGIAFSWIWS
-AVWTAPPIFGWSRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYL
-QVWLAIRAVAKQQKESESTQKAEKEVTRMVVVMIFAYCVCWGPYTFFACFAAANPGYAFH
-PLMAALPAYFAKSATIYNPVIYVFMNRQFRNCILQLFGKKVDDGSELSSASKTEVSSVSS
-VSPA
-> 11== M92036 1 Gecko gecko P521 [PNAS89,6841-6845'92]
-MTEAWNVAVFAARRSRDDDDTTRGSVFTYTNTNNTRGPFEGPNYHIAPRWVYNLVSFFMI
-IVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVASTISVFNQIFGYFIL
-GHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGNIKFDSKLAIIGIVFSWVWAW
-GWSAPPIFGWSRYWPHGLKTSCGPDVFSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQ
-VWMAIRAVAAQQKESESTQKAEREVSRMVVVMIVAFCICWGPYASFVSFAAANPGYAFHP
-LAAALPAYFAKSATIYNPVIYVFMNRQFRNCIMQLFGKKVDDGSEASTTSRTEVSSVSNS
-SVAPA
-> 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90]
-MAAWEAAFAARRRHEEEDTTRDSVFTYTNSNNTRGPFEGPNYHIAPRWVYNLTSVWMIFV
-VAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIASTISVINQISGYFILGH
-PMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGNIKFDGKLAVAGILFSWLWSCAW
-TAPPIFGWSRYWPHGLKTSCGPDVFSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVW
-LAIRAVAAQQKESESTQKAEKEVSRMVVVMIVAYCFCWGPYTFFACFAAANPGYAFHPLA
-AALPAYFAKSATIYNPIIYVFMNRQFRNCILQLFGKKVDDGSEVSTSRTEVSSVSNSSVS
-PA
-> 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95]
-MSSNSSQAPPNGTPGPFDGPQWPYQAPQSTYVGVAVLMGTVVACASVVNGLVIVVSICYK
-KLRSPLNYILVNLAVADLLVTLCGSSVSLSNNINGFFVFGRRMCELEGFMVSLTGIVGLW
-SLAILALERYVVVCKPLGDFQFQRRHAVSGCAFTWGWALLWSAPPLLGWSSYVPEGLRTS
-CGPNWYTGGSNNNSYILSLFVTCFVLPLSLILFSYTNLLLTLRAAAAQQKEADTTQRAER
-EVTRMVIVMVMAFLLCWLPYSTFALVVATHKGIIIQPVLASLPSYFSKTATVYNPIIYVF
-MNKQFQSCLLEMLCCGYQPQRTGKASPGTPGPHADVTAAGLRNKVMPAHPV
-> 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87]
-MESGNVSSSLFGNVSTALRPEARLSAETRLLGWNVPPEELRHIPEHWLTYPEPPESMNYL
-LGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVKTPIFIYNSFH
-QGYALGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEGKMTHGKAIAMIIFIY
-MYATPWVVACYTETWGRFVPEGYLTSCTFDYLTDNFDTRLFVACIFFFSFVCPTTMITYY
-YSQIVGHVFSHEKALRDQAKKMNVESLRSNVDKNKETAEIRIAKAAITICFLFFCSWTPY
-GVMSLIGAFGDKTLLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWLALNE
-KAPESSAVASTSTTQEPQQTTAA
-> 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92
-MEYHNVSSVLGNVSSVLRPDARLSAESRLLGWNVPPDELRHIPEHWLIYPEPPESMNYLL
-GTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIKTPIFIYNSFHQ
-GYALGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEGKMTHGKAIAMIIFIYL
-YATPWVVACYTESWGRFVPEGYLTSCTFDYLTDNFDTRLFVACIFFFSFVCPTTMITYYY
-SQIVGHVFSHEKALRDQAKKMNVDSLRSNVDKSKEAAEIRIAKAAITICFLFFASWTPYG
-VMSLIGAFGDKTLLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWLAISEK
-APESRAAISTSTTQEQQQTTAA
-> 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87]
-MEPLCNASEPPLRPEARSSGNGDLQFLGWNVPPDQIQYIPEHWLTQLEPPASMHYMLGVF
-YIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLKAPIFNSFHRGFAIY
-LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNRNMTFTKAVIMNIIIWLYCT
-PWVVLPLTQFWDRFVPEGYLTSCSFDYLSDNFDTRLFVGTIFFFSFVCPTLMILYYYSQI
-VGHVFSHEKALREQAKKMNVESLRSNVDKSKETAEIRIAKAAITICFLFFVSWTPYGVMS
-LIGAFGDKSLLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWLGVNEKSGE
-ISSAQSTTTQEQQQTTAA
-> 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92
-MDALCNASEPPLRPEARMSSGSDELQFLGWNVPPDQIQYIPEHWLTQLEPPASMHYMLGV
-FYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLKAPIFIYNSFHRGF
-ALGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNRNMTFTKAVIMNIIIWLYC
-TPWVVLPLTQFWDRFVPEGYLTSCSFDYLSDNFDTRLFVGTIFLFSFVVPTLMILYYYSQ
-IVGHVFNHEKALREQAKKMNVESLRSNVDKSKETAEIRIAKAAITICFLFFVSWTPYGVM
-SLIGAFGDKSLLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWLGVNEKSG
-EASSAQSTTTQEQTQQTSAA
-> 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 [J.Exp.Biol.1
-MTNATGPQMAYYGAASMDFGYPEGVSIVDFVRPEIKPYVHQHWYNYPPVNPMWHYLLGVI
-YLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTNVPFFTYNCFSGGV
-WMFSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNGPKLTTGKAVVFALISWV
-IAIGCALPPFFGWGNYILEGILDSCSYDYLTQDFNTFSYNIFIFVFDYFLPAAIIVFSYV
-FIVKAIFAHEAAMRAQAKKMNVSTLRSNEADAQRAEIRIAKTALVNVSLWFICWTPYALI
-SLKGVMGDTSGITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWFCVHETET
-KSNDDSQSNSTVAQDKA
-> 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 [J.Exp.Biol.1
-MANVTGPQMAFYGSGAATFGYPEGMTVADFVPDRVKHMVLDHWYNYPPVNPMWHYLLGVV
-YLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTNFPPFCYNCFSGGR
-WMFSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNGPKLTQGKATFMCGLAWV
-ISVGWSLPPFFGWGSYTLEGILDSCSYDYFTRDMNTITYNICIFIFDFFLPASVIVFSYV
-FIVKAIFAHEAAMRAQAKKMNVTNLRSNEAETQRAEIRIAKTALVNVSLWFICWTPYAAI
-TIQGLLGNAEGITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWFCVHEKDP
-NDVEENQSSNTQTQEKS
-> 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85]
-MESFAVAAAQLGPHFAPLSNGSVVDKVTPDMAHLISPYWNQFPAMDPIWAKILTAYMIMI
-GMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITNTPMMGINLYFETWVLGP
-MMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGRPMTIPLALGKMYVPEGNLTSC
-GIDYLERDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKSL
-RSSEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGLFKFEGLTPLNTIWGACFAKS
-AACYNPIVYGISHPKYRLALKEKCPCCVFGKVDDGKSSDAQSQATASEAESKA
-> 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85]
-MESFAVAAAQLGPHFAPLSNGSVVDKVTPDMAHLISPYWNQFPAMDPIWAKILTAYMIMI
-GMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITNTPMMGINLYFETWVLGP
-MMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGRPMTIPLALGKIAYIWFMSSIW
-CLAPAFGWSRYVPEGNLTSCGIDYLERDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAA
-VSAHEKAMREQAKKMNVKSLRSSEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGL
-FKFEGLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVFGKVDDGKSSDA
-QSQATASEAESKA
-> 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204'
-MDSFAAVATQLGPQFAAPSNGSVVDKVTPDMAHLISPYWDQFPAMDPIWAKILTAYMIII
-GMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITNTPMMGINLYFETWVLGP
-MMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGRPMTIPLALGKIAYIWFMSTIW
-CCLAPVFGWSRYVPEGNLTSCGIDYLERDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIA
-AVSAHEKAMREQAKKMNVKSLRSSEDADKSAEGKLAKVALVTISLWFMAWTPYLVINCMG
-LFKFEGLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVFGKVDDGKSSE
-AQSQATTSEAESKA
-> 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86]
-MERSHLPETPFDLAHSGPRFQAQSSGNGSVLDNVLPDMAHLVNPYWSRFAPMDPMMSKIL
-GLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQSPVMIINFYY
-ETWVLGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGINGTPMTIKTSIMKILFI
-WMMAVFWTVMPLIGWSAYVPEGNLTACSIDYMTRMWNPRSYLITYSLFVYYTPLFLICYS
-YWFIIAAVAAHEKAMREQAKKMNVKSLRSSEDCDKSAEGKLAKVALTTISLWFMAWTPYL
-VICYFGLFKIDGLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMCVFGNTD
-EPKPDAPASDTETTSEADSKA
-> 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92
-MERSLLPEPPLAMALLGPRFEAQTGGNRSVLDNVLPDMAPLVNPHWSRFAPMDPTMSKIL
-GLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQSPVMIINFYY
-ETWVLGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGINGTPMTIKTSIMKIAFI
-WMMAVFWTIMPLIGWSSYVPEGNLTACSIDYMTRQWNPRSYLITYSLFVYYTPLFMICYS
-YWFIIATVAAHEKAMRDQAKKMNVKSLRSSEDCDKSAENKLAKVALTTISLWFMAWTPYL
-IICYFGLFKIDGLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMCVCGTTD
-EPKPDAPPSDTETTSEAESKD
-> 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[]
-MIAVSGPSYEAFSYGGQARFNNQTVVDKVPPDMLHLIDANWYQYPPLNPMWHGILGFVIG
-MLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCMSPPMVINCYYETWVL
-GPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSGKPLSINGALIRIIAIWLFSL
-GWTIAPMFGWNRYVPEGNMTACGTDYFNRGLLSASYLVCYGIWVYFVPLFLIIYSYWFII
-QAVAAHEKNMREQAKKMNVASLRSSENQNTSAECKLAKVALMTISLWFMAWTPYLVINFS
-GIFNLVKISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSLACAAEPSSDAV
-STTSGTTTVTDNEKSNA
-> 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93]
-MANQLSYSSLGWPYQPNASVVDTMPKEMLYMIHEHWYAFPPMNPLWYSILGVAMIILGII
-CVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFMMPTMTSNCFAETWILGPFMC
-EVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAAAPLTHKKATLLLLFVWIWSGGWTIL
-PFFGWSRYVPEGNLTSCTVDYLTKDWSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAE
-HEKQLREQAKKMNVASLRANADQQKQSAECRLAKVAMMTVGLWFMAWTPYLIISWAGVFS
-SGTRLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSLACGSGESGSDVKSE
-ASATTTMEEKPKIPEA
-> 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88]
-MVESTTLVNQTWWYNPTVDIHPHWAKFDPIPDAVYYSVGIFIGVVGIIGILGNGVVIYLF
-SKTKSLQTPANMFIINLAMSDLSFSAINGFPLKTISAFMKKWIFGKVACQLYGLLGGIFG
-FMSINTMAMISIDRYNVIGRPMAASKKMSHRRAFLMIIFVWMWSIVWSVGPVFNWGAYVP
-EGILTSCSFDYLSTDPSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAK
-RLNAKELRKAQAGASAEMKLAKISMVIITQFMLSWSPYAIIALLAQFGPAEWVTPYAAEL
-PVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQFDEKECEDANDAEEEVVASER
-GGESRDAAQMKEMMAMMQKMQAQQAAYQPPPPPQGYPPQGYPPQGAYPPPQGYPPQGYPP
-QGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQAYQA
-> 28== X70498 1 Todarodes pacificus rhodopsin [FEBS317(1-2),5-11'93]
-MGRDLRDNETWWYNPSIVVHPHWREFDQVPDAVYYSLGIFIGICGIIGCGGNGIVIYLFT
-KTKSLQTPANMFIINLAFSDFTFSLVNGFPLMTISCFLKKWIFGFAACKVYGFIGGIFGF
-MSIMTMAMISIDRYNVIGRPMAASKKMSHRRAFIMIIFVWLWSVLWAIGPIFGWGAYTLE
-GVLCNCSFDYISRDSTTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKR
-LNAKELRKAQAGANAEMRLAKISIVIVSQFLLSWSPYAVVALLAQFGPLEWVTPYAAQLP
-VMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQFDDKETEDDKDAETEIPAGESS
-DAAPSADAAQMKEMMAMMQKMQQQQAAYPPQGYAPPPQGYPPQGYPPQGYPPQGYPPQGY
-PPPPQGAPPQGAPPAAPPQGVDNQAYQA
-> 29== L21195 1 human serotonin 5-HT7 receptor protein 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93]
-MPHLLSGFLEVTASPAPTWDAPPDNVSGCGEQINYGRVEKVVIGSILTLITLLTIAGNCL
-VVISVSFVKKLRQPSNYLIVSLALADLSVAVAVMPFVSVTDLIGGKWIFGHFFCNVFIAM
-DVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASITLPPLFGW
-AQNVNDDKVCLISQDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPGFPRV
-QPESVISLNGVVKLQKEVEECANLSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWL
-PFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQC
-QYRNINRKLSAAGMHEALKLAERPERSEFVLQNSDHCGKKGHDT
-> 31=p A47425 serotonin receptor 5HT-7 - rat
-MPHLLSGFLEVTASPAPTWDAPPDNVSGCGEQINYGRVEKVVIGSILTLITLLTIAGNCL
-VVISVSFVKKLRQPSNYLIVSLALADLSVAVAVMPFVSVTDLIGGKWIFGHFFCNVFIAM
-DVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASITLPPLFGW
-AQNVNDDKVCLISQDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPGFPRV
-QPESVISLNGVVKLQKEVEECANLSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWL
-PFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQC
-QYRNINRKLSAAGMHEALKLAERPERSEFVLQNSDHCGKKGHDT
-> 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92]
-MDVLSPGQGNNTTSPPAPFETGGNTTGISDVTVSYQVITSLLLGTLIFCAVLGNACVVAA
-IALERSLQNVANYLIGSLAVTDLMVSVLVLPMAALYQVLNKWTLGQVTCDLFIALDVLCC
-TSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLISIPPMLGWRTPED
-RSDPDACTISKDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKKVEKTGADT
-RHGASPAPQPKKSVNGESGSRNWRLGVESKAGGALCANGAVRQGDDGAALEVIEVHRVGN
-SKEHLPLPSEAGPTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLP
-FFIVALVLPFCESSCHMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC
-RQ
-> 33=p A35181 serotonin receptor class 1A - rat
-MDVFSFGQGNNTTASQEPFGTGGNVTSISDVTFSYQVITSLLLGTLIFCAVLGNACVVAA
-IALERSLQNVANYLIGSLAVTDLMVSVLVLPMAALYQVLNKWTLGQVTCDLFIALDVLCC
-TSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLISIPPMLGWRTPED
-RSDPDACTISKDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRKVEKKGAGT
-SLGTSSAPPPKKSLNGQPGSGDWRRCAENRAVGTPCTNGAVRQGDDEATLEVIEVHRVGN
-SKEHLPLPSESGSNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLP
-FFIVALVLPFCESSCHMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC
-RR
-> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93]
-MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYGLTGQFINGS
-HSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAV
-MLERSLHNVANYLILSLAVADLMVAVLVMPLSVVSEISKVWFLHSEVCDMWISVDVLCCT
-ASILHLVAIAMDRYWAVTSIDYIRRRSARRILLMIMVVWIVALFISIPPLFGWRDPNNDP
-DKTGTCIISQDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTE
-ETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENA
-NGVNSNSSSSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSNDTPYSRTREK
-LELKRERKAARTLAIITGAFLICWLPFFIIALIGPFVDPEGIPPFARSFVLWLGYFNSLL
-NPIIYTIFSPEFRSAFQKILFGKYRRGHR
-> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail
-MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYGLTGQFINGS
-HSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAV
-MLERSLHNVANYLILSLAVADLMVAVLVMPLSVVSEISKVWFLHSEVCDMWISVDVLCCT
-ASILHLVAIAMDRYWAVTSIDYIRRRSARRILLMIMVVWIVALFISIPPLFGWRDPNNDP
-DKTGTCIISQDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTE
-ETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENA
-NGVNSNSSSSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSNDTPYSRTREK
-LELKRERKAARTLAIITGAFLICWLPFFIIALIGPFVDPEGIPPFARSFVLWLGYFNSLL
-NPIIYTIFSPEFRSAFQKILFGKYRRGHR
-> 36== X95604 1 Bombyx mori serotonin receptor [InsectBiochem.Mol.Bi
-MEGAEGQEELDWEALYLRLPLQNCSWNSTGWEPNWNVTVVPNTTWWQASAPFDTPAALVR
-AAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLVMPLGAV
-YEVVQRWTLGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTNIDYIHASTAKRVGMM
-IACVWTVSFFVCIAQLLGWKDPDWNQRVSEDLRCVVSQDVGYQIFATASSFYVPVLIILI
-LYWRIYQTARKRIRRRRGATARGGVGPPPVPAGGALVAGGGSGGIAAAVVAVIGRPLPTI
-SETTTTGFTNVSSNNTSPEKQSCANGLEADPPTTGYGAVAAAYYPSLVRRKPKEAADSKR
-ERKAAKTLAIITGAFVACWLPFFVLAILVPTCDCEVSPVLTSLSLWLGYFNSTLNPVIYT
-VFSPEFRHAFQRLLCGRRVRRRRAPQ
diff -r 6f28e90db932 -r bf28a8cff401 test-data/sample_amino.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample_amino.fa Wed Mar 20 07:34:52 2024 +0000
@@ -0,0 +1,285 @@
+> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91]
+MNGTEGDNFYVPFSNKTGLARSPYEYPQYYLAEPWKYSALAAYMFFLILVGFPVNFLTLF
+VTVQHKKLRTPLNYILLNLAMANLFMVLFGFTVTMYTSMNGYFVFGPTMCSIEGFFATLG
+GEVALWSLVVLAIERYIVICKPMGNFRFGNTHAIMGVAFTWIMALACAAPPLVGWSRYIP
+EGMQCSCGPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQES
+ASTQKAEKEVTRMVVLMVIGFLVCWVPYASVAFYIFTHQGSDFGATFMTLPAFFAKSSAL
+YNPVIYILMNKQFRNCMITTLCCGKNPLGDDESGASTSKTEVSSVSTSPVSPA
+> 2== U22180 1 rat opsin [J.Mol.Neurosci.5(3),207-209'94]
+MNGTEGPNFYVPFSNITGVVRSPFEQPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY
+VTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLG
+GEIGLWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIP
+EGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQES
+ATTQKAEKEVTRMVIIMVIFFLICWLPYASVAMYIFTHQGSNFGPIFMTLPAFFAKTASI
+YNPIIYIMMNKQFRNCMLTSLCCGKNPLGDDEASATASKTETSQVAPA
+> 3== M92038 1 chicken green sensitive cone opsin [PNAS89,5932-5936'9
+MNGTEGINFYVPMSNKTGVVRSPFEYPQYYLAEPWKYRLVCCYIFFLISTGLPINLLTLL
+VTFKHKKLRQPLNYILVNLAVADLFMACFGFTVTFYTAWNGYFVFGPVGCAVEGFFATLG
+GQVALWSLVVLAIERYIVVCKPMGNFRFSATHAMMGIAFTWVMAFSCAAPPLFGWSRYMP
+EGMQCSCGPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQES
+ATTQKAEKEVTRMVILMVLGFMLAWTPYAVVAFWIFTNKGADFTATLMAVPAFFSKSSSL
+YNPIIYVLMNKQFRNCMITTICCGKNPFGDEDVSSTVSQSKTEVSSVSSSQVSPA
+> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish
+MNGTEGKNFYVPMSNRTGLVRSPFEYPQYYLAEPWQFKILALYLFFLMSMGLPINGLTLV
+VTAQHKKLRQPLNFILVNLAVAGTIMVCFGFTVTFYTAINGYFVLGPTGCAVEGFMATLG
+GEVALWSLVVLAIERYIVVCKPMGSFKFSSSHAFAGIAFTWVMALACAAPPLFGWSRYIP
+EGMQCSCGPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQDS
+ASTQKAEREVTKMVILMVFGFLIAWTPYATVAAWIFFNKGADFSAKFMAIPAFFSKSSAL
+YNPVIYVLLNKQFRNCMLTTIFCGKNPLGDDESSTVSTSKTEVSSVSPA
+> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish
+MNGTEGNNFYVPLSNRTGLVRSPFEYPQYYLAEPWQFKLLAVYMFFLICLGLPINGLTLI
+CTAQHKKLRQPLNFILVNLAVAGAIMVCFGFTVTFYTAINGYFALGPTGCAVEGFMATLG
+GEVALWSLVVLAIERYIVVCKPMGSFKFSSTHASAGIAFTWVMAMACAAPPLVGWSRYIP
+EGIQCSCGPDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQDS
+ASTQKAEREVTKMVILMVLGFLVAWTPYATVAAWIFFNKGAAFSAQFMAIPAFFSKTSAL
+YNPVIYVLLNKQFRSCMLTTLFCGKNPLGDEESSTVSTSKTEVSSVSPA
+> 6== L11864 1 Carassius auratus blue cone opsin [Biochemistry32,208-
+MKQVPEFHEDFYIPIPLDINNLSAYSPFLVPQDHLGNQGIFMAMSVFMFFIFIGGASINI
+LTILCTIQFKKLRSHLNYILVNLSIANLFVAIFGSPLSFYSFFNRYFIFGATACKIEGFL
+ATLGGMVGLWSLAVVAFERWLVICKPLGNFTFKTPHAIAGCILPWISALAASLPPLFGWS
+RYIPEGLQCSCGPDWYTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITLKLAAKA
+QADSASTQKAEREVTKMVVVMVLGFLVCWAPYASFSLWIVSHRGEEFDLRMATIPSCLSK
+ASTVYNPVIYVLMNKQFRSCMMKMVCGKNIEEDEASTSSQVTQVSSVAPEK
+> 7== M13299 1 human BCP <>[Science232(4747),193-202'86]
+MRKMSEEEFYLFKNISSVGPWDGPQYHIAPVWAFYLQAAFMGTVFLIGFPLNAMVLVATL
+RYKKLRQPLNYILVNVSFGGFLLCIFSVFPVFVASCNGYFVFGRHVCALEGFLGTVAGLV
+TGWSLAFLAFERYIVICKPFGNFRFSSKHALTVVLATWTIGIGVSIPPFFGWSRFIPEGL
+QCSCGPDWYTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRALKAVAAQQQESATT
+QKAEREVSRMVVVMVGSFCVCYVPYAAFAMYMVNNRNHGLDLRLVTIPSFFSKSACIYNP
+IIYCFMNKQFQACIMKMVCGKAMTDESDTCSSQKTEVSTVSSTQVGPN
+> 8=opsin, greensensitive human (fragment) S07060
+DLAETVIASTISIVNQVSGYFVLGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKP
+FGNVRFDAKLAIVGIAFSWIWAAVWTAPPIFGWSRYWPHGLKTSCGPDVFSGSSYPGVQS
+YMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESESTQKAEKEVTRMVVVMVLAFC
+> 9== K03494 1 human GCP <>[Science232(4747),193-202'86]
+MAQQWSLQRLAGRHPQDSYEDSTQSSIFTYTNSNSTRGPFEGPNYHIAPRWVYHLTSVWM
+IFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIASTISVVNQVYGYFV
+LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGNVRFDAKLAIVGIAFSWIWA
+AVWTAPPIFGWSRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYL
+QVWLAIRAVAKQQKESESTQKAEKEVTRMVVVMVLAFCFCWGPYAFFACFAAANPGYPFH
+PLMAALPAFFAKSATIYNPVIYVFMNRQFRNCILQLFGKKVDDGSELSSASKTEVSSVSS
+VSPA
+> 10== Z68193 1 human Red Opsin <>[]
+MAQQWSLQRLAGRHPQDSYEDSTQSSIFTYTNSNSTRGPFEGPNYHIAPRWVYHLTSVWM
+IFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIASTISIVNQVSGYFV
+LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGNVRFDAKLAIVGIAFSWIWS
+AVWTAPPIFGWSRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYL
+QVWLAIRAVAKQQKESESTQKAEKEVTRMVVVMIFAYCVCWGPYTFFACFAAANPGYAFH
+PLMAALPAYFAKSATIYNPVIYVFMNRQFRNCILQLFGKKVDDGSELSSASKTEVSSVSS
+VSPA
+> 11== M92036 1 Gecko gecko P521 [PNAS89,6841-6845'92]
+MTEAWNVAVFAARRSRDDDDTTRGSVFTYTNTNNTRGPFEGPNYHIAPRWVYNLVSFFMI
+IVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVASTISVFNQIFGYFIL
+GHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGNIKFDSKLAIIGIVFSWVWAW
+GWSAPPIFGWSRYWPHGLKTSCGPDVFSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQ
+VWMAIRAVAAQQKESESTQKAEREVSRMVVVMIVAFCICWGPYASFVSFAAANPGYAFHP
+LAAALPAYFAKSATIYNPVIYVFMNRQFRNCIMQLFGKKVDDGSEASTTSRTEVSSVSNS
+SVAPA
+> 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90]
+MAAWEAAFAARRRHEEEDTTRDSVFTYTNSNNTRGPFEGPNYHIAPRWVYNLTSVWMIFV
+VAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIASTISVINQISGYFILGH
+PMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGNIKFDGKLAVAGILFSWLWSCAW
+TAPPIFGWSRYWPHGLKTSCGPDVFSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVW
+LAIRAVAAQQKESESTQKAEKEVSRMVVVMIVAYCFCWGPYTFFACFAAANPGYAFHPLA
+AALPAYFAKSATIYNPIIYVFMNRQFRNCILQLFGKKVDDGSEVSTSRTEVSSVSNSSVS
+PA
+> 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95]
+MSSNSSQAPPNGTPGPFDGPQWPYQAPQSTYVGVAVLMGTVVACASVVNGLVIVVSICYK
+KLRSPLNYILVNLAVADLLVTLCGSSVSLSNNINGFFVFGRRMCELEGFMVSLTGIVGLW
+SLAILALERYVVVCKPLGDFQFQRRHAVSGCAFTWGWALLWSAPPLLGWSSYVPEGLRTS
+CGPNWYTGGSNNNSYILSLFVTCFVLPLSLILFSYTNLLLTLRAAAAQQKEADTTQRAER
+EVTRMVIVMVMAFLLCWLPYSTFALVVATHKGIIIQPVLASLPSYFSKTATVYNPIIYVF
+MNKQFQSCLLEMLCCGYQPQRTGKASPGTPGPHADVTAAGLRNKVMPAHPV
+> 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87]
+MESGNVSSSLFGNVSTALRPEARLSAETRLLGWNVPPEELRHIPEHWLTYPEPPESMNYL
+LGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVKTPIFIYNSFH
+QGYALGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEGKMTHGKAIAMIIFIY
+MYATPWVVACYTETWGRFVPEGYLTSCTFDYLTDNFDTRLFVACIFFFSFVCPTTMITYY
+YSQIVGHVFSHEKALRDQAKKMNVESLRSNVDKNKETAEIRIAKAAITICFLFFCSWTPY
+GVMSLIGAFGDKTLLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWLALNE
+KAPESSAVASTSTTQEPQQTTAA
+> 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92
+MEYHNVSSVLGNVSSVLRPDARLSAESRLLGWNVPPDELRHIPEHWLIYPEPPESMNYLL
+GTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIKTPIFIYNSFHQ
+GYALGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEGKMTHGKAIAMIIFIYL
+YATPWVVACYTESWGRFVPEGYLTSCTFDYLTDNFDTRLFVACIFFFSFVCPTTMITYYY
+SQIVGHVFSHEKALRDQAKKMNVDSLRSNVDKSKEAAEIRIAKAAITICFLFFASWTPYG
+VMSLIGAFGDKTLLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWLAISEK
+APESRAAISTSTTQEQQQTTAA
+> 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87]
+MEPLCNASEPPLRPEARSSGNGDLQFLGWNVPPDQIQYIPEHWLTQLEPPASMHYMLGVF
+YIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLKAPIFNSFHRGFAIY
+LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNRNMTFTKAVIMNIIIWLYCT
+PWVVLPLTQFWDRFVPEGYLTSCSFDYLSDNFDTRLFVGTIFFFSFVCPTLMILYYYSQI
+VGHVFSHEKALREQAKKMNVESLRSNVDKSKETAEIRIAKAAITICFLFFVSWTPYGVMS
+LIGAFGDKSLLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWLGVNEKSGE
+ISSAQSTTTQEQQQTTAA
+> 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92
+MDALCNASEPPLRPEARMSSGSDELQFLGWNVPPDQIQYIPEHWLTQLEPPASMHYMLGV
+FYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLKAPIFIYNSFHRGF
+ALGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNRNMTFTKAVIMNIIIWLYC
+TPWVVLPLTQFWDRFVPEGYLTSCSFDYLSDNFDTRLFVGTIFLFSFVVPTLMILYYYSQ
+IVGHVFNHEKALREQAKKMNVESLRSNVDKSKETAEIRIAKAAITICFLFFVSWTPYGVM
+SLIGAFGDKSLLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWLGVNEKSG
+EASSAQSTTTQEQTQQTSAA
+> 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 [J.Exp.Biol.1
+MTNATGPQMAYYGAASMDFGYPEGVSIVDFVRPEIKPYVHQHWYNYPPVNPMWHYLLGVI
+YLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTNVPFFTYNCFSGGV
+WMFSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNGPKLTTGKAVVFALISWV
+IAIGCALPPFFGWGNYILEGILDSCSYDYLTQDFNTFSYNIFIFVFDYFLPAAIIVFSYV
+FIVKAIFAHEAAMRAQAKKMNVSTLRSNEADAQRAEIRIAKTALVNVSLWFICWTPYALI
+SLKGVMGDTSGITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWFCVHETET
+KSNDDSQSNSTVAQDKA
+> 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 [J.Exp.Biol.1
+MANVTGPQMAFYGSGAATFGYPEGMTVADFVPDRVKHMVLDHWYNYPPVNPMWHYLLGVV
+YLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTNFPPFCYNCFSGGR
+WMFSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNGPKLTQGKATFMCGLAWV
+ISVGWSLPPFFGWGSYTLEGILDSCSYDYFTRDMNTITYNICIFIFDFFLPASVIVFSYV
+FIVKAIFAHEAAMRAQAKKMNVTNLRSNEAETQRAEIRIAKTALVNVSLWFICWTPYAAI
+TIQGLLGNAEGITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWFCVHEKDP
+NDVEENQSSNTQTQEKS
+> 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85]
+MESFAVAAAQLGPHFAPLSNGSVVDKVTPDMAHLISPYWNQFPAMDPIWAKILTAYMIMI
+GMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITNTPMMGINLYFETWVLGP
+MMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGRPMTIPLALGKMYVPEGNLTSC
+GIDYLERDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKSL
+RSSEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGLFKFEGLTPLNTIWGACFAKS
+AACYNPIVYGISHPKYRLALKEKCPCCVFGKVDDGKSSDAQSQATASEAESKA
+> 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85]
+MESFAVAAAQLGPHFAPLSNGSVVDKVTPDMAHLISPYWNQFPAMDPIWAKILTAYMIMI
+GMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITNTPMMGINLYFETWVLGP
+MMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGRPMTIPLALGKIAYIWFMSSIW
+CLAPAFGWSRYVPEGNLTSCGIDYLERDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAA
+VSAHEKAMREQAKKMNVKSLRSSEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGL
+FKFEGLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVFGKVDDGKSSDA
+QSQATASEAESKA
+> 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204'
+MDSFAAVATQLGPQFAAPSNGSVVDKVTPDMAHLISPYWDQFPAMDPIWAKILTAYMIII
+GMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITNTPMMGINLYFETWVLGP
+MMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGRPMTIPLALGKIAYIWFMSTIW
+CCLAPVFGWSRYVPEGNLTSCGIDYLERDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIA
+AVSAHEKAMREQAKKMNVKSLRSSEDADKSAEGKLAKVALVTISLWFMAWTPYLVINCMG
+LFKFEGLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVFGKVDDGKSSE
+AQSQATTSEAESKA
+> 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86]
+MERSHLPETPFDLAHSGPRFQAQSSGNGSVLDNVLPDMAHLVNPYWSRFAPMDPMMSKIL
+GLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQSPVMIINFYY
+ETWVLGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGINGTPMTIKTSIMKILFI
+WMMAVFWTVMPLIGWSAYVPEGNLTACSIDYMTRMWNPRSYLITYSLFVYYTPLFLICYS
+YWFIIAAVAAHEKAMREQAKKMNVKSLRSSEDCDKSAEGKLAKVALTTISLWFMAWTPYL
+VICYFGLFKIDGLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMCVFGNTD
+EPKPDAPASDTETTSEADSKA
+> 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92
+MERSLLPEPPLAMALLGPRFEAQTGGNRSVLDNVLPDMAPLVNPHWSRFAPMDPTMSKIL
+GLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQSPVMIINFYY
+ETWVLGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGINGTPMTIKTSIMKIAFI
+WMMAVFWTIMPLIGWSSYVPEGNLTACSIDYMTRQWNPRSYLITYSLFVYYTPLFMICYS
+YWFIIATVAAHEKAMRDQAKKMNVKSLRSSEDCDKSAENKLAKVALTTISLWFMAWTPYL
+IICYFGLFKIDGLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMCVCGTTD
+EPKPDAPPSDTETTSEAESKD
+> 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[]
+MIAVSGPSYEAFSYGGQARFNNQTVVDKVPPDMLHLIDANWYQYPPLNPMWHGILGFVIG
+MLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCMSPPMVINCYYETWVL
+GPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSGKPLSINGALIRIIAIWLFSL
+GWTIAPMFGWNRYVPEGNMTACGTDYFNRGLLSASYLVCYGIWVYFVPLFLIIYSYWFII
+QAVAAHEKNMREQAKKMNVASLRSSENQNTSAECKLAKVALMTISLWFMAWTPYLVINFS
+GIFNLVKISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSLACAAEPSSDAV
+STTSGTTTVTDNEKSNA
+> 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93]
+MANQLSYSSLGWPYQPNASVVDTMPKEMLYMIHEHWYAFPPMNPLWYSILGVAMIILGII
+CVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFMMPTMTSNCFAETWILGPFMC
+EVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAAAPLTHKKATLLLLFVWIWSGGWTIL
+PFFGWSRYVPEGNLTSCTVDYLTKDWSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAE
+HEKQLREQAKKMNVASLRANADQQKQSAECRLAKVAMMTVGLWFMAWTPYLIISWAGVFS
+SGTRLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSLACGSGESGSDVKSE
+ASATTTMEEKPKIPEA
+> 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88]
+MVESTTLVNQTWWYNPTVDIHPHWAKFDPIPDAVYYSVGIFIGVVGIIGILGNGVVIYLF
+SKTKSLQTPANMFIINLAMSDLSFSAINGFPLKTISAFMKKWIFGKVACQLYGLLGGIFG
+FMSINTMAMISIDRYNVIGRPMAASKKMSHRRAFLMIIFVWMWSIVWSVGPVFNWGAYVP
+EGILTSCSFDYLSTDPSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAK
+RLNAKELRKAQAGASAEMKLAKISMVIITQFMLSWSPYAIIALLAQFGPAEWVTPYAAEL
+PVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQFDEKECEDANDAEEEVVASER
+GGESRDAAQMKEMMAMMQKMQAQQAAYQPPPPPQGYPPQGYPPQGAYPPPQGYPPQGYPP
+QGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQAYQA
+> 28== X70498 1 Todarodes pacificus rhodopsin [FEBS317(1-2),5-11'93]
+MGRDLRDNETWWYNPSIVVHPHWREFDQVPDAVYYSLGIFIGICGIIGCGGNGIVIYLFT
+KTKSLQTPANMFIINLAFSDFTFSLVNGFPLMTISCFLKKWIFGFAACKVYGFIGGIFGF
+MSIMTMAMISIDRYNVIGRPMAASKKMSHRRAFIMIIFVWLWSVLWAIGPIFGWGAYTLE
+GVLCNCSFDYISRDSTTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKR
+LNAKELRKAQAGANAEMRLAKISIVIVSQFLLSWSPYAVVALLAQFGPLEWVTPYAAQLP
+VMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQFDDKETEDDKDAETEIPAGESS
+DAAPSADAAQMKEMMAMMQKMQQQQAAYPPQGYAPPPQGYPPQGYPPQGYPPQGYPPQGY
+PPPPQGAPPQGAPPAAPPQGVDNQAYQA
+> 29== L21195 1 human serotonin 5-HT7 receptor protein 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93]
+MPHLLSGFLEVTASPAPTWDAPPDNVSGCGEQINYGRVEKVVIGSILTLITLLTIAGNCL
+VVISVSFVKKLRQPSNYLIVSLALADLSVAVAVMPFVSVTDLIGGKWIFGHFFCNVFIAM
+DVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASITLPPLFGW
+AQNVNDDKVCLISQDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPGFPRV
+QPESVISLNGVVKLQKEVEECANLSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWL
+PFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQC
+QYRNINRKLSAAGMHEALKLAERPERSEFVLQNSDHCGKKGHDT
+> 31=p A47425 serotonin receptor 5HT-7 - rat
+MPHLLSGFLEVTASPAPTWDAPPDNVSGCGEQINYGRVEKVVIGSILTLITLLTIAGNCL
+VVISVSFVKKLRQPSNYLIVSLALADLSVAVAVMPFVSVTDLIGGKWIFGHFFCNVFIAM
+DVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASITLPPLFGW
+AQNVNDDKVCLISQDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPGFPRV
+QPESVISLNGVVKLQKEVEECANLSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWL
+PFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQC
+QYRNINRKLSAAGMHEALKLAERPERSEFVLQNSDHCGKKGHDT
+> 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92]
+MDVLSPGQGNNTTSPPAPFETGGNTTGISDVTVSYQVITSLLLGTLIFCAVLGNACVVAA
+IALERSLQNVANYLIGSLAVTDLMVSVLVLPMAALYQVLNKWTLGQVTCDLFIALDVLCC
+TSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLISIPPMLGWRTPED
+RSDPDACTISKDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKKVEKTGADT
+RHGASPAPQPKKSVNGESGSRNWRLGVESKAGGALCANGAVRQGDDGAALEVIEVHRVGN
+SKEHLPLPSEAGPTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLP
+FFIVALVLPFCESSCHMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC
+RQ
+> 33=p A35181 serotonin receptor class 1A - rat
+MDVFSFGQGNNTTASQEPFGTGGNVTSISDVTFSYQVITSLLLGTLIFCAVLGNACVVAA
+IALERSLQNVANYLIGSLAVTDLMVSVLVLPMAALYQVLNKWTLGQVTCDLFIALDVLCC
+TSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLISIPPMLGWRTPED
+RSDPDACTISKDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRKVEKKGAGT
+SLGTSSAPPPKKSLNGQPGSGDWRRCAENRAVGTPCTNGAVRQGDDEATLEVIEVHRVGN
+SKEHLPLPSESGSNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLP
+FFIVALVLPFCESSCHMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC
+RR
+> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93]
+MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYGLTGQFINGS
+HSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAV
+MLERSLHNVANYLILSLAVADLMVAVLVMPLSVVSEISKVWFLHSEVCDMWISVDVLCCT
+ASILHLVAIAMDRYWAVTSIDYIRRRSARRILLMIMVVWIVALFISIPPLFGWRDPNNDP
+DKTGTCIISQDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTE
+ETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENA
+NGVNSNSSSSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSNDTPYSRTREK
+LELKRERKAARTLAIITGAFLICWLPFFIIALIGPFVDPEGIPPFARSFVLWLGYFNSLL
+NPIIYTIFSPEFRSAFQKILFGKYRRGHR
+> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail
+MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYGLTGQFINGS
+HSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAV
+MLERSLHNVANYLILSLAVADLMVAVLVMPLSVVSEISKVWFLHSEVCDMWISVDVLCCT
+ASILHLVAIAMDRYWAVTSIDYIRRRSARRILLMIMVVWIVALFISIPPLFGWRDPNNDP
+DKTGTCIISQDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTE
+ETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENA
+NGVNSNSSSSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSNDTPYSRTREK
+LELKRERKAARTLAIITGAFLICWLPFFIIALIGPFVDPEGIPPFARSFVLWLGYFNSLL
+NPIIYTIFSPEFRSAFQKILFGKYRRGHR
+> 36== X95604 1 Bombyx mori serotonin receptor [InsectBiochem.Mol.Bi
+MEGAEGQEELDWEALYLRLPLQNCSWNSTGWEPNWNVTVVPNTTWWQASAPFDTPAALVR
+AAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLVMPLGAV
+YEVVQRWTLGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTNIDYIHASTAKRVGMM
+IACVWTVSFFVCIAQLLGWKDPDWNQRVSEDLRCVVSQDVGYQIFATASSFYVPVLIILI
+LYWRIYQTARKRIRRRRGATARGGVGPPPVPAGGALVAGGGSGGIAAAVVAVIGRPLPTI
+SETTTTGFTNVSSNNTSPEKQSCANGLEADPPTTGYGAVAAAYYPSLVRRKPKEAADSKR
+ERKAAKTLAIITGAFVACWLPFFVLAILVPTCDCEVSPVLTSLSLWLGYFNSTLNPVIYT
+VFSPEFRHAFQRLLCGRRVRRRRAPQ
diff -r 6f28e90db932 -r bf28a8cff401 test-data/sample_nuc.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample_nuc.fa Wed Mar 20 07:34:52 2024 +0000
@@ -0,0 +1,41 @@
+>MZ681498.1 Rotylenchus bunae isolate GE29E-RO6 small subunit ribosomal RNA gene, partial sequence
+ATGCATGTATAAGTATAACCTGCCAGACAGGGAAACTGCGGACGGCTCATTACAACAGCCTTAATTTACT
+TGACCTTGACAACCTACTTGGATAACTGCGGTAATTCTGGAGCTAATACATGCACCAAAGCTCCGATCCC
+TCGTGGAGAGGAGCGCATTTGTTCGCCACAAAACCAAACGCCCCACAAGGGCGTTCAGTGTTGACTCAGA
+ATAACTAAGCTGATCGCACGGTCTTGCACCGGCGACGTGTCTTTCAAGTGTCTGCCTTATCAACTTTCGA
+TGGTAGTGTATCTGCCTACCATGGTTGTGACGGGTAACGGAGGATAAGGGTTCGACTCCGGAGAAGGGGC
+CTGAGAAATGGCCACTACGTCTAAGGATGGCAGCAGGCGCGCAAATTACCCACTCTCAACACGCTGAGGA
+GGTAGTGAAGAGAAATAACGAGACCGTTCTCACATGAGGCCGGTCATCGGAATGGGTACAACTTAAACCC
+TTTAACGAGTATCTATGAGAGGGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCTCAAAATGCAT
+AGAATTATTGCTGCGGTTAAAAAGCTCGTAGTTGGATCTGTGCTGGCCGCCCGGTCTGCTCGCTGAGCAC
+GCACTGGTGCGGTCGGCTTTCCTGCCCGGTACCTCCCCGGCGTTGGCCTTCACCGGTCGGCGTCGGTGGC
+CGGGCGAGTTTACTTTGAACAAATCAGAGTGCTTCAAACAGGCGTTTCGCTTGAATGTTCGTGCATGGAA
+TAATAGAAGAGGATTTCGGTCCGATTTTGTTGGTTTTGCTGACCGAGATAATGGTTAACAGAGACAAACG
+GGGCCATTCGTATTGCTACGTGAGAGGTG
+
+>MZ681497.1 Rotylenchus bunae isolate GE29A-R1 small subunit ribosomal RNA gene, partial sequence
+CTGCGGACGGCTCATTACAACAGCCTTAATTTACTTGACCTTGACAACCTACTTGGATAACTGCGGTAAT
+TCTGGAGCTAATACATGCACCAAAGCTCCGATCCCTCGTGGAGAGGAGCGCATTTGTTCGCCACAAAACC
+AAACGCCCCACAAGGGCGTTCAGTGTTGACTCAGAATAACTAAGCTGATCGCACGGTCTTGCACCGGCGA
+CGTGTCTTTCAAGTGTCTGCCTTATCAACTTTCGATGGTAGTGTATCTGCCTACCATGGTTGTGACGGGT
+AACGGAGGATAAGGGTTCGACTCCGGAGAAGGGGCCTGAGAAATGGCCACTACGTCTAAGGATGGCAGCA
+GGCGCGCAAATTACCCACTCTCAACACGCTGAGGAGGTAGTGAAGAGAAATAACGAGACCGTTCTCACAT
+GAGGCCGGTCATCGGAATGGGTACAACTTAAACCCTTTAACGAGTATCTATGAGAGGGCAAGTCTGGTGC
+CAGCAGCCGCGGTAATTCCAGCTCTCAAAATGCATAGAATTATTGCTGCGGTTAAAAAGCTCGTAGTTGG
+ATCTGTGCTGGCCGCCCGGTCTGCTCGCTGAGCACGCACTGGTGCGGTCGGCTTTCCTGCCCGGTACCTC
+CCCGGCGTTGGCCTTCACCGGTCGGCGTCGGTGGCCGGGCGAGTTTACTTTGAACAAATCAGAGTGCTTC
+AAACAGGCGTTTCGCTTGAATGTTCGTGCATGGAATAATAGAAGAGGATTTCGGTCCGATTTTGTTGGTT
+TTGCTGACCGAGATAATGGTTAACAGAGACAAACGGGGCCATTCGTATTGCTACGTGAGAGGTGAAATTC
+TTGGACCGTAGCAAGACGGACTACAGC
+
+>ON855043.1 Rotylenchus sp. JQ-2022 internal transcribed spacer 1 and 5.8S ribosomal RNA gene, partial sequence
+GGCCGTCACTCCGGGGGGACAACACAAACGCCCTGAGGGCTTTACTCGTTGGGGTGCAAACTGGATCGAG
+TGGCGCACACATCCCTTCCACGCAAAGACCTGCTGAAGAGGTCGGAGGCGATGAGTCCGAGCAACCCCAC
+AAGCAACCAGGTTGGGGAAGCTGCACACGATACTGGGATGCACGCCCCCAGGGCACCTAACGGCTGCCGC
+TGGCGTCTGTGCGTCGTTGAGCAGTTGTTGCGCACTTGCTTTTGTCGGAGCTGTACTCGGAGCATGCTGG
+CATGGACCCACACAAAAGTGTGTGGCAGCGGCCACACACCCCTGTCCATGTCCTACGGACCGTAGCTAGG
+GCGTGCTCTGGGTTTCTTCGGCTGGCAGTGTTGCTACGTCCGTGGCTGTGATGAGACGACGCGGTAGGGC
+CTTGTGCGATGCGCCTGCACTTGGCTTAAGACTTGATGAGCTCACTGCGAAGAGCCGCCAGCAACCTTTT
+TTTCATATACATTTTTTACAGGCACACTTGTGTGCTGATGAACAAAAATTCTAGCCTTATCGGTGGATCA
+CTCGGCTCGTAGGTCGATG
+