Mercurial > repos > rnateam > mafft
changeset 15:bf28a8cff401 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/mafft commit 2f6456c314c010fd73f5eeaf809a9afce47353af
author | bgruening |
---|---|
date | Wed, 20 Mar 2024 07:34:52 +0000 |
parents | 6f28e90db932 |
children | 8e649f27aa0d |
files | macros.xml mafft-add.xml mafft.xml test-data/mafft_auto_linsi.aln test-data/mafft_custom_original.clustal.aln test-data/mafft_custom_parttree.aln test-data/mafft_custom_result.aln test-data/mafft_default.aln test-data/mafft_explicit_amino_blosum80.clustal.aln test-data/mafft_fftns_result.aln test-data/mafft_kimura40.phylip.aln test-data/mafft_nwns_result.aln test-data/sample.fa test-data/sample_amino.fa test-data/sample_nuc.fa |
diffstat | 15 files changed, 3444 insertions(+), 2000 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Tue Oct 31 15:48:53 2023 +0000 +++ b/macros.xml Wed Mar 20 07:34:52 2024 +0000 @@ -1,22 +1,55 @@ <?xml version="1.0"?> <macros> - <token name="@TOOL_VERSION@">7.508</token> - <token name="@VERSION_SUFFIX@">1</token> + <token name="@TOOL_VERSION@">7.520</token> + <token name="@VERSION_SUFFIX@">0</token> <token name="@PROFILE@">22.01</token> + <!-- currently, the fasta3 executable is named according to its major version + => needs updating together with the package requirement! --> + <token name="@FASTA3_EXEC@">fasta36</token> <xml name="biotools"> <xrefs> <xref type="bio.tools">MAFFT</xref> </xrefs> </xml> <xml name="requirements"> - <requirements> - <requirement type="package" version="@TOOL_VERSION@">mafft</requirement> - <requirement type="package" version="36.3.8">fasta3</requirement> - </requirements> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">mafft</requirement> + <requirement type="package" version="36.3.8">fasta3</requirement> + </requirements> + </xml> + <xml name="weighti_param"> + <param argument="--weighti" type="float" value="2.7" min="1" max="100" label="Weighting factor for the consistency term calculated from pairwise alignments."/> + </xml> + <xml name="parttree_parameters"> + <param argument="--retree" type="integer" value="2" min="1" max="3" label="Guide tree is built this number of times in the progressive stage."/> + <param argument="--partsize" type="integer" value="50" min="0" max="1000" label="Number of partitions in the PartTree algorithm."/> + <param argument="--groupsize" type="integer" value="-1" min="-1" label="Group size" help="Do not make alignment larger than this number of sequences. The default of -1 means set the value automatically to the number of input sequences."/> + </xml> + <xml name="misc_scoring_scheme"> + <param argument="--fmodel" type="boolean" truevalue="--fmodel" falsevalue="" checked="False" label="Incorporate the AA/nuc composition into the scoring matrix?" 
/> + <conditional name="gap_costs"> + <param name="use_defaults" type="select" label="Configure gap costs"> + <option value="yes">Use default values</option> + <option value="no">Set values</option> + </param> + <when value="yes"/> + <when value="no"> + <param argument="--ep" type="float" value="0.0" label="Gap extension penalty for group-to-group alignment" help="Offset value, which works like gap extension penalty, for group-to-group alignment."/> + <param argument="--op" type="float" value="1.53" label="Gap opening penalty at group-to-group alignment." help="1.53 default value"/> + </when> + </conditional> + </xml> + <xml name="global_align_options"> + <section name="treat_unrelated_segments" title="Handling of unrelated segments in global alignments" expanded="true"> + <param argument="--unalignlevel" type="float" min="0" max="0.8" value="0" label="Over-alignment correction factor" help="The higher this factor the more likely will unrelated sequence stretches in globally related sequences be left unaligned. The default of 0 turns over-alignment correction off, turning it on increases run time."/> + <param argument="--leavegappyregion" type="boolean" truevalue="--leavegappyregion" falsevalue="" label="Leave gappy region" help="Older option for preventing over-alignment by not trying to force gap-rich regions into an alignment. Can be used alone or in combination with the over-alignment correction factor, but has a much higher impact on run time and becomes less effective with more sequences. Not recommended for > ~1000 sequences."/> + </section> </xml> <xml name="citations"> - <citations> - <citation type="doi">10.1093/molbev/mst010</citation> - </citations> + <citations> + <citation type="doi">10.1093/nar/gkf436</citation> + <citation type="doi">10.1093/nar/gki198</citation> + <citation type="doi">10.1093/molbev/mst010</citation> + </citations> </xml> </macros>
--- a/mafft-add.xml Tue Oct 31 15:48:53 2023 +0000 +++ b/mafft-add.xml Wed Mar 20 07:34:52 2024 +0000 @@ -16,43 +16,53 @@ </version_command> <command> <![CDATA[ + sh mk_symlinks.sh && mafft #if $sequences.sequenceType == 'singleseq' - $sequences.preservegap '$inputSequences' + $sequences.preservegap input_dir/sequence #elif $sequences.sequenceType == 'frags' - --addfragments '$inputSequences' + --addfragments input_dir/sequence #elif $sequences.sequenceType == 'group' - --addprofile '$inputSequences' + --addprofile input_dir/sequence #end if - $keeplength $map $reorder - '$inputAlignment' - > '$outputAlignment' - #if $map == '--mapout' - && mv '${inputSequences}.map' '$outputmap' + $keeplength + $mapout + $reorder + input_dir/alignment > '$outputAlignment' + + #if $mapout + && mv input_dir/sequence.map '$outputmap' #end if ]]> </command> + <configfiles> + <configfile filename="mk_symlinks.sh"><![CDATA[ +mkdir input_dir && +ln -s '$inputSequences' input_dir/sequence && +ln -s '$inputAlignment' input_dir/alignment + ]]></configfile> + </configfiles> <inputs> <param name="inputSequences" type="data" format="fasta" label="Sequences to add to the alignment" help="Amino acid or nucleotide sequences in FASTA format."/> <param name="inputAlignment" type="data" format="fasta" label="Alignment" help="Amino acid or nucleotide sequences in aligned FASTA format."/> <conditional name="sequences"> - <param name="sequenceType" type="select" label="What do you want to add to the alignment" > + <param name="sequenceType" type="select" label="What do you want to add to the alignment?" 
> <option value="singleseq">A single sequence</option> <option value="frags" selected="true">Fragments</option> <option value="group">An alignment</option> </param> <when value='singleseq'> - <param name="preservegap" type="select" label="Preserve the original alignment" help="Keep the given alignment unchanged (--add .)If not, the aligned letters in the seed alignment are preserved but gaps are not necessarily preserved (--seed)"> - <option value="--add" selected="true" >Yes</option> - <option value="--seed">no</option> + <param name="preservegap" type="select" label="Preserve the original alignment." help="Keep the given alignment unchanged .If not, the aligned letters in the seed alignment are preserved but gaps are not necessarily preserved."> + <option value="--add" selected="true" >Yes (--add)</option> + <option value="--seed">No (--seed)</option> </param> </when> <when value='frags'/> <when value='group'/> </conditional> - <param name="keeplength" type="boolean" truevalue="--keeplength" falsevalue="" checked="True" label="Keep alignment length" help="The alignment length is unchanged. Insertions at the additional sequences are deleted" /> - <param name="map" type="boolean" truevalue="--mapout" falsevalue="" checked="False" label="Output a correspondance table of positions (--mapout)" help="Output a correspondence table of positions, sequence.map, between before and after the calculation. The --mapout option automatically turns on the --keeplength option, to keep the numbering of sites in the reference alignment" /> - <param name="reorder" type="boolean" truevalue="" falsevalue="--reorder" checked="True" label="Preserve the original order of sequences (--reorder)" /> + <param argument="--keeplength" type="boolean" truevalue="--keeplength" falsevalue="" checked="True" label="Keep alignment length" help="The alignment length is unchanged. 
Insertions at the additional sequences are deleted" /> + <param argument="--mapout" type="boolean" truevalue="--mapout" falsevalue="" checked="False" label="Output a correspondance table of position." help="Output a correspondence table of positions, sequence.map, between before and after the calculation. The mapout option automatically turns on the keeplength option, to keep the numbering of sites in the reference alignment" /> + <param argument="--reorder" type="boolean" truevalue="" falsevalue="--reorder" checked="True" label="Preserve the original order of sequences." /> </inputs> <outputs> <data format="fasta" name="outputAlignment" label="${tool.name} on ${on_string} : New alignment"/> @@ -63,11 +73,11 @@ <tests> <test expect_num_outputs="1" > <param name="inputSequences" value="add_seq.fa"/> - <param name="inputAlignment" value="mafft_fftns_result.aln"/> + <param name="inputAlignment" value="mafft_default.aln"/> <param name="sequenceType" value="singleseq"/> <param name="preservegap" value="--add"/> <param name="keeplength" value="--keeplength"/> - <param name="map" value=""/> + <param name="mapout" value=""/> <output name="outputAlignment" ftype="fasta" file="mafft_add_result.aln"/> </test> </tests>
--- a/mafft.xml Tue Oct 31 15:48:53 2023 +0000 +++ b/mafft.xml Wed Mar 20 07:34:52 2024 +0000 @@ -1,305 +1,542 @@ -<?xml version="1.0" encoding="UTF-8"?> -<tool id="rbc_mafft" name="MAFFT" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> -<description>Multiple alignment program for amino acid or nucleotide sequences</description> - <macros> - <import>macros.xml</import> - </macros> - <expand macro="biotools"/> - <expand macro="requirements" /> - <stdio> - <exit_code range="1:" level="fatal" description="Error occurred. Please check Tool Standard Error" /> - <exit_code range=":-1" level="fatal" description="Error occurred. Please check Tool Standard Error" /> - </stdio> - <version_command> <![CDATA[ - mafft --version - ]]> - </version_command> - <command> - <![CDATA[ - - #if $cond_flavour.flavourType == 'custom' - #if $cond_flavour.dist_flavour.distance_method == '--fastapair' - export FASTA_4_MAFFT=`which fasta36`; - #end if - #end if - - #if $cond_flavour.flavourType != 'custom' - $cond_flavour.flavourType - #elif $cond_flavour.flavourType == 'custom' - ### full parameter options - mafft - $cond_flavour.dist_flavour.distance_method - #if $cond_flavour.dist_flavour.distance_method == '--6merpair' - --retree $cond_flavour.dist_flavour.retree - $cond_flavour.dist_flavour.distance_method.usetree.parttree - - #if $cond_flavour.dist_flavour.distance_method.usetree.parttree==--parttree - $cond_flavour.dist_flavour.distance_method.usetree.treedistance - $cond_flavour.dist_flavour.distance_method.usetree.partsize - $cond_flavour.dist_flavour.distance_method.usetree.groupsize - #end if - - #elif $cond_flavour.dist_flavour.distance_method == '--globalpair' - --weighti $cond_flavour.dist_flavour.weighti - #elif $cond_flavour.dist_flavour.distance_method == '--localpair' - --weighti $cond_flavour.dist_flavour.weighti - --lop $cond_flavour.dist_flavour.lop - --lep $cond_flavour.dist_flavour.lep - --lexp $cond_flavour.dist_flavour.lexp - #elif 
$cond_flavour.dist_flavour.distance_method == '--genafpair' - --weighti $cond_flavour.dist_flavour.weighti - --lop $cond_flavour.dist_flavour.lop - --lep $cond_flavour.dist_flavour.lep - --lexp $cond_flavour.dist_flavour.lexp - --LOP $cond_flavour.dist_flavour.skipLOP - --EXP $cond_flavour.dist_flavour.skipEXP 1 - #elif $cond_flavour.dist_flavour.distance_method == '--fastapair' - --weighti $cond_flavour.dist_flavour.weighti - #end if - --maxiterate $cond_flavour.iterations - $cond_flavour.fft - $cond_flavour.score - #end if - - ## specify threads to use - --thread \${GALAXY_SLOTS:-1} - $datatype - --ep $ep - --op $op - - #if $matrix_condition.matrix == "BLOSUM" - --bl $matrix_condition.BLOSUM - #elif $matrix_condition.matrix == "PAM" - --jtt $matrix_condition.PAM - --tm $matrix_condition.tm - #elif $matrix_condition.matrix == "custom" - --aamatrix '$matrix_condition.matrixfile' - --fmodel $matrix_condition.fmodel - #end if - - $reorder - $getTree - $outputFormat - '$inputSequences' > '$outputAlignment'; - - #if $getTree == "--treeout" - mv '${inputSequences}.tree' '$outputTree'; - #end if - ]]> - </command> - <inputs> - <param name="inputSequences" type="data" format="fasta" label="Sequences to align" help="Amino acid or nucleotide sequences in FASTA format."/> - <param name="datatype" type="select" label="Data type"> - <option value="">Auto detection</option> - <option value="--nuc">Nucleic acids</option> - <option value="--amino">Amino acids</option> - </param> - <conditional name="cond_flavour"> - <param name="flavourType" type="select" label="MAFFT flavour" help="Run mafft with pre-defined input parameters. 
Specification of these parameters can be found in the help section."> - <option value="mafft --auto">auto</option> - <option value="mafft-fftns" selected="true">fftns</option> - <option value="mafft-fftnsi">fftnsi</option> - <option value="mafft-nwns">nwns</option> - <option value="mafft-nwnsi">nwnsi</option> - <option value="mafft-einsi">einsi</option> - <option value="mafft-ginsi">ginsi</option> - <option value="mafft-linsi">linsi</option> - <option value="mafft-qinsi">qinsi</option> - <option value="mafft-xinsi">xinsi</option> - <option value="custom">Custom Parameters</option> - </param> - <when value="mafft-fftns"/> - <when value="mafft --auto"/> - <when value="mafft-fftnsi"/> - <when value="mafft-nwns"/> - <when value="mafft-nwnsi"/> - <when value="mafft-einsi"/> - <when value="mafft-ginsi"/> - <when value="mafft-linsi"/> - <when value="mafft-qinsi"/> - <when value="mafft-xinsi"/> - <when value="custom"> - <conditional name="dist_flavour"> - <param name="distance_method" type="select" display="radio" label="Distance method" help="Distance method must be chosen regarding your data"> - <option value="--6merpair" selected="true">Shared 6mers distance (fastest)</option> - <option value="--globalpair">Global alignment (Needleman-Wunsch)</option> - <option value="--localpair">Local alignment (Smith-Waterman)</option> - <option value="--genafpair">Local, affine gap cost</option> - <option value="--fastapair">All pairwise alignments are computed with FASTA</option> - </param> - <when value="--6merpair"> - <param name="retree" type="integer" value="2" min="1" max="100" label="Guide tree is built this number of times in the progressive stage." help="Valid with 6mer distance" /> - <conditional name="usetree"> - <param name="parttree" type="select" label="Use a fast tree-building method?" 
help="Recommended for a large number (> ~10,000) of sequences are input" > - <option value="--parttree" selected="true">Yes</option> - <option value="">No</option> - </param> - <when value="--parttree"> - <param name="treedistance" type="select" display="radio" label="Distance method" help="Distance method must be chosen regarding your data"> - <option value="--fastaparttree" selected="true">Distances based on FASTA</option> - <option value="--dpparttree">Distances based on DP. (Needleman-Wunsch)</option> - </param> - <param name="partsize" type="integer" value="50" min="0" max="1000" label="Number of partitions in the PartTree algorithm" /> - <param name="groupsize" type="integer" value="" min="0" max="1000" label="Do not make alignment larger than ... sequences" /> - </when> - <when value=""/> - </conditional> - </when> - <when value="--globalpair"> - <param name="weighti" type="float" value="2.7" min="1" max="100" label="Weighting factor for the consistency term calculated from pairwise alignments." help="Valid when either of --globalpair, --localpair, --genafpair, --fastapair or --blastpair is selected." /> - </when> - <when value="--localpair"> - <param name="weighti" type="float" value="2.7" min="1" max="100" label="Weighting factor for the consistency term calculated from pairwise alignments." help="Valid when either of --globalpair, --localpair, --genafpair, --fastapair or --blastpair is selected." /> - <param name="lop" type="float" value="-2.0" label="Gap opening penalty at local pairwise alignment" help="-2.00 default value" /> - <param name="lep" type="float" value="0.1" label="Offset value at local pairwise alignment" help="0.1 default value" /> - <param name="lexp" type="float" value="-0.1" label="Gap extension penalty at local pairwise alignment." 
help="-0.1 default value" /> - </when> - <when value="--genafpair"> - <param name="weighti" type="float" value="2.7" min="1" max="100" label="Weighting factor for the consistency term calculated from pairwise alignments." help="Valid when either of --globalpair, --localpair, --genafpair, --fastapair or --blastpair is selected." /> - <param name="lop" type="float" value="-2.0" label="Gap opening penalty at local pairwise alignment" help="-2.00 default value" /> - <param name="lep" type="float" value="0.1" label="Offset value at local pairwise alignment" help="0.1 default value" /> - <param name="lexp" type="float" value="-0.1" label="Gap extension penalty at local pairwise alignment." help="-0.1 default value" /> - <param name="skipLOP" type="float" value="-6.00" label="Gap opening penalty to skip the alignment" help="-6.00 default value" /> - <param name="skipEXP" type="float" value="0.00" label="Gap extension penalty to skip the alignment" help="0 default value" /> - </when> - <when value="--fastapair"> - <param name="weighti" type="float" value="2.7" min="1" max="100" label="Weighting factor for the consistency term calculated from pairwise alignments." help="Valid when either of --globalpair, --localpair, --genafpair, --fastapair or --blastpair is selected." /> - </when> - </conditional> - <param name="iterations" type="integer" value="0" min="0" max="1000" label="Maximum number of iterations" help="1000 for maximum quality" /> - <param name="fft" type="boolean" truevalue="--fft" falsevalue="--nofft" checked="True" label="Use FFT approximation in group-to-group alignment?" /> - <param name="score" type="boolean" truevalue="" falsevalue="--noscore" checked="True" label="Check alignment score in the iterative refinement stage?" /> - </when> - </conditional> - <param name="ep" type="float" value="0.0" label="Gap extend penalty for group-to-group alignment" help="Offset value, which works like gap extension penalty, for group-to-group alignment. 
For E-INS-i, 0 is recommended to allow large gaps" /> - <param name="op" type="float" value="1.53" label="Gap opening penalty at group-to-group alignment." help="1.53 default value" /> - <conditional name="matrix_condition"> - <param name="matrix" type="select" label="Matrix selection" display="radio" help="Usefull only for amino acids" > - <option value="">No matrix</option> - <option value="BLOSUM" selected="true">BLOSUM</option> - <option value="PAM">PAM</option> - <option value="custom">Custom</option> - </param> - <when value=""/> - <when value="BLOSUM"> - <param name="BLOSUM" type="select" display="radio" label="Coefficient of the BLOSUM matrix"> - <option value="30">30</option> - <option value="45">45</option> - <option value="62" selected="true">62</option> - <option value="80">80</option> - </param> - </when> - <when value="PAM"> - <param name="PAM" type="integer" value="80" min="1" max="350" label="Coefficient of the JTT PAM matrix" /> - <param name="tm" type="integer" value="80" min="1" max="350" label="Coefficient of the transmembrane PAM matrix" /> - </when> - <when value="custom"> - <param name="matrixfile" type="data" format="txt" label="User-defined AA scoring matrix" help="The format of matrixfile is the same to that of BLAST. Ignored when nucleotide sequences are input."/> - <param name="fmodel" type="boolean" truevalue="--fmodel" falsevalue="" checked="False" label="Incorporate the AA/nuc composition information into the scoring matrix?" /> - </when> - </conditional> - <param name="reorder" type="boolean" truevalue="--reorder" falsevalue="" checked="False" label="Reorder output?" /> - <param name="getTree" type="boolean" truevalue="--treeout" falsevalue="" checked="False" label="Display alignment tree ?" 
/> - <param name="outputFormat" type="select" label="Output format" help="Either FASTA or ClustalW"> - <option value="" selected="true">FASTA</option> - <option value="--clustalout">ClustalW</option> - <option value="--phylipout">Phylip</option> - </param> - </inputs> - <outputs> - <data format="fasta" name="outputAlignment" label="${tool.name} on ${on_string}"> - <change_format> - <when input="outputFormat" value="--clustalout" format="clustal"/> - <when input="outputFormat" value="--phylipout" format="phylip"/> - </change_format> - </data> - <data name="outputTree" format="txt" label="${tool.name} Guide Tree"> - <filter>getTree == True</filter> - </data> - </outputs> - <tests> - <test expect_num_outputs="1" > - <param name="inputSequences" value="sample.fa"/> - <param name="flavourType" value="mafft-fftns"/> - <param name="outputFormat" value=""/> - <output name="outputAlignment" ftype="fasta" file="mafft_fftns_result.aln"/> - </test> - <test expect_num_outputs="1" > - <param name="inputSequences" value="sample.fa"/> - <param name="flavourType" value="mafft-nwns"/> - <param name="outputFormat" value="--clustalout"/> - <output name="outputAlignment" ftype="clustal" file="mafft_nwns_result.aln" lines_diff="2" /> - </test> - <!-- WARNING: the results of the following test depends on #threads. - The result seems deterministic for single threaded execution, i.e. 
GALAXY_SLOTS=1 planemo test - However, GH CI/CD uses 2 threads and results vary --> - <test expect_num_outputs="1" > - <param name="inputSequences" value="sample.fa"/> - <param name="flavourType" value="custom"/> - <conditional name="matrix_condition"> - <param name="matrix" value="BLOSUM"/> - </conditional> - <param name="BLOSUM" value="62"/> - <param name="distance_method" value="--fastapair"/> - <param name="weighti" value="2.7"/> - <param name="iterations" value="1000"/> - <param name="outputFormat" value="--clustalout"/> - <output name="outputAlignment" ftype="clustal" file="mafft_custom_result.aln" compare="sim_size"> - <assert_contents> - <has_n_lines n="458" delta="0"/> - <has_text text="CLUSTAL format alignment by MAFFT F-INS-i"/> - <has_text text="NPIVYGISHPKY"/> - <has_text text="1=="/> - <has_text text="36=="/> - <has_line line="8=opsin, ------------------------------------------------------------"/> - </assert_contents> - </output> - </test> - </tests> - <help> <![CDATA[ - **What it does** - - MAFFT is a multiple sequence alignment program for unix-like operating systems. - It offers a range of multiple alignment methods, L-INS-i (accurate; for alignment of <∼200 sequences), - FFT-NS-2 (fast; for alignment of <∼30,000 sequences), etc. 
- From the MAFFT man page, an overview of the different predefined flavours of the tool is as follows: - - **Accuracy-oriented methods:** - - - L-INS-i (probably most accurate; recommended for <200 sequences; iterative refinement method incorporating local pairwise alignment information): - - mafft --localpair --maxiterate 1000 input [> output] - - G-INS-i (suitable for sequences of similar lengths; recommended for <200 sequences; iterative refinement method incorporating global pairwise alignment information): - - mafft --globalpair --maxiterate 1000 input [> output] - - E-INS-i (suitable for sequences containing large unalignable regions; recommended for <200 sequences): - - mafft --ep 0 --genafpair --maxiterate 1000 input [> output]. For E-INS-i, the --ep 0 option is recommended to allow large gaps. - - **Speed-oriented methods:** - - - FFT-NS-i (iterative refinement method; two cycles only): - - mafft --retree 2 --maxiterate 2 input [> output] - - FFT-NS-i (iterative refinement method; max. 1000 iterations): - - mafft --retree 2 --maxiterate 1000 input [> output] - - FFT-NS-2 (fast; progressive method): - - mafft --retree 2 --maxiterate 0 input [> output] - - FFT-NS-1 (very fast; recommended for >2000 sequences; progressive method with a rough guide tree): - - mafft --retree 1 --maxiterate 0 input [> output] - - NW-NS-i (iterative refinement method without FFT approximation; two cycles only): - - mafft --retree 2 --maxiterate 2 --nofft input [> output] - - NW-NS-2 (fast; progressive method without the FFT approximation): - - mafft --retree 2 --maxiterate 0 --nofft input [> output] - - NW-NS-PartTree-1 (recommended for ~10,000 to ~50,000 sequences; progressive method with the PartTree algorithm): - - mafft --retree 1 --maxiterate 0 --nofft --parttree input [> output] - - **Options:** - - - --auto Automatically selects an appropriate strategy from L-INS-i, FFT-NS-i and FFT-NS-2, according to data size. 
Default: off (always FFT-NS-2) - - --adjustdirection Generate reverse complement sequences, as necessary, and align them together with the remaining sequences. In the case of protein alignment, these options are just ignored. - - --op Gap opening penalty, default: 1.53 - - --ep Offset (works like gap extension penalty), default: 0.0 - - --maxiterate Maximum number of iterative refinement, default: 0 - - --clustalout Output: clustal format, default: fasta - - --retree number Guide tree is built number times in the progressive stage. Valid with 6mer distance. Default: 2 - ]]> - </help> - <expand macro="citations" /> -</tool> +<?xml version="1.0" encoding="UTF-8"?> +<tool id="rbc_mafft" name="MAFFT" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> + <description>Multiple alignment program for amino acid or nucleotide sequences</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="biotools"/> + <expand macro="requirements" /> + <stdio> + <exit_code range="1:" level="fatal" description="Error occurred. Please check Tool Standard Error" /> + <exit_code range=":-1" level="fatal" description="Error occurred. Please check Tool Standard Error" /> + </stdio> + <version_command><![CDATA[mafft --version]]></version_command> + <command><![CDATA[ + ## Concatenate all input datasets no matter how they were provided + bash inputs.sh && + + ## Count total number of sequences across input datasets + ## Can't do this on the concatenated input data prepared above because it's + ## just a regular file and we don't have Galaxy-generated metadata for it. 
+ #set sequence_count = 0 + #if $input.mapping == "implicit" + #for $batch in $input.batches: + #set sequence_count += int($batch.inputs.metadata.sequences) + #end for + #elif $input.mapping == "merge" + #for $batch in $input.batches: + #for $dataset in $batch.inputs: + #set sequence_count += int($dataset.metadata.sequences) + #end for + #end for + #end if + + ## For those cases in which MAFFT needs fasta3, set an env variable to make it + ## find the executable. Necessary because the current version of MAFFT still + ## expects a fasta34 executable in path, but we bundle a newer version. + #if $flavour.type == "custom" + #if $flavour.guidetree.guidetree_generation == "original" + #if $flavour.guidetree.dist_flavour.distance_method == "--fastapair" + export FASTA_4_MAFFT=`which @FASTA3_EXEC@` && + #end if + #elif $flavour.guidetree.guidetree_generation == "parttree" + #if $flavour.guidetree.parttree_selection.parttree_option == "--fastaparttree" + export FASTA_4_MAFFT=`which @FASTA3_EXEC@` && + #end if + #end if + #end if + + ## groupsize warning + #if $flavour.type == "custom" + #if $flavour.guidetree.guidetree_generation == "parttree" + #if $flavour.guidetree.parttree_selection.groupsize > $sequence_count + echo "WARNING = Chosen groupsize number larger than number of input sequences. Not recommended for MAFFT." 
&& + #end if + #end if + #end if + + ## run MAFFT with predefined MSA flavours or custom settings + #if $flavour.type == "custom" + mafft + #if $flavour.guidetree.guidetree_generation == "original" + #if $flavour.guidetree.dist_flavour.distance_method == "--6merpair" + --6merpair + --retree $flavour.guidetree.dist_flavour.retree + #elif $flavour.guidetree.dist_flavour.distance_method == "--globalpair" + --globalpair + --weighti $flavour.guidetree.dist_flavour.weighti + #if $flavour.guidetree.dist_flavour.treat_unrelated_segments.unalignlevel > 0 + --allowshift --unalignlevel $flavour.guidetree.dist_flavour.treat_unrelated_segments.unalignlevel + #end if + $flavour.guidetree.dist_flavour.treat_unrelated_segments.leavegappyregion + #elif $flavour.guidetree.dist_flavour.distance_method == "--localpair" + --localpair + --weighti $flavour.guidetree.dist_flavour.weighti + --lop $flavour.guidetree.dist_flavour.lop + --lep $flavour.guidetree.dist_flavour.lep + --lexp $flavour.guidetree.dist_flavour.lexp + ## local alignment with affine gap cost (the distance method used by E-INS-i) + #elif $flavour.guidetree.dist_flavour.distance_method == "--genafpair" + --genafpair + --weighti $flavour.guidetree.dist_flavour.weighti + --lop $flavour.guidetree.dist_flavour.lop + --lep $flavour.guidetree.dist_flavour.lep + --lexp $flavour.guidetree.dist_flavour.lexp + --LOP $flavour.guidetree.dist_flavour.LOP + --LEXP $flavour.guidetree.dist_flavour.LEXP + #elif $flavour.guidetree.dist_flavour.distance_method == "--fastapair" + --fastapair + --weighti $flavour.guidetree.dist_flavour.weighti + #end if + #elif $flavour.guidetree.guidetree_generation == "parttree" + $flavour.guidetree.parttree_selection.parttree_option + --retree $flavour.guidetree.parttree_selection.retree + --partsize $flavour.guidetree.parttree_selection.partsize + #if $flavour.guidetree.parttree_selection.groupsize != -1 + --groupsize $flavour.guidetree.parttree_selection.groupsize + #end if + #end if + ## progressive alignment calculation + --maxiterate
$flavour.progressive_alignment_calculation.maxiterate + $flavour.progressive_alignment_calculation.fft + $flavour.progressive_alignment_calculation.noscore + #else + $flavour.type + #if $flavour.type == "mafft-ginsi" or "--globalpair" in str($flavour.type) + #if $flavour.treat_unrelated_segments.unalignlevel > 0 + --allowshift --unalignlevel $flavour.treat_unrelated_segments.unalignlevel + #end if + $flavour.treat_unrelated_segments.leavegappyregion + #end if + #end if + + ## handle scoring matrix + $datatype_selection.datatype + #if $datatype_selection.datatype != "" + #if $datatype_selection.scoring_matrix.type == "custom" + --aamatrix '$datatype_selection.scoring_matrix.aamatrix' + #else + $datatype_selection.scoring_matrix.type $datatype_selection.scoring_matrix.coefficient + #end if + $datatype_selection.fmodel + ## gap penalties + #if $datatype_selection.gap_costs.use_defaults == "no" + --ep $datatype_selection.gap_costs.ep --op $datatype_selection.gap_costs.op + #end if + #end if + + + ## output options + $reorder + $outputFormat + $treeout + + ## specify threads to use + ## disable multithreading during iterative refinement step for reproducibility + ## cmp. https://mafft.cbrc.jp/alignment/software/multithreading.html + --thread \${GALAXY_SLOTS:-1} --threadit 0 + + input.fa > '$outputAlignment' + + ## Output alignment tree + #if $treeout + && mv input.fa.tree '$outputTree' + #end if + ]]></command> + <configfiles> + <configfile filename="inputs.sh"><![CDATA[ + #if $input.mapping == "implicit" + #for $batch in $input.batches: +cat $batch.inputs >> input.fa + #end for + #elif $input.mapping == "merge" + #for $batch in $input.batches: + #for $dataset in $batch.inputs: +cat $dataset >> input.fa + #end for + #end for + #end if + ]]></configfile> + </configfiles> + <inputs> + <conditional name="input"> + <param name="mapping" type="select" label="For multiple inputs generate" help="All you have is a single dataset with the sequences to align? 
You can skip this help text and continue with the default setting. For multiple input datasets, the first mode will launch separate MAFFT jobs for all sequences from the first, second, ..., n-th dataset/element from each input batch, respectively, resulting in n separate MSAs. The second mode will concatenate all input sequences from all inputs for a single run of MAFFT and will generate a single MSA."> + <option value="implicit">one or several MSAs depending on input structure</option> + <option value="merge">a single MSA of all sequences from all inputs</option> + </param> + <when value="implicit"> + <repeat name="batches" title="Input batch" default="1" min="1"> + <param name="inputs" type="data" format="fasta" label="Sequences to align" help="Amino acid or nucleotide sequences in FASTA format. Add Dataset for concatenation of every additional dataset with each file of the first upload panel"/> + </repeat> + </when> + <when value="merge"> + <repeat name="batches" title="Input batch" default="1" min="1"> + <param name="inputs" multiple="true" type="data" format="fasta" label="Sequences to align" help="Amino acid or nucleotide sequences in FASTA format."/> + </repeat> + </when> + </conditional> + <conditional name="datatype_selection"> + <param name="datatype" type="select" label="Type of sequences" help="The tool can try to detect the type of the input sequences, but you likely want to declare it explicitly. 
Doing so will also give you control over the scoring matrix used for the alignment, while autodetection will result in the Kimura PAM200 and the BLOSUM62 matrix being used for nucleic acids and protein alignments, respectively."> + <option value="">auto-detect</option> + <option value="--nuc">Nucleic acids</option> + <option value="--amino">Amino acids</option> + </param> + <when value="" /> + <when value="--nuc"> + <conditional name="scoring_matrix"> + <param name="type" type="select" label="Type of scoring matrix" help="See the tool help below for details about the available options."> + <option value="--kimura">Kimura</option> + </param> + <when value="--kimura"> + <param argument="--kimura" name="coefficient" type="integer" value="200" min="1" label="PAM value of the matrix" /> + </when> + </conditional> + <expand macro="misc_scoring_scheme" /> + </when> + <when value="--amino"> + <conditional name="scoring_matrix"> + <param name="type" type="select" label="Type of scoring matrix" help="See the tool help below for details about the available options."> + <option value="--bl" selected="true">BLOSUM</option> + <option value="--jtt">JTT</option> + <option value="--tm">transmembrane protein-optimized JTT</option> + <option value="custom">custom matrix</option> + </param> + <when value="--bl"> + <param argument="--bl" name="coefficient" type="select" display="radio" label="Coefficient of the BLOSUM matrix"> + <option value="30">30</option> + <option value="45">45</option> + <option value="62" selected="true">62</option> + <option value="80">80</option> + </param> + </when> + <when value="--jtt"> + <param argument="--jtt" name="coefficient" type="integer" value="200" min="1" label="PAM value of the matrix" /> + </when> + <when value="--tm"> + <param argument="--tm" name="coefficient" type="integer" value="200" min="1" label="PAM value of the matrix"/> + </when> + <when value="custom"> + <param argument="--aamatrix" type="data" format="txt" label="User-defined AA 
scoring matrix" help="The expected format of the matrix is the same as that used by BLAST."/> + </when> + </conditional> + <expand macro="misc_scoring_scheme" /> + </when> + </conditional> + <conditional name="flavour"> + <param name="type" type="select" label="MAFFT flavour" help="Run mafft with pre-defined input parameters. Specification of these parameters can be found in the help section. With 'Auto', the tool automatically selects an appropriate strategy from L-INS-i, FFT-NS-i and FFT-NS-2, according to data size from few to many respectively. Default setting: FFT-NS-2."> + <option value="mafft --auto">Auto</option> + <option value="mafft-fftns --retree 1">FFT-NS-1 (very fast, progressive method; use for &gt;2,000 sequences)</option> + <option value="mafft-fftns" selected="true">FFT-NS-2 (fast, progressive method)</option> + <option value="mafft-nwns">NW-NS-2 (fast, progressive method without FFT approximation)</option> + <option value="mafft --retree 1 --maxiterate 0 --nofft --parttree">NW-NS-PartTree-1 (very fast, progressive method using the PartTree algorithm; for ~10,000 to ~50,000 sequences)</option> + <option value="mafft --maxiterate 0 --globalpair">G-INS-1 (slow, progressive method with an accurate guide tree)</option> + <option value="mafft-fftnsi">FFT-NS-i (slow, iterative refinement method)</option> + <option value="mafft-nwnsi">NW-NS-i (slow, iterative refinement method without FFT approximation)</option> + <option value="mafft-einsi">E-INS-i (very slow; use for &lt;200 sequences with multiple conserved domains and long gaps)</option> + <option value="mafft-linsi">L-INS-i (very slow; use for &lt;200 sequences with one conserved domain and long gaps)</option> + <option value="mafft-ginsi">G-INS-i (very slow; recommended for &lt;200 sequences with global homology)</option> + <option value="custom">Custom Parameters</option> + </param> + <when value="mafft --auto"/> + <when value="mafft-fftns --retree 1"/> + <when value="mafft-fftns"/> + <when 
value="mafft-nwns"/> + <when value="mafft --retree 1 --maxiterate 0 --nofft --parttree"/> + <when value="mafft --maxiterate 0 --globalpair"> + <expand macro="global_align_options"/> + </when> + <when value="mafft-fftnsi"/> + <when value="mafft-nwnsi"/> + <when value="mafft-einsi"/> + <when value="mafft-linsi"/> + <when value="mafft-ginsi"> + <expand macro="global_align_options"/> + </when> + <when value="custom"> + <conditional name="guidetree"> + <param name="guidetree_generation" type="select" label="GuideTree-Generation" help="Parttree is recommended for a large number (> ~10,000) of sequences as input"> + <option value="original">Original guidetree building method of MAFFT</option> + <option value="parttree">Fast guidetree building method with PartTree-algorithm</option> + </param> + <when value="original"> + <conditional name="dist_flavour"> + <param name="distance_method" type="select" label="Distance method" help="Distance method must be chosen regarding your data"> + <option value="--6merpair" selected="true">Shared 6mers distance (fastest) (--6merpair)</option> + <option value="--globalpair">Global alignment (Needleman-Wunsch) (--globalpair)</option> + <option value="--localpair">Local alignment (Smith-Waterman) (--localpair)</option> + <option value="--genafpair">Local, affine gap cost (--genafpair)</option> + <option value="--fastapair">All pairwise alignments are computed with FASTA (--fastapair)</option> + </param> + <when value="--6merpair"> + <param argument="--retree" type="integer" value="2" min="1" max="3" label="Guide tree is built this number of times in the progressive stage."/> + </when> + <when value="--globalpair"> + <expand macro="global_align_options"/> + <expand macro="weighti_param" /> + </when> + <when value="--localpair"> + <expand macro="weighti_param" /> + <param argument="--lop" type="float" value="-2.0" label="Gap opening penalty at local pairwise alignment" help="-2.00 default value"/> + <param argument="--lep" type="float" 
value="0.1" label="Offset value at local pairwise alignment" help="0.1 default value"/> + <param argument="--lexp" type="float" value="-0.1" label="Gap extension penalty at local pairwise alignment." help="-0.1 default value" /> + </when> + <when value="--genafpair"> + <expand macro="weighti_param" /> + <param argument="--lop" type="float" value="-2.0" label="Gap opening penalty at local pairwise alignment" help="-2.00 default value" /> + <param argument="--lep" type="float" value="0.1" label="Offset value at local pairwise alignment" help="0.1 default value" /> + <param argument="--lexp" type="float" value="-0.1" label="Gap extension penalty at local pairwise alignment." help="-0.1 default value" /> + <param argument="--LOP" type="float" value="-6.00" label="Gap opening penalty to skip the alignment" help="-6.00 default value" /> + <param argument="--LEXP" type="float" value="0.00" label="Gap extension penalty to skip the alignment" help="0 default value" /> + </when> + <when value="--fastapair"> + <expand macro="weighti_param" /> + </when> + </conditional> + </when> + <when value="parttree"> + <conditional name="parttree_selection"> + <param name="parttree_option" type="select" label="Which distance for the fast tree-building method?"> + <option value="--parttree" selected="true">Fast tree-building method with the 6mer distance (--parttree)</option> + <option value="--fastaparttree">Distances based on FASTA (--fastaparttree)</option> + <option value="--dpparttree">Distances based on DP. 
(Needleman-Wunsch) (--dpparttree)</option> + </param> + <when value="--parttree"> + <expand macro="parttree_parameters" /> + </when> + <when value="--fastaparttree"> + <expand macro="parttree_parameters" /> + </when> + <when value="--dpparttree"> + <expand macro="parttree_parameters" /> + </when> + </conditional> + </when> + </conditional> + <section name="progressive_alignment_calculation" title="Progressive alignment calculation" expanded="true"> + <param argument="--maxiterate" type="integer" value="0" min="0" max="1000" label="Maximum number of iterations" help="1000 for maximum quality" /> + <param argument="--fft" type="boolean" truevalue="--fft" falsevalue="--nofft" checked="True" label="Use FFT approximation in group-to-group alignment?" /> + <param argument="--noscore" type="boolean" truevalue="" falsevalue="--noscore" checked="True" label="Check alignment score in the iterative refinement stage?" /> + </section> + </when> + </conditional> + <param argument="--reorder" type="boolean" truevalue="--reorder" falsevalue="" checked="False" label="Reorder output?" help="Default order is input order." /> + <param argument="--treeout" type="boolean" truevalue="--treeout" falsevalue="" checked="False" label="Keep alignment tree as output?" 
/> + <param name="outputFormat" type="select" label="Output format"> + <option value="" selected="true">FASTA</option> + <option value="--clustalout">ClustalW</option> + <option value="--phylipout">Phylip</option> + </param> + </inputs> + <outputs> + <data format="fasta" name="outputAlignment" label="${tool.name} on ${on_string}"> + <change_format> + <when input="outputFormat" value="--clustalout" format="clustal"/> + <when input="outputFormat" value="--phylipout" format="phylip"/> + </change_format> + </data> + <data name="outputTree" format="txt" label="${tool.name} Guide Tree"> + <filter>treeout</filter> + </data> + </outputs> + <tests> + <test expect_num_outputs="1"> + <conditional name="input"> + <param name="mapping" value="implicit"/> + <repeat name="batches"> + <param name="inputs" value="sample_amino.fa"/> + </repeat> + </conditional> + <output name="outputAlignment" ftype="fasta" file="mafft_default.aln"/> + </test> + <!-- test autodetection of suitable algorithm from input; expected to choose L-INS-i --> + <test expect_num_outputs="1"> + <conditional name="input"> + <param name="mapping" value="implicit"/> + <repeat name="batches"> + <param name="inputs" value="sample_amino.fa"/> + </repeat> + </conditional> + <conditional name="flavour"> + <param name="type" value="mafft --auto"/> + </conditional> + <output name="outputAlignment" ftype="fasta" file="mafft_auto_linsi.aln"/> + </test> + <!-- test explicit specification of L-INS-i mode --> + <test expect_num_outputs="1"> + <conditional name="input"> + <param name="mapping" value="implicit"/> + <repeat name="batches"> + <param name="inputs" value="sample_amino.fa"/> + </repeat> + </conditional> + <conditional name="flavour"> + <param name="type" value="mafft-linsi"/> + </conditional> + <output name="outputAlignment" ftype="fasta" file="mafft_auto_linsi.aln"/> + </test> + <test expect_num_outputs="1"> + <conditional name="input"> + <param name="mapping" value="implicit"/> + <repeat name="batches"> + <param 
name="inputs" value="sample_amino.fa"/> + </repeat> + </conditional> + <conditional name="datatype_selection"> + <param name="datatype" value="--amino"/> + <conditional name="scoring_matrix"> + <param name="type" value="--bl"/> + <param name="coefficient" value="80"/> + </conditional> + </conditional> + <conditional name="flavour"> + <param name="type" value="mafft-fftns"/> + </conditional> + <param name="outputFormat" value="--clustalout"/> + <output name="outputAlignment" ftype="clustal" file="mafft_explicit_amino_blosum80.clustal.aln" /> + </test> + <test expect_num_outputs="1" > + <conditional name="input"> + <param name="mapping" value="implicit"/> + <repeat name="batches"> + <param name="inputs" value="sample_nuc.fa"/> + </repeat> + </conditional> + <conditional name="datatype_selection"> + <param name="datatype" value="--nuc"/> + <conditional name="scoring_matrix"> + <param name="type" value="--kimura"/> + <param name="coefficient" value="40"/> + </conditional> + </conditional> + <conditional name="flavour"> + <param name="type" value="mafft-fftns"/> + </conditional> + <param name="outputFormat" value="--phylipout"/> + <output name="outputAlignment" ftype="phylip" file="mafft_kimura40.phylip.aln" /> + </test> + <test expect_num_outputs="1"> + <conditional name="input"> + <param name="mapping" value="implicit"/> + <repeat name="batches"> + <param name="inputs" value="sample_amino.fa"/> + </repeat> + </conditional> + <conditional name="datatype_selection"> + <param name="datatype" value="--amino"/> + </conditional> + <conditional name="flavour"> + <param name="type" value="custom"/> + <conditional name="guidetree"> + <param name="guidetree_generation" value="original"/> + <conditional name="dist_flavour"> + <param name="distance_method" value="--globalpair"/> + <param name="weighti" value="3"/> + </conditional> + </conditional> + <section name="progressive_alignment_calculation"> + <param name="maxiterate" value="1000"/> + </section> + </conditional> + <param 
name="outputFormat" value="--clustalout"/> + <output name="outputAlignment" ftype="clustal" file="mafft_custom_original.clustal.aln"> + </output> + </test> + <test expect_num_outputs="1"> + <conditional name="input"> + <param name="mapping" value="implicit"/> + <repeat name="batches"> + <param name="inputs" value="sample_amino.fa"/> + </repeat> + </conditional> + <conditional name="datatype_selection"> + <param name="datatype" value="--amino"/> + </conditional> + <conditional name="flavour"> + <param name="type" value="custom"/> + <conditional name="guidetree"> + <param name="guidetree_generation" value="parttree"/> + <conditional name="parttree_selection"> + <param name="parttree_option" value="--parttree"/> + <param name="retree" value="2"/> + </conditional> + </conditional> + </conditional> + <output name="outputAlignment" ftype="fasta" file="mafft_custom_parttree.aln" /> + </test> + <!-- test concatenation of multiple inputs --> + <test expect_num_outputs="2"> + <conditional name="input"> + <param name="mapping" value="merge"/> + <repeat name="batches"> + <param name="inputs" value="sample_amino.fa"/> + </repeat> + <repeat name="batches"> + <param name="inputs" value="sample_nuc.fa"/> + </repeat> + </conditional> + <param name="treeout" value="true"/> + <output name="outputAlignment" ftype="fasta"> + <metadata name="sequences" value="39"/> + </output> + </test> + </tests> + <help><![CDATA[ +**What it does** + +MAFFT is a multiple sequence alignment (MSA) program, which offers a range of multiple alignment methods. 
+ +Input types and alignment scoring matrices +------------------------------------------ + +For the alignment of *protein* sequences, you can choose between: + +- different flavors of BLOSUM matrices (`Henikoff S and Henikoff JG, 1992 <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC50453/>`__) +- JTT matrices with any point accepted mutation (PAM) rate (`Jones, Taylor and Thornton, 1992 <https://pubmed.ncbi.nlm.nih.gov/1633570/>`__) +- PAM-based matrices optimized for transmembrane proteins (`Jones, Taylor and Thornton, 1994 <https://pubmed.ncbi.nlm.nih.gov/8112466/>`__) + +For nucleic acid sequence alignment, MAFFT uses Kimura's two parameter model (`Kimura 1980 <https://pubmed.ncbi.nlm.nih.gov/7463489/>`__) +with a transitions to transversions ratio of 2 (kappa 2), but lets you configure the PAM value. + +The tool can also try to autodetect the sequence type from the input(s). +In this mode, it will use the BLOSUM 62 matrix if it detects amino acids input, and the Kimura kappa 2 PAM200 matrix for nucleic acids. + + +Pre-configured MSA methods +-------------------------- + +From the `MAFFT man page <https://mafft.cbrc.jp/alignment/software/manual/manual.html>`__, an overview of the different predefined flavours of the tool. + +**Accuracy-oriented methods:** + +- *L-INS-i* (probably most accurate; recommended for <200 sequences; iterative refinement method incorporating local pairwise alignment information): + + - mafft --localpair --maxiterate 1000 input [> output] +- *G-INS-i* (suitable for sequences of similar lengths; recommended for <200 sequences; iterative refinement method incorporating global pairwise alignment information): + + - mafft --globalpair --maxiterate 1000 input [> output] +- *E-INS-i* (suitable for sequences containing large unalignable regions; recommended for <200 sequences): + + - mafft --ep 0 --genafpair --maxiterate 1000 input [> output]. For E-INS-i, the --ep 0 option is recommended to allow large gaps. 
+ +**Speed-oriented methods:** + +- *FFT-NS-i* (iterative refinement method; two cycles only): + + - mafft --retree 2 --maxiterate 2 input [> output] +- *FFT-NS-2* (fast; progressive method): + + - mafft --retree 2 --maxiterate 0 input [> output] +- *NW-NS-i* (iterative refinement method without FFT approximation; two cycles only): + + - mafft --retree 2 --maxiterate 2 --nofft input [> output] +- *NW-NS-2* (fast; progressive method without the FFT approximation): + + - mafft --retree 2 --maxiterate 0 --nofft input [> output] +- *NW-NS-PartTree-1* (recommended for ~10,000 to ~50,000 sequences; progressive method with the PartTree algorithm): + + - mafft --retree 1 --maxiterate 0 --nofft --parttree input [> output] +- *FFT-NS-1* (very fast; recommended for >2000 sequences; progressive method with a rough guide tree): + + - mafft --retree 1 --maxiterate 0 input [> output] + ]]></help> + <expand macro="citations" /> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mafft_auto_linsi.aln Wed Mar 20 07:34:52 2024 +0000 @@ -0,0 +1,504 @@ +> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91] +M------------------NGTE-G------DNFYVPF----SNKTGLARSPYEYPQY-- +--------------------------------------------------YLAEPW---- +---------------KYSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLA +MANLFMVLFG-FTVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERY +IVICKPMGNF-RFGNTHAIMGVAFTWIMALAC-AAPPLVG-WS-----RYIPEGMQCSCG +PDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQESA------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------STQKAEKEVTRMVVLMVIGFLVCWVPYAS +VAFYIFT---HQGS-DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTL----- +CC-----GKNPLGDDE-SGA-STSK-TEVSSVS-TSPVSPA------------------- +------------------------------------------------------------ +--------------- +> 2== U22180 1 rat opsin <rod>[J.Mol.Neurosci.5(3),207-209'94] +M------------------NGTE-G------PNFYVPF----SNITGVVRSPFEQPQY-- +--------------------------------------------------YLAEPW---- +---------------QFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLA +VADLFMVFGG-FTTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERY +VVVCKPMSNF-RFGENHAIMGVAFTWVMALAC-AAPPLVG-WS-----RYIPEGMQCSCG +IDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESA------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------TTQKAEKEVTRMVIIMVIFFLICWLPYAS +VAMYIFT---HQGS-NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSL----- +CC-----GKNPLGDDE-ASA-TASK-TETSQVA-PA------------------------ +------------------------------------------------------------ +--------------- +> 3== M92038 1 chicken green sensitive cone opsin <retina>[PNAS89,5932-5936'9 +M------------------NGTE-G------INFYVPM----SNKTGVVRSPFEYPQY-- +--------------------------------------------------YLAEPW---- 
+---------------KYRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLA +VADLFMACFG-FTVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERY +IVVCKPMGNF-RFSATHAMMGIAFTWVMAFSC-AAPPLFG-WS-----RYMPEGMQCSCG +PDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQESA------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------TTQKAEKEVTRMVILMVLGFMLAWTPYAV +VAFWIFT---NKGA-DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTI----- +CC-----GKNPFGDEDVSSTVSQSK-TEVSSVS-SSQVSPA------------------- +------------------------------------------------------------ +--------------- +> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish +M------------------NGTE-G------KNFYVPM----SNRTGLVRSPFEYPQY-- +--------------------------------------------------YLAEPW---- +---------------QFKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLA +VAGTIMVCFG-FTVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERY +IVVCKPMGSF-KFSSSHAFAGIAFTWVMALAC-AAPPLFG-WS-----RYIPEGMQCSCG +PDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQDSA------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------STQKAEREVTKMVILMVFGFLIAWTPYAT +VAAWIFF---NKGA-DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTI----- +FC-----GKNPLGDDE-SSTVSTSK-TEVSSVS-PA------------------------ +------------------------------------------------------------ +--------------- +> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish +M------------------NGTE-G------NNFYVPL----SNRTGLVRSPFEYPQY-- +--------------------------------------------------YLAEPW---- +---------------QFKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLA +VAGAIMVCFG-FTVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERY +IVVCKPMGSF-KFSSTHASAGIAFTWVMAMAC-AAPPLVG-WS-----RYIPEGIQCSCG +PDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQDSA------- +------------------------------------------------------------ 
+------------------------------------------------------------ +-------------------------------STQKAEREVTKMVILMVLGFLVAWTPYAT +VAAWIFF---NKGA-AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTL----- +FC-----GKNPLGDEE-SSTVSTSK-TEVSSVS-PA------------------------ +------------------------------------------------------------ +--------------- +> 6== L11864 1 Carassius auratus blue cone opsin <retina>[Biochemistry32,208- +M------------------KQVP-EFH----EDFYIPIPL--DINNLSAYSPFLVPQD-- +--------------------------------------------------HLGNQG---- +---------------IFMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLS +IANLFVAIFG-SPLSFYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERW +LVICKPLGNF-TFKTPHAIAGCILPWISALAA-SLPPLFG-WS-----RYIPEGLQCSCG +PDWYTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITLKLAAKAQADSA------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------STQKAEREVTKMVVVMVLGFLVCWAPYAS +FSLWIVS---HRGE-EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMMKMV----- +C------GKN-IEEDE-AST-SSQV-TQVSSVA-PEK----------------------- +------------------------------------------------------------ +--------------- +> 7== M13299 1 human BCP <>[Science232(4747),193-202'86] +M------------------RKMS-E------EEFYL-------FKNISSVGPWDGPQY-- +--------------------------------------------------HIAPVW---- +---------------AFYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVS +FGGFLLCIFS-VFPVFVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERY +IVICKPFGNF-RFSSKHALTVVLATWTIGIGV-SIPPFFG-WS-----RFIPEGLQCSCG +PDWYTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRALKAVAAQQQESA------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------TTQKAEREVSRMVVVMVGSFCVCYVPYAA +FAMYMVN---NRNH-GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIMKMV----- +C------GKA-MTDES-DTC-SSQK-TEVSTVS-STQVGPN------------------- +------------------------------------------------------------ +--------------- +> 8=opsin, 
greensensitive human (fragment) S07060 +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--DLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERW +LVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW-TAPPIFG-WS-----RYWPHGLKTSCG +PDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESE------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------STQKAEKEVTRMVVVMVLAFC-------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------- +> 9== K03494 1 human GCP <>[Science232(4747),193-202'86] +MAQQWSLQRLAGRHPQDSYEDSTQS------SIFTY-------TNSNSTRGPFEGPNY-- +--------------------------------------------------HIAPRW---- +---------------VYHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLA +VADLAETVIA-STISVVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERW +MVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW-TAPPIFG-WS-----RYWPHGLKTSCG +PDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESE------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------STQKAEKEVTRMVVVMVLAFCFCWGPYAF +FACFAAA---NPGY-PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCILQLF----- +-------GKK-VDDGS-ELS-SASK-TEVSSVS---SVSPA------------------- +------------------------------------------------------------ +--------------- +> 10== Z68193 1 human Red Opsin <>[] +MAQQWSLQRLAGRHPQDSYEDSTQS------SIFTY-------TNSNSTRGPFEGPNY-- +--------------------------------------------------HIAPRW---- +---------------VYHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLA +VADLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERW +LVVCKPFGNV-RFDAKLAIVGIAFSWIWSAVW-TAPPIFG-WS-----RYWPHGLKTSCG 
+PDVFSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAIRAVAKQQKESE------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------STQKAEKEVTRMVVVMIFAYCVCWGPYTF +FACFAAA---NPGY-AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCILQLF----- +-------GKK-VDDGS-ELS-SASK-TEVSSVS---SVSPA------------------- +------------------------------------------------------------ +--------------- +> 11== M92036 1 Gecko gecko P521 <retina>[PNAS89,6841-6845'92] +MTEAWNVAVFAARRSRDD-DDTTRG------SVFTY-------TNTNNTRGPFEGPNY-- +--------------------------------------------------HIAPRW---- +---------------VYNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLA +FVDLVETLVA-STISVFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERW +FVVCKPFGNI-KFDSKLAIIGIVFSWVWAWGW-SAPPIFG-WS-----RYWPHGLKTSCG +PDVFSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAIRAVAAQQKESE------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------STQKAEREVSRMVVVMIVAFCICWGPYAS +FVSFAAA---NPGY-AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIMQLF----- +-------GKK-VDDGS-EAS-TTSR-TEVSSVS-NSSVAPA------------------- +------------------------------------------------------------ +--------------- +> 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90] +MA-AWE-AAFAARRRHEE-EDTTRD------SVFTY-------TNSNNTRGPFEGPNY-- +--------------------------------------------------HIAPRW---- +---------------VYNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLA +VADLGETVIA-STISVINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERW +FVVCKPFGNI-KFDGKLAVAGILFSWLWSCAW-TAPPIFG-WS-----RYWPHGLKTSCG +PDVFSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAIRAVAAQQKESE------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------STQKAEKEVSRMVVVMIVAYCFCWGPYTF +FACFAAA---NPGY-AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCILQLF----- 
+-------GKK-VDDGS-EVS-T-SR-TEVSSVS-NSSVSPA------------------- +------------------------------------------------------------ +--------------- +> 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95] +-----------------------MS------SNSSQ-------APPNGTPGPFDGPQW-- +-------------------------------------------------PYQAPQS---- +---------------TYVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLA +VADLLVTLCG-SSVSLSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERY +VVVCKPLGDF-QFQRRHAVSGCAFTWGWALLW-SAPPLLG-WS-----SYVPEGLRTSCG +PNWYTGGSN--NNSYILSLFVTCFVLPLSLILFSYTNLLLTLRAAAAQQKEAD------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------TTQRAEREVTRMVIVMVMAFLLCWLPYST +FALVVAT---HKGI-IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLLEML----- +CCGYQ--PQR-TGKAS-PGT-PGPH-ADVTAAGLRNKVMPAHPV---------------- +------------------------------------------------------------ +--------------- +> 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87] +M------------------ESGNVS------SSLF------------GNVSTALRPEA-- +-------------------------RLSA---E---TRLLGWNVPPEELRHIPEHWLTYP +----------EPPESMNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLA +FCDFMMMVK--TPIFIYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRF +NVITRPMEG--KMTHGKAIAMIIFIYMYATPW-VVACYTETWG-----RFVPEGYLTSCT +FDYLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKKMNVES +------------------------------------------------------------ +------------------------------------------------------------ +-----------------LRS-------NVDKNKETAEIRIAKAAITICFLFFCSWTPYGV +MSLIGAF---GDKT-LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWL-- +AL-----NEK-APESS-AVA-STST-TQEPQQT--------------------------- +TAA--------------------------------------------------------- +--------------- +> 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92 +M------------------EYHNVS------SVL-------------GNVSSVLRPDA-- +-------------------------RLSA---E---SRLLGWNVPPDELRHIPEHWLIYP 
+----------EPPESMNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLA +FCDFMMMIK--TPIFIYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRY +NVITRPMEG--KMTHGKAIAMIIFIYLYATPW-VVACYTESWG-----RFVPEGYLTSCT +FDYLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKKMNVDS +------------------------------------------------------------ +------------------------------------------------------------ +-----------------LRS-------NVDKSKEAAEIRIAKAAITICFLFFASWTPYGV +MSLIGAF---GDKT-LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWL-- +AI-----SEK-APESR-AAI-STST-TQEQQQT--------------------------- +TAA--------------------------------------------------------- +--------------- +> 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87] +M------------------EPLC------------------------NASEPPLRPEA-- +-------------------------R-SSGNGD---LQFLGWNVPPDQIQYIPEHWLTQL +----------EPPASMHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLA +VFDLIMCLK--APIF--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRY +NVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVPEGYLTSCS +FDYLSDN--FDTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKALREQAKKMNVES +------------------------------------------------------------ +------------------------------------------------------------ +-----------------LRS-------NVDKSKETAEIRIAKAAITICFLFFVSWTPYGV +MSLIGAF---GDKS-LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWL-- +GV-----NEK-SGEIS-SAQ-STTT-QEQQQTT--------------------------- +AA---------------------------------------------------------- +--------------- +> 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92 +M------------------DALC------------------------NASEPPLRPEA-- +-------------------------RMSSGSDE---LQFLGWNVPPDQIQYIPEHWLTQL +----------EPPASMHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLA +VFDLIMCLK--APIFIYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRY +NVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVPEGYLTSCS +FDYLSDN--FDTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKALREQAKKMNVES +------------------------------------------------------------ 
+------------------------------------------------------------ +-----------------LRS-------NVDKSKETAEIRIAKAAITICFLFFVSWTPYGV +MSLIGAF---GDKS-LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWL-- +GV-----NEK-SGEAS-SAQ-STTT-QEQTQQT--------------------------- +SAA--------------------------------------------------------- +--------------- +> 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 <compound eye>[J.Exp.Biol.1 +M------------------TNAT-------------------------------GPQMAY +-------YG--------------AASMDFGYPE---GVSIVDFVRPEIKPYVHQHWYNYP +----------PVNPMWHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLA +LSDLIMLTTN-VPFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRY +NIICNGFNGP-KLTTGKAVVFALISWVIAIGC-ALPPFFG-WG-----NYILEGILDSCS +YDYLTQD--FNTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQAKKMNVST +------------------------------------------------------------ +------------------------------------------------------------ +-----------------LRS--------NEADAQRAEIRIAKTALVNVSLWFICWTPYAL +ISLKGVM---GDTS-GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWF-- +CV-----HET-ETKSN-DDS-QSNS-TVAQDKA--------------------------- +------------------------------------------------------------ +--------------- +> 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 <compound eye>[J.Exp.Biol.1 +M------------------ANVT-------------------------------GPQMAF +-------YG--------------SGAATFGYPE---GMTVADFVPDRVKHMVLDHWYNYP +----------PVNPMWHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLA +LSDLIMLTTN-FPPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRY +NIICNGFNGP-KLTQGKATFMCGLAWVISVGW-SLPPFFG-WG-----SYTLEGILDSCS +YDYFTRD--MNTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQAKKMNVTN +------------------------------------------------------------ +------------------------------------------------------------ +-----------------LRS--------NEAETQRAEIRIAKTALVNVSLWFICWTPYAA +ITIQGLL---GNAE-GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWF-- +CV-----HEK-DPNDV-EEN-QSSN-TQTQEKS--------------------------- +------------------------------------------------------------ 
+--------------- +> 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85] +M------------------ESFA-------------------------VAAAQLGPHF-- +------------------------APLS--------NGSVVDKVTPDMAHLISPYWNQFP +----------AMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLA +ISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRY +QVIVKGMAGR-PMTIPLALGKM---------------------------YVPEGNLTSCG +IDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKS +------------------------------------------------------------ +------------------------------------------------------------ +-----------------LRS--------SEDAEKSAEGKLAKVALVTITLWFMAWTPYLV +INCMGLF---KF-E-GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCC-- +VF-----GKV-DDGKS-SDA-QSQA-TASEAES------KA------------------- +------------------------------------------------------------ +--------------- +> 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85] +M------------------ESFA-------------------------VAAAQLGPHF-- +------------------------APLS--------NGSVVDKVTPDMAHLISPYWNQFP +----------AMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLA +ISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRY +QVIVKGMAGR-PMTIPLALGKIAYIWFMSSIW-CLAPAFG-WS-----RYVPEGNLTSCG +IDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKS +------------------------------------------------------------ +------------------------------------------------------------ +-----------------LRS--------SEDAEKSAEGKLAKVALVTITLWFMAWTPYLV +INCMGLF---KF-E-GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCC-- +VF-----GKV-DDGKS-SDA-QSQA-TASEAES------KA------------------- +------------------------------------------------------------ +--------------- +> 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204' +M------------------DSFA-------------------------AVATQLGPQF-- +------------------------AAPS--------NGSVVDKVTPDMAHLISPYWDQFP +----------AMDPIWAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLA +ISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRY 
+QVIVKGMAGR-PMTIPLALGKIAYIWFMSTIWCCLAPVFG-WS-----RYVPEGNLTSCG +IDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKS +------------------------------------------------------------ +------------------------------------------------------------ +-----------------LRS--------SEDADKSAEGKLAKVALVTISLWFMAWTPYLV +INCMGLF---KF-E-GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCC-- +VF-----GKV-DDGKS-SEA-QSQA-TTSEAES------KA------------------- +------------------------------------------------------------ +--------------- +> 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86] +M------------------ERSH--------------------LPETPFDLAHSGPRF-- +------------------------QAQSSG------NGSVLDNVLPDMAHLVNPYWSRFA +----------PMDPMMSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLA +FSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRY +NVIVKGINGT-PMTIKTSIMKILFIWMMAVFW-TVMPLIG-WS-----AYVPEGNLTACS +IDYMTRM--WNPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKAMREQAKKMNVKS +------------------------------------------------------------ +------------------------------------------------------------ +-----------------LRS--------SEDCDKSAEGKLAKVALTTISLWFMAWTPYLV +ICYFGLF---KI-D-GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMC-- +VF-----GNT-DEPKP-DAP-ASDTETTSEADS------KA------------------- +------------------------------------------------------------ +--------------- +> 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92 +M------------------ERSL--------------------LPEPPLAMALLGPRF-- +------------------------EAQTGG------NRSVLDNVLPDMAPLVNPHWSRFA +----------PMDPTMSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLA +FSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRY +NVIVKGINGT-PMTIKTSIMKIAFIWMMAVFW-TIMPLIG-WS-----SYVPEGNLTACS +IDYMTRQ--WNPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKAMRDQAKKMNVKS +------------------------------------------------------------ +------------------------------------------------------------ +-----------------LRS--------SEDCDKSAENKLAKVALTTISLWFMAWTPYLI 
+ICYFGLF---KI-D-GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMC-- +VC-----GTT-DEPKP-DAP-PSDTETTSEAES------KD------------------- +------------------------------------------------------------ +--------------- +> 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[] +M-------------------------------------------------IAVSGPSY-- +------------------------EAFSYGGQARFNNQTVVDKVPPDMLHLIDANWYQYP +----------PLNPMWHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLA +ISNFLMMFCM-SPPMVINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRY +NVIVKGLSGK-PLSINGALIRIIAIWLFSLGW-TIAPMFG-WN-----RYVPEGNMTACG +TDYFNRG--LLSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQAKKMNVAS +------------------------------------------------------------ +------------------------------------------------------------ +-----------------LRS--------SENQNTSAECKLAKVALMTISLWFMAWTPYLV +INFSGIF---NL-V-KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSL-- +AC-----AA--EPSSD-AVS-TTSG-TTTVTDN------EK------------------S +NA---------------------------------------------------------- +--------------- +> 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93] +M------------------ANQL---------------------------------SY-- +------------------------SSLGWPYQP---NASVVDTMPKEMLYMIHEHWYAFP +----------PMNPLWYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLA +FSDFCMMAFM-MPTMTSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRY +NVIVRGMAAA-PLTHKKATLLLLFVWIWSGGW-TILPFFG-WS-----RYVPEGNLTSCT +VDYLTKD--WSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQLREQAKKMNVAS +------------------------------------------------------------ +------------------------------------------------------------ +-----------------LRA-------NADQQKQSAECRLAKVAMMTVGLWFMAWTPYLI +ISWAGVF---SSGT-RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSL-- +AC-----GSG-ESGSD-VKS-EASA-TTTMEEK------PKIPEA--------------- +------------------------------------------------------------ +--------------- +> 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88] +M------------------VEST----------------------TLVNQTWWYNPTV-- 
+--------------------------------------------------DIHPHWAKFD +----------PIPDAVYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLA +MSDLSFSAINGFPLKTISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRY +NVIGRPMAASKKMSHRRAFLMIIFVWMWSIVW-SVGPVFN-WG-----AYVPEGILTSCS +FDYLSTD--PSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAKRLNAKE +------------------------------------------------------------ +------------------------------------------------------------ +-----------------LRK---------AQAGASAEMKLAKISMVIITQFMLSWSPYAI +IALLAQF---GPAE-WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLT +CCQFD------EKECE-DAN-DAEE-EVVASER----GGESRDAAQMKEMMAMMQKMQAQ +QAAYQP---PPPPQGYPPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAP +QGAPPQGVDNQAYQA +> 28== X70498 1 Todarodes pacificus rhodopsin <retina>[FEBS317(1-2),5-11'93] +M------------------GRDL-----------------------RDNETWWYNPSI-- +--------------------------------------------------VVHPHWREFD +----------QVPDAVYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLA +FSDFTFSLVNGFPLMTISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRY +NVIGRPMAASKKMSHRRAFIMIIFVWLWSVLW-AIGPIFG-WG-----AYTLEGVLCNCS +FDYISRD--STTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKRLNAKE +------------------------------------------------------------ +------------------------------------------------------------ +-----------------LRK---------AQAGANAEMRLAKISIVIVSQFLLSWSPYAV +VALLAQF---GPLE-WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLT +CCQFD------DKETE-DDK-DAET-EIPAGES--SDAAPSADAAQMKEMMAMMQKMQQQ +QAAYPPQGYAPPPQGYPPQGYPPQGY--PPQGYPPQGYPP---PPQGAPPQGAPP----- +-AAPPQGVDNQAYQA +> 29== L21195 1 human serotonin 5-HT7 receptor protein <placenta and fetal bra +MM-----------------DVNSSGR-----PDLYGHLRS-FLLPEVGRGLPDLSPDG-- +----------GADPVAGSWAP---HLLS------------------EVTASPAPTWDAPP +DNASGCGEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVCFVKKLRQPSNYLIVSLA +LADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRY +LGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-WA-----QNVNDDKVCLIS +QDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPG------- 
+-------FPRVEPDSVIALNG--------------------------------------- +-----------------------IVKLQKE-------------------VEECAN----- +-------------LSRLLKHE------RKNISIFKREQKAATTLGIIVGAFTVCWLPFFL +LSTARPFICGTSCS-CIPLWVERTFLWLGYANSLINPFIYAFFNRDLRTTYRSLL----- +QCQYRNINRK-------LSAAGMHE-ALKLAER------PERPEFVL------------Q +NADY-------------------------------------------------------- +-------CRKKGHDS +> 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93] +M----------------------------------------------------------- +--------------------P---HLLS--------GFL-------EVTASPAPTWDAPP +DNVSGCGEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLA +LADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRY +LGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-WA-----QNVNDDKVCLIS +QDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPG------- +-------FPRVQPESVISLNG--------------------------------------- +-----------------------VVKLQKE-------------------VEECAN----- +-------------LSRLLKHE------RKNISIFKREQKAATTLGIIVGAFTVCWLPFFL +LSTARPFICGTSCS-CIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLL----- +QCQYRNINRK-------LSAAGMHE-ALKLAER------PERSEFVL------------Q +NSDH-------------------------------------------------------- +-------CGKKGHDT +> 31=p A47425 serotonin receptor 5HT-7 - rat +M----------------------------------------------------------- +--------------------P---HLLS--------GFL-------EVTASPAPTWDAPP +DNVSGCGEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLA +LADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRY +LGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-WA-----QNVNDDKVCLIS +QDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPG------- +-------FPRVQPESVISLNG--------------------------------------- +-----------------------VVKLQKE-------------------VEECAN----- +-------------LSRLLKHE------RKNISIFKREQKAATTLGIIVGAFTVCWLPFFL +LSTARPFICGTSCS-CIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLL----- +QCQYRNINRK-------LSAAGMHE-ALKLAER------PERSEFVL------------Q 
+NSDH-------------------------------------------------------- +-------CGKKGHDT +> 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92] +M------------------DVLSPG-------------------------------QG-- +------------------------NNTTSPPAPF-E---------------TGGNTTGIS +----------DVTVSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLA +VTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRY +WAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP---EDRSDPDACTIS +KDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKKVEKTGADT +RHGASPAPQPKK-----SVNGE--SGSRNWRLGVESKAGGALC----------------- +-------------------------------ANGAVRQGDDGAALEVIEVHRVGNSKEHL +PLPSEAG--PTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFI +VALVLPF---CESSCHMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKII----- +KCKFCRQ----------------------------------------------------- +------------------------------------------------------------ +--------------- +> 33=p A35181 serotonin receptor class 1A - rat +M------------------DVFSFG-------------------------------QG-- +------------------------NNTTASQEPF-G---------------TGGNVTSIS +----------DVTFSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLA +VTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRY +WAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP---EDRSDPDACTIS +KDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRKVEKKGAGT +SLGTSSAPPPKK-----SLNGQ--PGSGDWRRCAENRAVGTPC----------------- +-------------------------------TNGAVRQGDDEATLEVIEVHRVGNSKEHL +PLPSESG--SNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFI +VALVLPF---CESSCHMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKII----- +KCKFCRR----------------------------------------------------- +------------------------------------------------------------ +--------------- +> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93] +M------------------ANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLV +TSDFNDSYGLTGQFINGSHSSRSRDNASANDT--------------SATNMTDDRYWSLT +----------VYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLA 
+VADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRY +WAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIIS +QDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKT +EETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPEN +ANGVNSNSSS----------SERLKQIQIETAEAFANGCAEEASIAML-ERQCNNGKK-- +-----------------ISSNDTPYSRTREKLELKRERKAARTLAIITGAFLICWLPFFI +IALIGPF---VDPE-GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKIL----- +FGKYRRGHR--------------------------------------------------- +------------------------------------------------------------ +--------------- +> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail +M------------------ANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLV +TSDFNDSYGLTGQFINGSHSSRSRDNASANDT--------------SATNMTDDRYWSLT +----------VYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLA +VADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRY +WAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIIS +QDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKT +EETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPEN +ANGVNSNSSS----------SERLKQIQIETAEAFANGCAEEASIAML-ERQCNNGKK-- +-----------------ISSNDTPYSRTREKLELKRERKAARTLAIITGAFLICWLPFFI +IALIGPF---VDPE-GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKIL----- +FGKYRRGHR--------------------------------------------------- +------------------------------------------------------------ +--------------- +> 36== X95604 1 Bombyx mori serotonin receptor <antennae>[InsectBiochem.Mol.Bi +M------------------EGAE-GQEELDWEALYLRLPL-------------------- +------------------------QNCSWNSTGWEPNW--------NVTVVPNTTWWQAS +-----APFDTPAALVRAAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLA +VADLLVACLV-MPLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRY +WAVTN-IDYIHASTAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCVVS +QDV----------GYQIFATASSFYVPVLIILILYWRIYQTARKRIRRRRGATARGGVGP +-------PP-----------------------------------------------VPAG +GALVAGGGSGGIAAAVVAVIGRPLPTISETTTTGFTNVSSNNTSPE---KQSCANGLEAD 
+PPTTGYGAVAAAYYPSLVRRK------PKEAADSKRERKAAKTLAIITGAFVACWLPFFV +LAILVPT---CDCE--VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLL----- +CGRRVRRRRA-----------------------------PQ------------------- +------------------------------------------------------------ +---------------
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mafft_custom_original.clustal.aln Wed Mar 20 07:34:52 2024 +0000 @@ -0,0 +1,496 @@ +CLUSTAL format alignment by MAFFT G-INS-i (v7.520) + + +1== MN---------------------------------------------------------- +2== MN---------------------------------------------------------- +3== MN---------------------------------------------------------- +4=p MN---------------------------------------------------------- +5=p MN---------------------------------------------------------- +6== MK---------------------------------------------------------- +7== MR---------------------------------------------------------- +8=opsin, ------------------------------------------------------------ +9== MA------QQWS-LQRLAGRHPQDS--------------------------YED------ +10== MA------QQWS-LQRLAGRHPQDS--------------------------YED------ +11== MT------EAWNVAVFAARRSRDD----------------------------DD------ +12== MA-------AWE-AAFAARRRHEE----------------------------ED------ +13== MS---------------------------------------------------------- +14== MESGNVS----------------------------------------------------- +15== MEYHNVS----------------------------------------------------- +16== ME---------------------------------------------------------- +17== MD---------------------------------------------------------- +18== MT---------------------------------------------------------- +19== MA---------------------------------------------------------- +20== MESF-------------------------------------------------------- +21== MESF-------------------------------------------------------- +22== MDSF-------------------------------------------------------- +23== MERSHLP---------------------------------------------------ET +24== MERSLLP---------------------------------------------------EP +25== MI---------------------------------------------------------- +26== M----------------------------------------------------------- +27== 
MV---------------------------------------------------------- +28== MG---------------------------------------------------------- +29== MMDVN-----------SSGRPDLYGHLRSFLLPEVGRGLPDLSPDG------------GA +30== M----------------------------------------------------------- +31=p M----------------------------------------------------------- +32== MDVLS------------------------------------------------------- +33=p MDVFS------------------------------------------------------- +34== MANFTFGDLALD-VARMGGLASTPSGLRS-----TGLTTPGLSPTGLVTSDFNDSYGLTG +35=p MANFTFGDLALD-VARMGGLASTPSGLRS-----TGLTTPGLSPTGLVTSDFNDSYGLTG +36== ME---------------------------------------------------------- + + +1== ----GTEG--DNFY------------VPFSNKTG----------------------LARS +2== ----GTEG--PNFY------------VPFSNITG----------------------VVRS +3== ----GTEG--INFY------------VPMSNKTG----------------------VVRS +4=p ----GTEG--KNFY------------VPMSNRTG----------------------LVRS +5=p ----GTEG--NNFY------------VPLSNRTG----------------------LVRS +6== ----QVPEFHEDFY------IPIP--LDINNLSA------------------------YS +7== ----KMSE--EEFY------------L-FKNISS----------------------V--G +8=opsin, ------------------------------------------------------------ +9== ----STQS--SIFT------------YTNSNSTR-------------------------G +10== ----STQS--SIFT------------YTNSNSTR-------------------------G +11== ----TTRG--SVFT------------YTNTNNTR-------------------------G +12== ----TTRD--SVFT------------YTNSNNTR-------------------------G +13== ----SNSS------------------QAPPNGTP-------------------------G +14== SSLFGNVS--TALR-------------PEARLSA---E------TRLLGWNVPPEELRHI +15== SVL-GNVS--SVLR-------------PDARLSA---E------SRLLGWNVPPDELRHI +16== -PLCNASE--PPLR-------------PEAR-SSGNGD------LQFLGWNVPPDQIQYI +17== -ALCNASE--PPLR-------------PEARMSSGSDE------LQFLGWNVPPDQIQYI +18== ----NATG--PQMAY-----------YGAASMDFGYPE------GVSIVDFVRPEIKPYV +19== ----NVTG--PQMAF-----------YGSGAATFGYPE------GMTVADFVPDRVKHMV +20== 
AVAAAQLG--PHFA----------------PLS-----------NGSVVDKVTPDMAHLI +21== AVAAAQLG--PHFA----------------PLS-----------NGSVVDKVTPDMAHLI +22== AAVATQLG--PQFA----------------APS-----------NGSVVDKVTPDMAHLI +23== PFDLAHSG--PRFQ----------------AQSSG---------NGSVLDNVLPDMAHLV +24== PLAMALLG--PRFE----------------AQTGG---------NRSVLDNVLPDMAPLV +25== ----AVSG--PSYE----------------AFSYGGQARFN---NQTVVDKVPPDMLHLI +26== -----ANQ--LSYS----------------SLGWPYQP------NASVVDTMPKEMLYMI +27== ----ESTT--------------------LVNQTWWY--------NPTVD----------I +28== ----RDLR---------------------DNETWWY--------NPSIV----------V +29== DPVAGSWA--PHLL------------S---EVTASPAPTWDAPPDNASG----------- +30== ----------PHLL------------SGFLEVTASPAPTWDAPPDNVSG----------- +31=p ----------PHLL------------SGFLEVTASPAPTWDAPPDNVSG----------- +32== --------------------------PGQGNNTTSPPAPFETGGNTTGI----------- +33=p --------------------------FGQGNNTTASQEPFGTGGNVTSI----------- +34== QFINGSHS--SRSRD-----------NASANDT-----------SATNM----------T +35=p QFINGSHS--SRSRD-----------NASANDT-----------SATNM----------T +36== ----GAEG--QEELDWEALYLRLPLQNCSWNSTGWEPN-----WNVTVV----------P + + +1== PYEYPQY------YLAEPWKYSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYIL +2== PFEQPQY------YLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYIL +3== PFEYPQY------YLAEPWKYRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYIL +4=p PFEYPQY------YLAEPWQFKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFIL +5=p PFEYPQY------YLAEPWQFKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFIL +6== PFLVPQD------HLGNQGIFMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYIL +7== PWDGPQY------HIAPVWAFYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYIL +8=opsin, ------------------------------------------------------------ +9== PFEGPNY------HIAPRWVYHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWIL +10== PFEGPNY------HIAPRWVYHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWIL +11== PFEGPNY------HIAPRWVYNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWIL +12== PFEGPNY------HIAPRWVYNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWIL +13== 
PFDGPQWP-----YQAPQSTYVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYIL +14== PEHWLTY------PEPPESMNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILV +15== PEHWLIY------PEPPESMNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILV +16== PEHWLTQ------LEPPASMHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFV +17== PEHWLTQ------LEPPASMHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFV +18== HQHWYNY------PPVNPMWHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILV +19== LDHWYNY------PPVNPMWHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLI +20== SPYWNQF------PAMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLV +21== SPYWNQF------PAMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLV +22== SPYWDQF------PAMDPIWAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLV +23== NPYWSRF------APMDPMMSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLV +24== NPHWSRF------APMDPTMSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLV +25== DANWYQY------PPLNPMWHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFV +26== HEHWYAF------PPMNPLWYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLV +27== HPHWAKF------DPIPDAVYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFI +28== HPHWREF------DQVPDAVYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFI +29== ---------CGEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVCFVKKLRQPSNYLI +30== ---------CGEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLI +31=p ---------CGEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLI +32== -------------SDVTVSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLI +33=p -------------SDVTFSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLI +34== DDRYWSL------TVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLI +35=p DDRYWSL------TVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLI +36== NTTWWQASAPFD-TPAALVRAAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLI + + +1== LNLAMANLFMVLFG-FTVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLA +2== LNLAVADLFMVFGG-FTTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLA +3== VNLAVADLFMACFG-FTVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLA +4=p VNLAVAGTIMVCFG-FTVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLA +5=p VNLAVAGAIMVCFG-FTVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLA +6== 
VNLSIANLFVAIFG-SPLSFYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVA +7== VNVSFGGFLLCIFS-VFPVFVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLA +8=opsin, ------DLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIIS +9== VNLAVADLAETVIA-STISVVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIIS +10== VNLAVADLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIIS +11== VNLAFVDLVETLVA-STISVFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIIS +12== VNLAVADLGETVIA-STISVINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIIS +13== VNLAVADLLVTLCG-SSVSLSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILA +14== INLAFCDFMMM-VK-TPIFIYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIA +15== INLAFCDFMMM-IK-TPIFIYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIA +16== LNLAVFDLIMC-LK-APIF--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIG +17== LNLAVFDLIMC-LK-APIFIYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIG +18== VNLALSDLIMLTTN-VPFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMIS +19== VNLALSDLIMLTTN-FPPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMIS +20== INLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMIS +21== INLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMIS +22== INLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMIS +23== LNLAFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIA +24== LNLAFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIA +25== INLAISNFLMMFCM-SPPMVINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIA +26== VNLAFSDFCMMAFM-MPTMTSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMIT +27== INLAMSDLSFSAINGFPLKTISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMIS +28== INLAFSDFTFSLVNGFPLMTISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMIS +29== VSLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVIS +30== VSLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVIS +31=p VSLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVIS +32== GSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIA +33=p GSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIA +34== LSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIA +35=p 
LSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIA +36== LSLAVADLLVACLV-MPLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIA + . : : * : : + +1== IERYIVICKPMGNF-RFGNTHAIMGVAFTWIMALAC-AAPPLVG-WS-----RYIPEGMQ +2== IERYVVVCKPMSNF-RFGENHAIMGVAFTWVMALAC-AAPPLVG-WS-----RYIPEGMQ +3== IERYIVVCKPMGNF-RFSATHAMMGIAFTWVMAFSC-AAPPLFG-WS-----RYMPEGMQ +4=p IERYIVVCKPMGSF-KFSSSHAFAGIAFTWVMALAC-AAPPLFG-WS-----RYIPEGMQ +5=p IERYIVVCKPMGSF-KFSSTHASAGIAFTWVMAMAC-AAPPLVG-WS-----RYIPEGIQ +6== FERWLVICKPLGNF-TFKTPHAIAGCILPWISALAA-SLPPLFG-WS-----RYIPEGLQ +7== FERYIVICKPFGNF-RFSSKHALTVVLATWTIGIGV-SIPPFFG-WS-----RFIPEGLQ +8=opsin, WERWLVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW-TAPPIFG-WS-----RYWPHGLK +9== WERWMVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW-TAPPIFG-WS-----RYWPHGLK +10== WERWLVVCKPFGNV-RFDAKLAIVGIAFSWIWSAVW-TAPPIFG-WS-----RYWPHGLK +11== WERWFVVCKPFGNI-KFDSKLAIIGIVFSWVWAWGW-SAPPIFG-WS-----RYWPHGLK +12== WERWFVVCKPFGNI-KFDGKLAVAGILFSWLWSCAW-TAPPIFG-WS-----RYWPHGLK +13== LERYVVVCKPLGDF-QFQRRHAVSGCAFTWGWALLW-SAPPLLG-WS-----SYVPEGLR +14== YDRFNVITRPMEG--KMTHGKAIAMIIFIYMYATPW-VVACYTETWG-----RFVPEGYL +15== YDRYNVITRPMEG--KMTHGKAIAMIIFIYLYATPW-VVACYTESWG-----RFVPEGYL +16== YDRYNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVPEGYL +17== YDRYNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVPEGYL +18== FDRYNIICNGFNGP-KLTTGKAVVFALISWVIAIGC-ALPPFFG-WG-----NYILEGIL +19== FDRYNIICNGFNGP-KLTQGKATFMCGLAWVISVGW-SLPPFFG-WG-----SYTLEGIL +20== LDRYQVIVKGMAGR-PMTIPLALGKM---------------------------YVPEGNL +21== LDRYQVIVKGMAGR-PMTIPLALGKIAYIWFMSSIW-CLAPAFG-WS-----RYVPEGNL +22== LDRYQVIVKGMAGR-PMTIPLALGKIAYIWFMSTIWCCLAPVFG-WS-----RYVPEGNL +23== FDRYNVIVKGINGT-PMTIKTSIMKILFIWMMAVFW-TVMPLIG-WS-----AYVPEGNL +24== FDRYNVIVKGINGT-PMTIKTSIMKIAFIWMMAVFW-TIMPLIG-WS-----SYVPEGNL +25== FDRYNVIVKGLSGK-PLSINGALIRIIAIWLFSLGW-TIAPMFG-WN-----RYVPEGNM +26== LDRYNVIVRGMAAA-PLTHKKATLLLLFVWIWSGGW-TILPFFG-WS-----RYVPEGNL +27== IDRYNVIGRPMAASKKMSHRRAFLMIIFVWMWSIVW-SVGPVFN-WG-----AYVPEGIL +28== 
IDRYNVIGRPMAASKKMSHRRAFIMIIFVWLWSVLW-AIGPIFG-WG-----AYTLEGVL +29== IDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-WA-----QNVNDDKV +30== IDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-WA-----QNVNDDKV +31=p IDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-WA-----QNVNDDKV +32== LDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP---EDRSDPDA +33=p LDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP---EDRSDPDA +34== MDRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGT +35=p MDRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGT +36== LDRYWAVTN-IDYIHASTAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLR + :*: : : . + +1== CSCGPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQESA--- +2== CSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESA--- +3== CSCGPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQESA--- +4=p CSCGPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQDSA--- +5=p CSCGPDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQDSA--- +6== CSCGPDWYTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITLKLAAKAQADSA--- +7== CSCGPDWYTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRALKAVAAQQQESA--- +8=opsin, TSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESE--- +9== TSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESE--- +10== TSCGPDVFSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAIRAVAKQQKESE--- +11== TSCGPDVFSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAIRAVAAQQKESE--- +12== TSCGPDVFSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAIRAVAAQQKESE--- +13== TSCGPNWYTGGS--NNNSYILSLFVTCFVLPLSLILFSYTNLLLTLRAAAAQQKEAD--- +14== TSCTFDYLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKKM +15== TSCTFDYLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKKM +16== TSCSFDYLSDN--FDTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKALREQAKKM +17== TSCSFDYLSDN--FDTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKALREQAKKM +18== DSCSYDYLTQD--FNTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQAKKM +19== DSCSYDYFTRD--MNTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQAKKM +20== TSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKM +21== 
TSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKM +22== TSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKM +23== TACSIDYMTRM--WNPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKAMREQAKKM +24== TACSIDYMTRQ--WNPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKAMRDQAKKM +25== TACGTDYFNRG--LLSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQAKKM +26== TSCTVDYLTKD--WSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQLREQAKKM +27== TSCSFDYLSTD--PSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAKRL +28== CNCSFDYISRD--STTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKRL +29== CLISQDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPG--- +30== CLISQDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPG--- +31=p CLISQDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPG--- +32== CTISKDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKKVEKT +33=p CTISKDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRKVEKK +34== CIISQDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKA +35=p CIISQDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKA +36== CVVSQDV----------GYQIFATASSFYVPVLIILILYWRIYQTARKRIRRRRGATARG + : * : * : + +1== ------------------------------------------------------------ +2== ------------------------------------------------------------ +3== ------------------------------------------------------------ +4=p ------------------------------------------------------------ +5=p ------------------------------------------------------------ +6== ------------------------------------------------------------ +7== ------------------------------------------------------------ +8=opsin, ------------------------------------------------------------ +9== ------------------------------------------------------------ +10== ------------------------------------------------------------ +11== ------------------------------------------------------------ +12== ------------------------------------------------------------ +13== ------------------------------------------------------------ +14== 
NVE------------------SLRS----------------------------------- +15== NVD------------------SLRS----------------------------------- +16== NVE------------------SLRS----------------------------------- +17== NVE------------------SLRS----------------------------------- +18== NVS------------------TLRS----------------------------------- +19== NVT------------------NLRS----------------------------------- +20== NVK------------------SLRS----------------------------------- +21== NVK------------------SLRS----------------------------------- +22== NVK------------------SLRS----------------------------------- +23== NVK------------------SLRS----------------------------------- +24== NVK------------------SLRS----------------------------------- +25== NVA------------------SLRS----------------------------------- +26== NVA------------------SLRA----------------------------------- +27== NAK------------------ELRK----------------------------------- +28== NAK------------------ELRK----------------------------------- +29== -----------FPRVEPDSVIALNG----------------------------------- +30== -----------FPRVQPESVISLNG----------------------------------- +31=p -----------FPRVQPESVISLNG----------------------------------- +32== GADTRHGASPAPQPKK-----SVNGE--SGSRNWRLGVESKAGGALC------------- +33=p GAGTSLGTSSAPPPKK-----SLNGQ--PGSGDWRRCAENRAVGTPC------------- +34== RLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKK +35=p RLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKK +36== GVG---------------------------------------------PPP--------- + + +1== ------------------------------------------------------------ +2== ------------------------------------------------------------ +3== ------------------------------------------------------------ +4=p ------------------------------------------------------------ +5=p ------------------------------------------------------------ +6== ------------------------------------------------------------ +7== 
------------------------------------------------------------ +8=opsin, ------------------------------------------------------------ +9== ------------------------------------------------------------ +10== ------------------------------------------------------------ +11== ------------------------------------------------------------ +12== ------------------------------------------------------------ +13== ------------------------------------------------------------ +14== ------------------------------------------------------------ +15== ------------------------------------------------------------ +16== ------------------------------------------------------------ +17== ------------------------------------------------------------ +18== ------------------------------------------------------------ +19== ------------------------------------------------------------ +20== ------------------------------------------------------------ +21== ------------------------------------------------------------ +22== ------------------------------------------------------------ +23== ------------------------------------------------------------ +24== ------------------------------------------------------------ +25== ------------------------------------------------------------ +26== ------------------------------------------------------------ +27== ------------------------------------------------------------ +28== ------------------------------------------------------------ +29== ---------------------------IVKLQKE-------------------VEECAN- +30== ---------------------------VVKLQKE-------------------VEECAN- +31=p ---------------------------VVKLQKE-------------------VEECAN- +32== -----------------------------------ANGAVRQGDDGAALEVIEVHRVGNS +33=p -----------------------------------TNGAVRQGDDEATLEVIEVHRVGNS +34== LPENANGVNSNSSS----------SERLKQIQIETAEAFANGCAEEASIAML-ERQCNNG +35=p LPENANGVNSNSSS----------SERLKQIQIETAEAFANGCAEEASIAML-ERQCNNG +36== 
VPAGGALVAGGGSGGIAAAVVAVIGRPLPTISETTTTGFTNVSSNNTSPE---KQSCANG + + +1== -----------------------------------STQKAEKEVTRMVVLMVIGFLVCWV +2== -----------------------------------TTQKAEKEVTRMVIIMVIFFLICWL +3== -----------------------------------TTQKAEKEVTRMVILMVLGFMLAWT +4=p -----------------------------------STQKAEREVTKMVILMVFGFLIAWT +5=p -----------------------------------STQKAEREVTKMVILMVLGFLVAWT +6== -----------------------------------STQKAEREVTKMVVVMVLGFLVCWA +7== -----------------------------------TTQKAEREVSRMVVVMVGSFCVCYV +8=opsin, -----------------------------------STQKAEKEVTRMVVVMVLAFC---- +9== -----------------------------------STQKAEKEVTRMVVVMVLAFCFCWG +10== -----------------------------------STQKAEKEVTRMVVVMIFAYCVCWG +11== -----------------------------------STQKAEREVSRMVVVMIVAFCICWG +12== -----------------------------------STQKAEKEVSRMVVVMIVAYCFCWG +13== -----------------------------------TTQRAEREVTRMVIVMVMAFLLCWL +14== -------------------------------NVDKNKETAEIRIAKAAITICFLFFCSWT +15== -------------------------------NVDKSKEAAEIRIAKAAITICFLFFASWT +16== -------------------------------NVDKSKETAEIRIAKAAITICFLFFVSWT +17== -------------------------------NVDKSKETAEIRIAKAAITICFLFFVSWT +18== --------------------------------NEADAQRAEIRIAKTALVNVSLWFICWT +19== --------------------------------NEAETQRAEIRIAKTALVNVSLWFICWT +20== --------------------------------SEDAEKSAEGKLAKVALVTITLWFMAWT +21== --------------------------------SEDAEKSAEGKLAKVALVTITLWFMAWT +22== --------------------------------SEDADKSAEGKLAKVALVTISLWFMAWT +23== --------------------------------SEDCDKSAEGKLAKVALTTISLWFMAWT +24== --------------------------------SEDCDKSAENKLAKVALTTISLWFMAWT +25== --------------------------------SENQNTSAECKLAKVALMTISLWFMAWT +26== -------------------------------NADQQKQSAECRLAKVAMMTVGLWFMAWT +27== ---------------------------------AQAGASAEMKLAKISMVIITQFMLSWS +28== ---------------------------------AQAGANAEMRLAKISIVIVSQFLLSWS +29== 
-----------------LSRLLKH------ERKNISIFKREQKAATTLGIIVGAFTVCWL +30== -----------------LSRLLKH------ERKNISIFKREQKAATTLGIIVGAFTVCWL +31=p -----------------LSRLLKH------ERKNISIFKREQKAATTLGIIVGAFTVCWL +32== KEHLPLPSEAG--PTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWL +33=p KEHLPLPSESG--SNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWL +34== KK-------------------ISSNDTPYSRTREKLELKRERKAARTLAIITGAFLICWL +35=p KK-------------------ISSNDTPYSRTREKLELKRERKAARTLAIITGAFLICWL +36== LEADPPTTGYGAVAAAYYPSLVRR------KPKEAADSKRERKAAKTLAIITGAFVACWL + * . : + +1== PYASVAFYIFT---HQGSD-FGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLC +2== PYASVAMYIFT---HQGSN-FGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLC +3== PYAVVAFWIFT---NKGAD-FTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTIC +4=p PYATVAAWIFF---NKGAD-FSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIF +5=p PYATVAAWIFF---NKGAA-FSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLF +6== PYASFSLWIVS---HRGEE-FDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMMKMVC +7== PYAAFAMYMVN---NRNHG-LDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIMKMVC +8=opsin, ------------------------------------------------------------ +9== PYAFFACFAAA---NPGYP-FHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCILQLF- +10== PYTFFACFAAA---NPGYA-FHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCILQLF- +11== PYASFVSFAAA---NPGYA-FHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIMQLF- +12== PYTFFACFAAA---NPGYA-FHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCILQLF- +13== PYSTFALVVAT---HKGII-IQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLLEMLC +14== PYGVMSLIGAF---GDKTL-LTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCP +15== PYGVMSLIGAF---GDKTL-LTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCP +16== PYGVMSLIGAF---GDKSL-LTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCP +17== PYGVMSLIGAF---GDKSL-LTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCP +18== PYALISLKGVM---GDTSG-ITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLP +19== PYAAITIQGLL---GNAEG-ITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLP +20== PYLVINCMGLF---KF-EG-LTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCP +21== PYLVINCMGLF---KF-EG-LTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCP +22== 
PYLVINCMGLF---KF-EG-LTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCP +23== PYLVICYFGLF---KI-DG-LTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCP +24== PYLIICYFGLF---KI-DG-LTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCP +25== PYLVINFSGIF---NL-VK-ISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFP +26== PYLIISWAGVF---SSGTR-LTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFP +27== PYAIIALLAQF---GPAEW-VTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFP +28== PYAVVALLAQF---GPLEW-VTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFP +29== PFFLLSTARPFICGTSCSC-IPLWVERTFLWLGYANSLINPFIYAFFNRDLRTTYRSLLQ +30== PFFLLSTARPFICGTSCSC-IPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQ +31=p PFFLLSTARPFICGTSCSC-IPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQ +32== PFFIVALVLPF---CESSCHMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIK +33=p PFFIVALVLPF---CESSCHMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIK +34== PFFIIALIGPF---VDPEG-IPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILF +35=p PFFIIALIGPF---VDPEG-IPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILF +36== PFFVLAILVPT---CDCE--VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLLC + + +1== C-GKNPLGDDE-SGA-STSKTEVSSVS-TSPV---------------------------- +2== C-GKNPLGDDE-ASA-TASKTE------TSQV---------------------------- +3== C-GKNPFGDEDVSSTVSQSKTEVSSVS-SSQV---------------------------- +4=p C-GKNPLGDDE-SSTVSTSKTEVSS------V---------------------------- +5=p C-GKNPLGDEE-SSTVSTSKTEVSS------V---------------------------- +6== --GKN-IEEDE-AST-SSQVTQVSSVA-PEK----------------------------- +7== --GKA-MTDES-DTC-SSQKTEVSTVS-STQV---------------------------- +8=opsin, ------------------------------------------------------------ +9== --GKK-VDDGS-ELS-SASKTEVSSV---SSV---------------------------- +10== --GKK-VDDGS-ELS-SASKTEVSSV---SSV---------------------------- +11== --GKK-VDDGS-EAS-TTSRTEVSSVS-NSSV---------------------------- +12== --GKK-VDDGS-EVS--TSRTEVSSVS-NSSV---------------------------- +13== C-GYQPQRTGKASPGTPGPHADVTAAGLRNKV---------------------------- +14== WLALNEKAPES-SAV-ASTST---TQE-PQQT---------------------------- +15== 
WLAISEKAPES-RAA-ISTST---TQE-QQQT---------------------------- +16== WLGVNEKSGEI-SSA-QSTTT---QEQ--QQT---------------------------- +17== WLGVNEKSGEA-SSA-QSTTT---QEQ-TQQT---------------------------- +18== WFCVHETETKS-NDD-SQSNS---TVA-Q------------------------------- +19== WFCVHEKDPND-VEE-NQSSN---TQT-Q------------------------------- +20== CCVFGKVDDGK-SSD-AQSQA-TASEA-E------------------------------- +21== CCVFGKVDDGK-SSD-AQSQA-TASEA-E------------------------------- +22== CCVFGKVDDGK-SSE-AQSQA-TTSEA-E------------------------------- +23== MCVFGNTDEPK-PDA-PASDTETTSEA-D------------------------------- +24== MCVCGTTDEPK-PDA-PPSDTETTSEA-E------------------------------- +25== SLACA-AEPSS-DAV-STTSG-TTTVT-DNEK---------------------------- +26== SLACGSGESGS-DVK-SEASA-TTTME-EKPK---------------------------- +27== WLLTCCQFDEK-ECE-DANDAEEEVVA-SER---GGESRDAAQMKEMMAMMQKMQAQQAA +28== WVLTCCQFDDK-ETE-DDKDAETEIPA-GESSD-AAPSADAAQMKEMMAMMQKMQQQQAA +29== --CQYRNINRKLSAA-GMHEALKLAER-PERPEFVLQNADYCRKKG-------------- +30== --CQYRNINRKLSAA-GMHEALKLAER-PERSEFVLQNSDHCGKKG-------------- +31=p --CQYRNINRKLSAA-GMHEALKLAER-PERSEFVLQNSDHCGKKG-------------- +32== --CKFCR----------------------------------------------------- +33=p --CKFCR----------------------------------------------------- +34== --GKYRR----------------------------------------------------- +35=p --GKYRR----------------------------------------------------- +36== --GRRVR-RRR------------------------------------------------- + + +1== ------------------------------------------------------------ +2== ------------------------------------------------------------ +3== ------------------------------------------------------------ +4=p ------------------------------------------------------------ +5=p ------------------------------------------------------------ +6== ------------------------------------------------------------ +7== ------------------------------------------------------------ +8=opsin, 
------------------------------------------------------------ +9== ------------------------------------------------------------ +10== ------------------------------------------------------------ +11== ------------------------------------------------------------ +12== ------------------------------------------------------------ +13== ---------------------------------------MP------------------- +14== ------------------------------------------------------------ +15== ------------------------------------------------------------ +16== ------------------------------------------------------------ +17== ------------------------------------------------------------ +18== ------------------------------------------------------------ +19== ------------------------------------------------------------ +20== ------------------------------------------------------------ +21== ------------------------------------------------------------ +22== ------------------------------------------------------------ +23== ------------------------------------------------------------ +24== ------------------------------------------------------------ +25== ------------------------------------------------------------ +26== ------------------------------------------------------------ +27== YQP---PPPPQGYPPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGA +28== YPPQGYAPPPQGYPPQGYPPQGY--PPQGYPPQGYPP---PPQGAPPQGAPP------AA +29== ------------------------------------------------------------ +30== ------------------------------------------------------------ +31=p ------------------------------------------------------------ +32== ------------------------------------------------------------ +33=p ------------------------------------------------------------ +34== ------------------------------------------------------------ +35=p ------------------------------------------------------------ +36== ------------------------------------------------------------ + + +1== ---------SPA +2== ---------APA +3== ---------SPA +4=p 
---------SPA +5=p ---------SPA +6== ------------ +7== ---------GPN +8=opsin, ------------ +9== ---------SPA +10== ---------SPA +11== ---------APA +12== ---------SPA +13== --------AHPV +14== ---------TAA +15== ---------TAA +16== ---------TAA +17== ---------SAA +18== ---------DKA +19== ---------EKS +20== ---------SKA +21== ---------SKA +22== ---------SKA +23== ---------SKA +24== ---------SKD +25== ---------SNA +26== --------IPEA +27== PPQGVDNQAYQA +28== PPQGVDNQAYQA +29== ---------HDS +30== ---------HDT +31=p ---------HDT +32== -----------Q +33=p -----------R +34== ---------GHR +35=p ---------GHR +36== ---------APQ +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mafft_custom_parttree.aln Wed Mar 20 07:34:52 2024 +0000 @@ -0,0 +1,504 @@ +> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91] +MN-------------------GTE------GDNFYVPF----------------SNKTGL +-----------ARSPYEYPQY-----YLAEPWK--------------------Y------ +------------------SALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNL +AMANLFMVLFG-FTVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIER +YIVICKPMGN-FRFGNTHAIMGVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSC +GPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTV----KE----------- +------------------------------------------------------------ +------------------------------------------------------------ +---AAAAQQ------------------------------------ESASTQKAEKEVTRM +VVLMVIGFLVCWVPYASVAFYIFT-HQGS--DFGATFMTLPAFFAKSSALYNPVIYILMN +KQFRNCMITTLC----C---GKNPLGD-DE--SGASTSKTEV------------------ +------------------------------------------------------------ +---SSVS-------TSPVSP-A---------- +> 2== U22180 1 rat opsin <rod>[J.Mol.Neurosci.5(3),207-209'94] +MN-------------------GTE------GPNFYVPF----------------SNITGV +-----------VRSPFEQPQY-----YLAEPWQ--------------------F------ +------------------SMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNL +AVADLFMVFGG-FTTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIER +YVVVCKPMSN-FRFGENHAIMGVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSC +GIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTV----KE----------- +------------------------------------------------------------ +------------------------------------------------------------ +---AAAQQQ------------------------------------ESATTQKAEKEVTRM +VIIMVIFFLICWLPYASVAMYIFT-HQGS--NFGPIFMTLPAFFAKTASIYNPIIYIMMN +KQFRNCMLTSLC----C---GKNPLGD-DE--ASATASKTE------------------- +------------------------------------------------------------ +--------------TSQVAP-A---------- +> 3== M92038 1 chicken green sensitive cone opsin <retina>[PNAS89,5932-5936'9 +MN-------------------GTE------GINFYVPM----------------SNKTGV 
+-----------VRSPFEYPQY-----YLAEPWK--------------------Y------ +------------------RLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNL +AVADLFMACFG-FTVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIER +YIVVCKPMGN-FRFSATHAMMGIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSC +GPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKV----RE----------- +------------------------------------------------------------ +------------------------------------------------------------ +---AAAQQQ------------------------------------ESATTQKAEKEVTRM +VILMVLGFMLAWTPYAVVAFWIFT-NKGA--DFTATLMAVPAFFSKSSSLYNPIIYVLMN +KQFRNCMITTIC----C---GKNPFGD-EDVSSTVSQSKTEV------------------ +------------------------------------------------------------ +---SSVS-------SSQVSP-A---------- +> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish +MN-------------------GTE------GKNFYVPM----------------SNRTGL +-----------VRSPFEYPQY-----YLAEPWQ--------------------F------ +------------------KILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNL +AVAGTIMVCFG-FTVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIER +YIVVCKPMGS-FKFSSSHAFAGIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSC +GPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTV----KA----------- +------------------------------------------------------------ +------------------------------------------------------------ +---AAAQQQ------------------------------------DSASTQKAEREVTKM +VILMVFGFLIAWTPYATVAAWIFF-NKGA--DFSAKFMAIPAFFSKSSALYNPVIYVLLN +KQFRNCMLTTIF----C---GKNPLGD-DE-SSTVSTSKTEV------------------ +------------------------------------------------------------ +---SS------------VSP-A---------- +> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish +MN-------------------GTE------GNNFYVPL----------------SNRTGL +-----------VRSPFEYPQY-----YLAEPWQ--------------------F------ +------------------KLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNL +AVAGAIMVCFG-FTVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIER +YIVVCKPMGS-FKFSSTHASAGIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSC +GPDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTV----KA----------- 
+------------------------------------------------------------ +------------------------------------------------------------ +---AAAQQQ------------------------------------DSASTQKAEREVTKM +VILMVLGFLVAWTPYATVAAWIFF-NKGA--AFSAQFMAIPAFFSKTSALYNPVIYVLLN +KQFRSCMLTTLF----C---GKNPLGD-EE-SSTVSTSKTEV------------------ +------------------------------------------------------------ +---SS------------VSP-A---------- +> 6== L11864 1 Carassius auratus blue cone opsin <retina>[Biochemistry32,208- +MK-------------------QVPE----FHEDFYIPIPL------------DINNLSAY +-------------SPFLVPQD-----HLGNQGI--------------------F------ +------------------MAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNL +SIANLFVAIFG-SPLSFYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFER +WLVICKPLGN-FTFKTPHAIAGCILPWISALAA-SLPPLFG-W-----SRYIPEGLQCSC +GPDWYTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITL----KL----------- +------------------------------------------------------------ +------------------------------------------------------------ +---AAKAQA------------------------------------DSASTQKAEREVTKM +VVVMVLGFLVCWAPYASFSLWIVS-HRGE--EFDLRMATIPSCLSKASTVYNPVIYVLMN +KQFRSCMM-KMV----C---GKN-IEE-DE--ASTSSQVTQV------------------ +------------------------------------------------------------ +---SS------------VAPEK---------- +> 7== M13299 1 human BCP <>[Science232(4747),193-202'86] +MR-------------------KMS------EEEFYL-----------------FKNISSV +-------------GPWDGPQY-----HIAPVWA--------------------F------ +------------------YLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNV +SFGGFLLCIFS-VFPVFVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFER +YIVICKPFGN-FRFSSKHALTVVLATWTIGIGV-SIPPFFG-W-----SRFIPEGLQCSC +GPDWYTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRAL----KA----------- +------------------------------------------------------------ +------------------------------------------------------------ +---VAAQQQ------------------------------------ESATTQKAEREVSRM +VVVMVGSFCVCYVPYAAFAMYMVN-NRNH--GLDLRLVTIPSFFSKSACIYNPIIYCFMN 
+KQFQACIM-KMV----C---GKA-MTD-ES--DTCSSQKTEV------------------ +------------------------------------------------------------ +---STVS-------STQVGP-N---------- +> 8=opsin, greensensitive human (fragment) S07060 +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---DLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWER +WLVVCKPFGN-VRFDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSC +GPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RA----------- +------------------------------------------------------------ +------------------------------------------------------------ +---VAKQQK------------------------------------ESESTQKAEKEVTRM +VVVMVLAFC--------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------- +> 9== K03494 1 human GCP <>[Science232(4747),193-202'86] +MAQQWSLQRLAGRHPQDSYEDSTQ------SSIFTYTN----------------SNST-- +------------RGPFEGPNY-----HIAPRWV--------------------Y------ +------------------HLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNL +AVADLAETVIA-STISVVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWER +WMVVCKPFGN-VRFDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSC +GPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RA----------- +------------------------------------------------------------ +------------------------------------------------------------ +---VAKQQK------------------------------------ESESTQKAEKEVTRM +VVVMVLAFCFCWGPYAFFACFAAA-NPGY--PFHPLMAALPAFFAKSATIYNPVIYVFMN +RQFRNCILQLF---------GKK-VDD-GS--ELSSASKTEV------------------ +------------------------------------------------------------ +---SSV---------SSVSP-A---------- +> 10== Z68193 1 human Red Opsin <>[] +MAQQWSLQRLAGRHPQDSYEDSTQ------SSIFTYTN----------------SNST-- +------------RGPFEGPNY-----HIAPRWV--------------------Y------ 
+------------------HLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNL +AVADLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWER +WLVVCKPFGN-VRFDAKLAIVGIAFSWIWSAVW-TAPPIFG-W-----SRYWPHGLKTSC +GPDVFSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAI----RA----------- +------------------------------------------------------------ +------------------------------------------------------------ +---VAKQQK------------------------------------ESESTQKAEKEVTRM +VVVMIFAYCVCWGPYTFFACFAAA-NPGY--AFHPLMAALPAYFAKSATIYNPVIYVFMN +RQFRNCILQLF---------GKK-VDD-GS--ELSSASKTEV------------------ +------------------------------------------------------------ +---SSV---------SSVSP-A---------- +> 11== M92036 1 Gecko gecko P521 <retina>[PNAS89,6841-6845'92] +MTEAWNVAVFAARRSRDD-DDTTR------GSVFTYTN----------------TNNT-- +------------RGPFEGPNY-----HIAPRWV--------------------Y------ +------------------NLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNL +AFVDLVETLVA-STISVFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWER +WFVVCKPFGN-IKFDSKLAIIGIVFSWVWAWGW-SAPPIFG-W-----SRYWPHGLKTSC +GPDVFSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAI----RA----------- +------------------------------------------------------------ +------------------------------------------------------------ +---VAAQQK------------------------------------ESESTQKAEREVSRM +VVVMIVAFCICWGPYASFVSFAAA-NPGY--AFHPLAAALPAYFAKSATIYNPVIYVFMN +RQFRNCIMQLF---------GKK-VDD-GS--EASTTSRTEV------------------ +------------------------------------------------------------ +---SSVS-------NSSVAP-A---------- +> 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90] +MA-AWEAAFAARRRHEE--EDTTR------DSVFTYTN----------------SNNT-- +------------RGPFEGPNY-----HIAPRWV--------------------Y------ +------------------NLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNL +AVADLGETVIA-STISVINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWER +WFVVCKPFGN-IKFDGKLAVAGILFSWLWSCAW-TAPPIFG-W-----SRYWPHGLKTSC +GPDVFSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAI----RA----------- +------------------------------------------------------------ 
+------------------------------------------------------------ +---VAAQQK------------------------------------ESESTQKAEKEVSRM +VVVMIVAYCFCWGPYTFFACFAAA-NPGY--AFHPLAAALPAYFAKSATIYNPIIYVFMN +RQFRNCILQLF---------GKK-VDD-GS--EVST-SRTEV------------------ +------------------------------------------------------------ +---SSVS-------NSSVSP-A---------- +> 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95] +MS-----------------SNSSQ------AP----------------------PNGT-- +------------PGPFDGPQW----PYQAPQST--------------------Y------ +------------------VGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNL +AVADLLVTLCG-SSVSLSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALER +YVVVCKPLGD-FQFQRRHAVSGCAFTWGWALLW-SAPPLLG-W-----SSYVPEGLRTSC +GPNWYTGGS--NNNSYILSLFVTCFVLPLSLILFSYTNLLLTL----RA----------- +------------------------------------------------------------ +------------------------------------------------------------ +---AAAQQK------------------------------------EADTTQRAEREVTRM +VIVMVMAFLLCWLPYSTFALVVAT-HKGI--IIQPVLASLPSYFSKTATVYNPIIYVFMN +KQFQSCLLEMLC----CGYQPQR-TGK-AS--PGTPGPHADV------------------ +------------------------------------------------------------ +---TAAG------LRNKVMP-AH-------PV +> 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87] +MESGNV---------------SSSLFGNVST-ALRPEARLSA---ETRLLGW-------- +------------NVPPEELR------HIPEHWL-----------TYPEPPESMN------ +------------------YLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINL +AFCDFMMMVK--TPIFIYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDR +FNVITRPMEG--KMTHGKAIAMIIFIYMYATPW-VVACYTETW-----GRFVPEGYLTSC +TFDYLT--DNFDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKA----------- +------------------------------------------------------------ +------------------------------------------------------------ +---LRDQAKK----------MNVESL----------------RSNVDKNKETAEIRIAKA +AITICFLFFCSWTPYGVMSLIGAF-GDKT--LLTPGATMIPACACKMVACIDPFVYAISH +PRYRMELQKRCP----WLALNEKAPE--SS-AVASTSTTQEP------------------ +------------------------------------------------------------ 
+---------------QQTTA-A---------- +> 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92 +MEYHNV---------------SSVL-GNVSS-VLRPDARLSA---ESRLLGW-------- +------------NVPPDELR------HIPEHWL-----------IYPEPPESMN------ +------------------YLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINL +AFCDFMMMIK--TPIFIYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDR +YNVITRPMEG--KMTHGKAIAMIIFIYLYATPW-VVACYTESW-----GRFVPEGYLTSC +TFDYLT--DNFDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKA----------- +------------------------------------------------------------ +------------------------------------------------------------ +---LRDQAKK----------MNVDSL----------------RSNVDKSKEAAEIRIAKA +AITICFLFFASWTPYGVMSLIGAF-GDKT--LLTPGATMIPACTCKMVACIDPFVYAISH +PRYRMELQKRCP----WLAISEKAPE--SR-AAISTSTTQEQ------------------ +------------------------------------------------------------ +---------------QQTTA-A---------- +> 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87] +ME---------------------PL-CNASEPPLRPEAR-SSGNGDLQFLGW-------- +------------NVPPDQIQ------YIPEHWL-----------TQLEPPASMH------ +------------------YMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNL +AVFDLIMCLK--APIF--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDR +YNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSC +SFDYLS--DNFDTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKA----------- +------------------------------------------------------------ +------------------------------------------------------------ +---LREQAKK----------MNVESL----------------RSNVDKSKETAEIRIAKA +AITICFLFFVSWTPYGVMSLIGAF-GDKS--LLTQGATMIPACTCKLVACIDPFVYAISH +PRYRLELQKRCP----WLGVNEKSGE--IS-SAQST-TTQEQ------------------ +------------------------------------------------------------ +---------------QQTTA-A---------- +> 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92 +MD---------------------AL-CNASEPPLRPEARMSSGSDELQFLGW-------- +------------NVPPDQIQ------YIPEHWL-----------TQLEPPASMH------ 
+------------------YMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNL +AVFDLIMCLK--APIFIYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDR +YNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSC +SFDYLS--DNFDTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKA----------- +------------------------------------------------------------ +------------------------------------------------------------ +---LREQAKK----------MNVESL----------------RSNVDKSKETAEIRIAKA +AITICFLFFVSWTPYGVMSLIGAF-GDKS--LLTPGATMIPACTCKLVACIEPFVYAISH +PRYRMELQKRCP----WLGVNEKSGE--AS-SAQST-TTQEQ------------------ +------------------------------------------------------------ +--------------TQQTSA-A---------- +> 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 <compound eye>[J.Exp.Biol.1 +MT-------------------------NATGPQMAYYGAASMD------FGYP-EGVSIV +-----------DFVRPEIKP------YVHQHWY-----------NYPPVNPMWH------ +------------------YLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNL +ALSDLIMLTTN-VPFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDR +YNIICNGFNG-PKLTTGKAVVFALISWVIAIGC-ALPPFFG-W-----GNYILEGILDSC +SYDYLT--QDFNTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAA----------- +------------------------------------------------------------ +------------------------------------------------------------ +---MRAQAKK----------MNVSTL----------------RS-NEADAQRAEIRIAKT +ALVNVSLWFICWTPYALISLKGVM-GDTS--GITPLVSTLPALLAKSCSCYNPFVYAISH +PKYRLAITQHLP----WFCVHETETKS-ND-DSQSNSTVAQ------------------- +------------------------------------------------------------ +------------------DK-A---------- +> 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 <compound eye>[J.Exp.Biol.1 +MA-------------------------NVTGPQMAFYGSGAAT------FGYP-EGMTVA +-----------DFVPDRVKH------MVLDHWY-----------NYPPVNPMWH------ +------------------YLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNL +ALSDLIMLTTN-FPPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDR +YNIICNGFNG-PKLTQGKATFMCGLAWVISVGW-SLPPFFG-W-----GSYTLEGILDSC +SYDYFT--RDMNTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAA----------- 
+------------------------------------------------------------ +------------------------------------------------------------ +---MRAQAKK----------MNVTNL----------------RS-NEAETQRAEIRIAKT +ALVNVSLWFICWTPYAAITIQGLL-GNAE--GITPLLTTLPALLAKSCSCYNPFVYAISH +PKFRLAITQHLP----WFCVHEKDPND-VE-ENQSSNTQTQ------------------- +------------------------------------------------------------ +------------------EK-S---------- +> 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85] +ME-------------------SFAVAAAQLGPHFAPLS-----------------NGSVV +-----------DKVTPDMAH------LISPYWN-----------QFPAMDPIWA------ +------------------KILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINL +AISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDR +YQVIVKGMAG-RPMTIPLALGKM---------------------------YVPEGNLTSC +GIDYLE--RDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA----------- +------------------------------------------------------------ +------------------------------------------------------------ +---MREQAKK----------MNVKSL----------------RS-SEDAEKSAEGKLAKV +ALVTITLWFMAWTPYLVINCMGLF-KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISH +PKYRLALKEKCP----CCVFGKVDDGK-SS-DAQSQATASEA------------------ +------------------------------------------------------------ +---E--------------SK-A---------- +> 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85] +ME-------------------SFAVAAAQLGPHFAPLS-----------------NGSVV +-----------DKVTPDMAH------LISPYWN-----------QFPAMDPIWA------ +------------------KILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINL +AISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDR +YQVIVKGMAG-RPMTIPLALGKIAYIWFMSSIW-CLAPAFG-W-----SRYVPEGNLTSC +GIDYLE--RDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA----------- +------------------------------------------------------------ +------------------------------------------------------------ +---MREQAKK----------MNVKSL----------------RS-SEDAEKSAEGKLAKV +ALVTITLWFMAWTPYLVINCMGLF-KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISH +PKYRLALKEKCP----CCVFGKVDDGK-SS-DAQSQATASEA------------------ 
+------------------------------------------------------------ +---E--------------SK-A---------- +> 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204' +MD-------------------SFAAVATQLGPQFAAPS-----------------NGSVV +-----------DKVTPDMAH------LISPYWD-----------QFPAMDPIWA------ +------------------KILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINL +AISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDR +YQVIVKGMAG-RPMTIPLALGKIAYIWFMSTIWCCLAPVFG-W-----SRYVPEGNLTSC +GIDYLE--RDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA----------- +------------------------------------------------------------ +------------------------------------------------------------ +---MREQAKK----------MNVKSL----------------RS-SEDADKSAEGKLAKV +ALVTISLWFMAWTPYLVINCMGLF-KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISH +PKYRLALKEKCP----CCVFGKVDDGK-SS-EAQSQATTSEA------------------ +------------------------------------------------------------ +---E--------------SK-A---------- +> 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86] +MERSH--------------LPETPFDLAHSGPRFQAQSSG---------------NGSVL +-----------DNVLPDMAH------LVNPYWS-----------RFAPMDPMMS------ +------------------KILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNL +AFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDR +YNVIVKGING-TPMTIKTSIMKILFIWMMAVFW-TVMPLIG-W-----SAYVPEGNLTAC +SIDYMT--RMWNPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKA----------- +------------------------------------------------------------ +------------------------------------------------------------ +---MREQAKK----------MNVKSL----------------RS-SEDCDKSAEGKLAKV +ALTTISLWFMAWTPYLVICYFGLF-KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISH +PKYRIVLKEKCP----MCVFGNTDEPKPDA-PASDTETTSEA------------------ +------------------------------------------------------------ +---D--------------SK-A---------- +> 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92 +MERSL--------------LPEPPLAMALLGPRFEAQTGG---------------NRSVL +-----------DNVLPDMAP------LVNPHWS-----------RFAPMDPTMS------ 
+------------------KILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNL +AFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDR +YNVIVKGING-TPMTIKTSIMKIAFIWMMAVFW-TIMPLIG-W-----SSYVPEGNLTAC +SIDYMT--RQWNPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKA----------- +------------------------------------------------------------ +------------------------------------------------------------ +---MRDQAKK----------MNVKSL----------------RS-SEDCDKSAENKLAKV +ALTTISLWFMAWTPYLIICYFGLF-KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISH +PNDRLVLKEKCP----MCVCGTTDEPKPDA-PPSDTETTSEA------------------ +------------------------------------------------------------ +---E--------------SK-D---------- +> 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[] +MI-------------------------AVSGPSYEAFSYGGQA-----RF----NNQTVV +-----------DKVPPDMLH------LIDANWY-----------QYPPLNPMWH------ +------------------GILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINL +AISNFLMMFCM-SPPMVINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDR +YNVIVKGLSG-KPLSINGALIRIIAIWLFSLGW-TIAPMFG-W-----NRYVPEGNMTAC +GTDYFN--RGLLSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKN----------- +------------------------------------------------------------ +------------------------------------------------------------ +---MREQAKK----------MNVASL----------------RS-SENQNTSAECKLAKV +ALMTISLWFMAWTPYLVINFSGIF-NL-V--KISPLFTIWGSLFAKANAVYNPIVYGISH +PKYRAALFAKFP----SLAC-AAEPSS-DA-VSTTSGTTTVT------------------ +------------------------------------------------------------ +---DNEK-----------SN-A---------- +> 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93] +M---------------------------ANQLSYSSLGWPYQP------------NASVV +-----------DTMPKEMLY------MIHEHWY-----------AFPPMNPLWY------ +------------------SILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNL +AFSDFCMMAFM-MPTMTSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDR +YNVIVRGMAA-APLTHKKATLLLLFVWIWSGGW-TILPFFG-W-----SRYVPEGNLTSC +TVDYLT--KDWSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQ----------- +------------------------------------------------------------ 
+------------------------------------------------------------ +---LREQAKK----------MNVASL----------------RANADQQKQSAECRLAKV +AMMTVGLWFMAWTPYLIISWAGVF-SSGT--RLTPLATIWGSVFAKANSCYNPIVYGISH +PRYKAALYQRFP----SLACGSGESGS-DV-KSEASATTTME------------------ +------------------------------------------------------------ +---EKPK----------IPE-A---------- +> 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88] +MVESTTL------------VNQT--------------------------WWY---NPTVD +---------------------------IHPHWA-----------KFDPIPDAVY------ +------------------YSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINL +AMSDLSFSAINGFPLKTISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDR +YNVIGRPMAASKKMSHRRAFLMIIFVWMWSIVW-SVGPVFN-W-----GAYVPEGILTSC +SFDYLS--TDPSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKE----------- +------------------------------------------------------------ +------------------------------------------------------------ +---MAAMAKR----------LNAKEL----------------R--KAQAGASAEMKLAKI +SMVIITQFMLSWSPYAIIALLAQF-GPAE--WVTPYAAELPVLFAKASAIHNPIVYSVSH +PKFREAIQTTFPWLLTCCQFDEKECED-AN-DAEEEVVASER--GGESRDAAQMKEMMAM +MQKMQAQQAAYQPPPPPQGY--PPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAP +PQVEAPQGAPPQGVDNQAYQ-A---------- +> 28== X70498 1 Todarodes pacificus rhodopsin <retina>[FEBS317(1-2),5-11'93] +MGRDLR-------------DNET--------------------------WWY---NPSIV +---------------------------VHPHWR-----------EFDQVPDAVY------ +------------------YSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINL +AFSDFTFSLVNGFPLMTISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDR +YNVIGRPMAASKKMSHRRAFIMIIFVWLWSVLW-AIGPIFG-W-----GAYTLEGVLCNC +SFDYIS--RDSTTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKE----------- +------------------------------------------------------------ +------------------------------------------------------------ +---MAAMAKR----------LNAKEL----------------R--KAQAGANAEMRLAKI +SIVIVSQFLLSWSPYAVVALLAQF-GPLE--WVTPYAAQLPVMFAKASAIHNPMIYSVSH +PKFREAISQTFPWVLTCCQFDDKETED-DK-DAETEIPAGESSDAAPSADAAQMKEMMAM 
+MQKMQQQQAAY----PPQGYAPPPQGYPPQGY--PPQGYPPQGYPPQGYPP---PPQGAP +PQ-GAPPAAPPQGVDNQAYQ-A---------- +> 29== L21195 1 human serotonin 5-HT7 receptor protein <placenta and fetal bra +MMDVNS-------------SGRPDLY----GHLRSFLLP--EVG----------RGLPDL +SPDGGADPVAGSWAPHLLS---EVTASPAPTWD------------APPDNASGC------ +-------GEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVCFVKKLRQPSNYLIVSL +ALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDR +YLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDK--VC +LIS--------QDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF-------- +---------PGF---------------------------------PRVEPDSVI------ +----------------ALNGIVK----------------------LQKEVEECAN----- +---LSRLLKH--------------------------------ER-KNISIFKREQKAATT +LGIIVGAFTVCWLPFFLLSTARPFICGTSCSCIPLWVERTFLWLGYANSLINPFIYAFFN +RDLRTTYRSLLQ--------CQYRNIN----RKLSAAGMHEA------------------ +-LKLA------------------------------------------------------- +---ERPE------RPEFVLQNADYCRKKGHDS +> 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93] +M----------------------------------------------------------- +--------------PHLLSGFLEVTASPAPTWD------------APPDNVSGC------ +-------GEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSL +ALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDR +YLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDK--VC +LIS--------QDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF-------- +---------PGF---------------------------------PRVQPESVI------ +----------------SLNGVVK----------------------LQKEVEECAN----- +---LSRLLKH--------------------------------ER-KNISIFKREQKAATT +LGIIVGAFTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFN +RDLRPTSRSLLQ--------CQYRNIN----RKLSAAGMHEA------------------ +-LKLA------------------------------------------------------- +---ERPE------RSEFVLQNSDHCGKKGHDT +> 31=p A47425 serotonin receptor 5HT-7 - rat +M----------------------------------------------------------- +--------------PHLLSGFLEVTASPAPTWD------------APPDNVSGC------ 
+-------GEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSL +ALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDR +YLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDK--VC +LIS--------QDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF-------- +---------PGF---------------------------------PRVQPESVI------ +----------------SLNGVVK----------------------LQKEVEECAN----- +---LSRLLKH--------------------------------ER-KNISIFKREQKAATT +LGIIVGAFTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFN +RDLRTTYRSLLQ--------CQYRNIN----RKLSAAGMHEA------------------ +-LKLA------------------------------------------------------- +---ERPE------RSEFVLQNSDHCGKKGHDT +> 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92] +M-DVLS-------------PGQ--------GNNTTSPPAPFETG----------GNTTGI +-------------------------SDVTVSYQ--------------------------- +------------------VITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSL +AVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDR +YWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP--EDRSDPD---AC +TIS--------KDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK----------- +----------------------------TVKKVEKTGADTRHGASPAPQPKKSVNGESGS +RNWRLGVESKAGGAL-CANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAGPTPCAP- +----ASFERK-----------NERNA----------------EA-KRKMALARERKTVKT +LGIIMGTFILCWLPFFIVALVLPF-CESSC-HMPTLLGAIINWLGYSNSLLNPVIYAYFN +KDFQNAFKKIIK--------CKFCR----------------------------------- +------------------------------------------------------------ +-------------------------------Q +> 33=p A35181 serotonin receptor class 1A - rat +M-DVFS-------------FGQ--------GNNTTASQEPFGTG----------GNVTSI +-------------------------SDVTFSYQ--------------------------- +------------------VITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSL +AVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDR +YWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP--EDRSDPD---AC +TIS--------KDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK----------- +----------------------------TVRKVEKKGAGTSLGTSSAPPPKKSLNGQPGS 
+GDWRRCAENRAVGTP-CTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESGSNSYAP- +----ACLERK-----------NERNA----------------EA-KRKMALARERKTVKT +LGIIMGTFILCWLPFFIVALVLPF-CESSC-HMPALLGAIINWLGYSNSLLNPVIYAYFN +KDFQNAFKKIIK--------CKFCR----------------------------------- +------------------------------------------------------------ +-------------------------------R +> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93] +M-ANFT-------------FGDLALDVARMGGLASTPSGLRSTG----------LTTPGL +SPTG------------------LVTSDFNDSYGLTGQFINGSHSSRSRDNASAN-DTSAT +NMTDDRYWSLTVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSL +AVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDR +YWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NN--DPDKTGTC +IIS--------QDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLK +TEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKK-------- +--------NRAKKLPENANGVNSNSSSSERLKQIQIE-----------TAEAFANGCAEE +AS-IAMLERQ-CNNGKKISSNDTPYS----------------RT-REKLELKRERKAART +LAIITGAFLICWLPFFIIALIGPF-VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFS +PEFRSAFQKILF--------GKYRRG---------------------------------- +------------------------------------------------------------ +------------------------------HR +> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail +M-ANFT-------------FGDLALDVARMGGLASTPSGLRSTG----------LTTPGL +SPTG------------------LVTSDFNDSYGLTGQFINGSHSSRSRDNASAN-DTSAT +NMTDDRYWSLTVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSL +AVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDR +YWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NN--DPDKTGTC +IIS--------QDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLK +TEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKK-------- +--------NRAKKLPENANGVNSNSSSSERLKQIQIE-----------TAEAFANGCAEE +AS-IAMLERQ-CNNGKKISSNDTPYS----------------RT-REKLELKRERKAART +LAIITGAFLICWLPFFIIALIGPF-VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFS +PEFRSAFQKILF--------GKYRRG---------------------------------- 
+------------------------------------------------------------ +------------------------------HR +> 36== X95604 1 Bombyx mori serotonin receptor <antennae>[InsectBiochem.Mol.Bi +M-EG-A-------------EGQEELD----WEALYLRLPLQNCS----------WNSTGW +EPNW------------------NVTVVPNTTWW----------------QASAPFDTPAA +LVRA--------------AAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSL +AVADLLVACLV-MPLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDR +YWAVTN-IDYIHASTAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDL--RC +VVS--------QDVGYQIFATASSFYVPVLIILILYWRIYQTARKRIRR----------- +----------------------------------RRGATARGGVGPPPVP---------- +-----------------AGGALVAGGGSGGIAAAVVAVIGRP---LPTISETTTTGFTNV +SSNNTSPEKQSCANGLEA---DPPTTGYGAVAAAYYPSLVRRKP-KEAADSKRERKAAKT +LAIITGAFVACWLPFFVLAILVPT-CDCE---VSPVLTSLSLWLGYFNSTLNPVIYTVFS +PEFRHAFQRLLC--------GRRVRRR----R---------------------------- +------------------------------------------------------------ +-----------------------------APQ
--- a/test-data/mafft_custom_result.aln Tue Oct 31 15:48:53 2023 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,458 +0,0 @@ -CLUSTAL format alignment by MAFFT F-INS-i (v7.455) - - -1== MN------------------------GTE--GDNFYVPFS--------NKTGL-ARSPYE -2== MN------------------------GTE--GPNFYVPFS--------NITGV-VRSPFE -3== MN------------------------GTE--GINFYVPMS--------NKTGV-VRSPFE -4=p MN------------------------GTE--GKNFYVPMS--------NRTGL-VRSPFE -5=p MN------------------------GTE--GNNFYVPLS--------NRTGL-VRSPFE -6== MK------------------------QVPEFHEDFYIPIP-------LDINNLSAYSPFL -7== MR------------------------KMS--EEEFYL------------FKNISSVGPWD -8=opsin, ------------------------------------------------------------ -9== MAQQWSLQ-RLAGRHPQDS----YEDSTQ--SSIFTYTNS--------NS----TRGPFE -10== MAQQWSLQ-RLAGRHPQDS----YEDSTQ--SSIFTYTNS--------NS----TRGPFE -11== MTEAWNVAVFAARRSRDD------DDTTR--GSVFTYTNT--------NN----TRGPFE -12== MA-AWEAA-FAARRRHEE------EDTTR--DSVFTYTNS--------NN----TRGPFE -13== MS----------------------SNSSQ--------------AP--PNG----TPGPFD -14== MESG-NV-----------------------------------------SSSLFGNVSTAL -15== MEYH-NV------------------------------------------SSVLGNVSSVL -16== MEPLCNA------------------------------------------------SEPPL -17== MDALCNA------------------------------------------------SEPPL -18== MT-------------------------------------------------------NAT -19== MA-------------------------------------------------------NVT -20== ME-------------------------------------------------SFAVAAAQL -21== ME-------------------------------------------------SFAVAAAQL -22== MD-------------------------------------------------SFAAVATQL -23== MERS--------------------------------------------HLPETPFDLAHS -24== MERS--------------------------------------------LLPEPPLAMALL -25== MI-------------------------------------------------------AVS -26== M----------------------------------------------------------- -27== MVESTT------------------------------------------------------ 
-28== MGRD-------------------------------------------------------- -29== MM-----DVNSSGRPDLYGH-----------LRSFLLPEVGRGLPDLSPDGGADPVAGSW -30== M----------------------------------------------------------- -31=p M----------------------------------------------------------- -32== MD------VLSPGQ----------------------------------GNNTTSPPAPFE -33=p MD------VFSFGQ----------------------------------GNNTTASQEPFG -34== MA------NFTFGDLALDVARMGGLASTPS------------------GLRSTGLTTPGL -35=p MA------NFTFGDLALDVARMGGLASTPS------------------GLRSTGLTTPGL -36== ME-------GAEGQEELD-------------WEALYLRL---------PLQNCSWNSTGW - - -1== YP---------------------------------------------------------- -2== QP---------------------------------------------------------- -3== YP---------------------------------------------------------- -4=p YP---------------------------------------------------------- -5=p YP---------------------------------------------------------- -6== VP---------------------------------------------------------- -7== GP---------------------------------------------------------- -8=opsin, ------------------------------------------------------------ -9== GP---------------------------------------------------------- -10== GP---------------------------------------------------------- -11== GP---------------------------------------------------------- -12== GP---------------------------------------------------------- -13== GP---------------------------------------------------------- -14== RPEARL----------------------SAETRLLGWNVPPEELRHIPEHWL-------- -15== RPDARL----------------------SAESRLLGWNVPPDELRHIPEHWL-------- -16== RPEAR-SSG-------------------NGDLQFLGWNVPPDQIQYIPEHWL-------- -17== RPEARMSSG-------------------SDELQFLGWNVPPDQIQYIPEHWL-------- -18== GPQMAYYGAASMDFG-------------YPEGVSIVDFVRPEIKPYVHQHWY-------- -19== GPQMAFYGSGAATFG-------------YPEGMTVADFVPDRVKHMVLDHWY-------- -20== GPHFAPLS-----------------------NGSVVDKVTPDMAHLISPYWN-------- -21== 
GPHFAPLS-----------------------NGSVVDKVTPDMAHLISPYWN-------- -22== GPQFAAPS-----------------------NGSVVDKVTPDMAHLISPYWD-------- -23== GPRFQAQSSG---------------------NGSVLDNVLPDMAHLVNPYWS-------- -24== GPRFEAQTGG---------------------NRSVLDNVLPDMAPLVNPHWS-------- -25== GPSYEAFSYG--GQA-------------RFNNQTVVDKVPPDMLHLIDANWY-------- -26== -ANQLSYSSL--GWP-------------YQPNASVVDTMPKEMLYMIHEHWY-------- -27== ----------LVNQT-------------WWYNPTV----------DIHPHWA-------- -28== ---------LRDNET-------------WWYNPSI----------VVHPHWR-------- -29== APHLLS-----------------------------------EVTASPAPTWDAPPDNASG -30== -PHLLSGF--------------------------------LEVTASPAPTWDAPPDNVSG -31=p -PHLLSGF--------------------------------LEVTASPAPTWDAPPDNVSG -32== TG--------------------------------------GNTTGI-------------- -33=p TG--------------------------------------GNVTSI-------------- -34== SPTGLVTSDFNDSYGLTGQFINGSHSSRSRDNASANDTSATNMTD--DRYWSL------- -35=p SPTGLVTSDFNDSYGLTGQFINGSHSSRSRDNASANDTSATNMTD--DRYWSL------- -36== EPNW-------------------------------------NVTVVPNTTWWQ------- - - -1== -----QY-YLAEPWKYSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAM -2== -----QY-YLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAV -3== -----QY-YLAEPWKYRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAV -4=p -----QY-YLAEPWQFKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAV -5=p -----QY-YLAEPWQFKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAV -6== -----QD-HLGNQGIFMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSI -7== -----QY-HIAPVWAFYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSF -8=opsin, ------------------------------------------------------------ -9== -----NY-HIAPRWVYHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAV -10== -----NY-HIAPRWVYHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAV -11== -----NY-HIAPRWVYNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAF -12== -----NY-HIAPRWVYNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAV -13== -----QWPYQAPQSTYVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAV -14== 
-----TY-PEPPESMNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAF -15== -----IY-PEPPESMNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAF -16== -----TQ-LEPPASMHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAV -17== -----TQ-LEPPASMHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAV -18== -----NY-PPVNPMWHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLAL -19== -----NY-PPVNPMWHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLAL -20== -----QF-PAMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAI -21== -----QF-PAMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAI -22== -----QF-PAMDPIWAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAI -23== -----RF-APMDPMMSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAF -24== -----RF-APMDPTMSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAF -25== -----QY-PPLNPMWHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAI -26== -----AF-PPMNPLWYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAF -27== -----KF-DPIPDAVYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAM -28== -----EF-DQVPDAVYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAF -29== CGEQINY----GRVEKVVIGSILTLITLLTIAGNCLVVISVCFVKKLRQPSNYLIVSLAL -30== CGEQINY----GRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLAL -31=p CGEQINY----GRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLAL -32== SDVTVSY--------QVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAV -33=p SDVTFSY--------QVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAV -34== TVYSHEH--------LVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAV -35=p TVYSHEH--------LVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAV -36== --ASAPFDTPAALVRAAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAV - - -1== ANLFMVLFG-FTVTMYTSMNG-YFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYI -2== ADLFMVFGG-FTTTLYTSLHG-YFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYV -3== ADLFMACFG-FTVTFYTAWNG-YFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYI -4=p AGTIMVCFG-FTVTFYTAING-YFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYI -5=p AGAIMVCFG-FTVTFYTAING-YFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYI -6== ANLFVAIFG-SPLSFYSFFNR-YFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWL -7== 
GGFLLCIFS-VFPVFVASCNG-YFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYI -8=opsin, -DLAETVIA-STISIVNQVSG-YFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWL -9== ADLAETVIA-STISVVNQVYG-YFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWM -10== ADLAETVIA-STISIVNQVSG-YFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWL -11== VDLVETLVA-STISVFNQIFG-YFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWF -12== ADLGETVIA-STISVINQISG-YFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWF -13== ADLLVTLCG-SSVSLSNNING-FFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYV -14== CDFMMMVK--TPIFIYNSFHQ-GYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFN -15== CDFMMMIK--TPIFIYNSFHQ-GYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYN -16== FDLIMCLK--APIF--NSFHR-GFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYN -17== FDLIMCLK--APIFIYNSFHR-GFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYN -18== SDLIMLTTN-VPFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYN -19== SDLIMLTTN-FPPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYN -20== SDFGIMITN-TPMMGINLYFE-TWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQ -21== SDFGIMITN-TPMMGINLYFE-TWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQ -22== SDFGIMITN-TPMMGINLYFE-TWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQ -23== SDFCMMASQ-SPVMIINFYYE-TWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYN -24== SDFCMMASQ-SPVMIINFYYE-TWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYN -25== SNFLMMFCM-SPPMVINCYYE-TWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYN -26== SDFCMMAFM-MPTMTSNCFAE-TWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYN -27== SDLSFSAINGFPLKTISAFMK-KWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYN -28== SDFTFSLVNGFPLMTISCFLK-KWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYN -29== ADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYL -30== ADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYL -31=p ADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYL -32== TDLMVSVLV-LPMAALYQVLN-KWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYW -33=p TDLMVSVLV-LPMAALYQVLN-KWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYW -34== ADLMVAVLV-MPLSVVSEISK-VWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYW -35=p ADLMVAVLV-MPLSVVSEISK-VWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYW -36== 
ADLLVACLV-MPLGAVYEVVQ-RWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYW - . : : * : : :*: - -1== VICKPMGNFR-FGNTHAIMGVAFTWIMALACA-APPLV-GWSRYIPEGMQCSCGPDYYTL -2== VVCKPMSNFR-FGENHAIMGVAFTWVMALACA-APPLV-GWSRYIPEGMQCSCGIDYYTL -3== VVCKPMGNFR-FSATHAMMGIAFTWVMAFSCA-APPLF-GWSRYMPEGMQCSCGPDYYTH -4=p VVCKPMGSFK-FSSSHAFAGIAFTWVMALACA-APPLF-GWSRYIPEGMQCSCGPDYYTL -5=p VVCKPMGSFK-FSSTHASAGIAFTWVMAMACA-APPLV-GWSRYIPEGIQCSCGPDYYTL -6== VICKPLGNFT-FKTPHAIAGCILPWISALAAS-LPPLF-GWSRYIPEGLQCSCGPDWYTT -7== VICKPFGNFR-FSSKHALTVVLATWTIGIGVS-IPPFF-GWSRFIPEGLQCSCGPDWYTV -8=opsin, VVCKPFGNVR-FDAKLAIVGIAFSWIWAAVWT-APPIF-GWSRYWPHGLKTSCGPDVFSG -9== VVCKPFGNVR-FDAKLAIVGIAFSWIWAAVWT-APPIF-GWSRYWPHGLKTSCGPDVFSG -10== VVCKPFGNVR-FDAKLAIVGIAFSWIWSAVWT-APPIF-GWSRYWPHGLKTSCGPDVFSG -11== VVCKPFGNIK-FDSKLAIIGIVFSWVWAWGWS-APPIF-GWSRYWPHGLKTSCGPDVFSG -12== VVCKPFGNIK-FDGKLAVAGILFSWLWSCAWT-APPIF-GWSRYWPHGLKTSCGPDVFSG -13== VVCKPLGDFQ-FQRRHAVSGCAFTWGWALLWS-APPLL-GWSSYVPEGLRTSCGPNWYTG -14== VITRPMEGK--MTHGKAIAMIIFIYMYATPWV-VACYTETWGRFVPEGYLTSCTFDYLTD -15== VITRPMEGK--MTHGKAIAMIIFIYLYATPWV-VACYTESWGRFVPEGYLTSCTFDYLTD -16== VITKPMNRN--MTFTKAVIMNIIIWLYCTPWV-VLPLTQFWDRFVPEGYLTSCSFDYLSD -17== VITKPMNRN--MTFTKAVIMNIIIWLYCTPWV-VLPLTQFWDRFVPEGYLTSCSFDYLSD -18== IICNGFNGPK-LTTGKAVVFALISWVIAIGCA-LPPFF-GWGNYILEGILDSCSYDYLTQ -19== IICNGFNGPK-LTQGKATFMCGLAWVISVGWS-LPPFF-GWGSYTLEGILDSCSYDYFTR -20== VIVKGMAGRP-MTIPLALGKM----------------------YVPEGNLTSCGIDYLER -21== VIVKGMAGRP-MTIPLALGKIAYIWFMSSIWC-LAPAF-GWSRYVPEGNLTSCGIDYLER -22== VIVKGMAGRP-MTIPLALGKIAYIWFMSTIWCCLAPVF-GWSRYVPEGNLTSCGIDYLER -23== VIVKGINGTP-MTIKTSIMKILFIWMMAVFWT-VMPLI-GWSAYVPEGNLTACSIDYMTR -24== VIVKGINGTP-MTIKTSIMKIAFIWMMAVFWT-IMPLI-GWSSYVPEGNLTACSIDYMTR -25== VIVKGLSGKP-LSINGALIRIIAIWLFSLGWT-IAPMF-GWNRYVPEGNMTACGTDYFNR -26== VIVRGMAAAP-LTHKKATLLLLFVWIWSGGWT-ILPFF-GWSRYVPEGNLTSCTVDYLTK -27== VIGRPMAASKKMSHRRAFLMIIFVWMWSIVWS-VGPVF-NWGAYVPEGILTSCSFDYLST -28== VIGRPMAASKKMSHRRAFIMIIFVWLWSVLWA-IGPIF-GWGAYTLEGVLCNCSFDYISR -29== 
GITRPLTYPVRQNGKCMAKMILSVWLLSASIT-LPPLF-GWAQNVNDDKVC-----LISQ -30== GITRPLTYPVRQNGKCMAKMILSVWLLSASIT-LPPLF-GWAQNVNDDKVC-----LISQ -31=p GITRPLTYPVRQNGKCMAKMILSVWLLSASIT-LPPLF-GWAQNVNDDKVC-----LISQ -32== AITDPIDYVNKRTPRRAAALISLTWLIGFLIS-IPPML-GWRTPEDRSDPDAC---TISK -33=p AITDPIDYVNKRTPRRAAALISLTWLIGFLIS-IPPML-GWRTPEDRSDPDAC---TISK -34== AVTS-IDYIRRRSARRILLMIMVVWIVALFIS-IPPLF-GWRD--PNNDPDKTGTCIISQ -35=p AVTS-IDYIRRRSARRILLMIMVVWIVALFIS-IPPLF-GWRD--PNNDPDKTGTCIISQ -36== AVTN-IDYIHASTAKRVGMMIACVWTVSFFVC-IAQLL-GWKDPDWNQRVSEDLRCVVSQ - : : - -1== NPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQE--------------- -2== KPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQE--------------- -3== NPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQE--------------- -4=p NPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQD--------------- -5=p NPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQD--------------- -6== NNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITLKLAAKAQAD--------------- -7== GTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRALKAVAAQQQE--------------- -8=opsin, SSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKE--------------- -9== SSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKE--------------- -10== SSYPGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAIRAVAKQQKE--------------- -11== SVELGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAIRAVAAQQKE--------------- -12== SSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAIRAVAAQQKE--------------- -13== GS--NNNSYILSLFVTCFVLPLSLILFSYTNLLLTLRAAAAQQKE--------------- -14== NF--DTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKKMNVESLRSNV- -15== NF--DTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKKMNVDSLRSNV- -16== NF--DTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKALREQAKKMNVESLRSNV- -17== NF--DTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKALREQAKKMNVESLRSNV- -18== DF--NTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQAKKMNVSTLRSN-- -19== DM--NTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQAKKMNVTNLRSN-- -20== DW--NPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKSLRSS-- -21== DW--NPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKSLRSS-- -22== 
DW--NPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKSLRSS-- -23== MW--NPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKAMREQAKKMNVKSLRSS-- -24== QW--NPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKAMRDQAKKMNVKSLRSS-- -25== GL--LSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQAKKMNVASLRSS-- -26== DW--SSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQLREQAKKMNVASLRANA- -27== DP--STRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAKRLNAKELRKA-- -28== DS--TTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKRLNAKELRKA-- -29== D-----FGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFP-------------- -30== D-----FGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFP-------------- -31=p D-----FGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFP-------------- -32== D-----HGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKKVEKTGADTRHGASP -33=p D-----HGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRKVEKKGAGTSLGTSS -34== D-----KGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLV -35=p D-----KGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLV -36== D-----VGYQIFATASSFYVPVLIILILYWRIYQTARKRIRRRRGA-------TARGGVG - * : * : - -1== ------------------------------------------------------------ -2== ------------------------------------------------------------ -3== ------------------------------------------------------------ -4=p ------------------------------------------------------------ -5=p ------------------------------------------------------------ -6== ------------------------------------------------------------ -7== ------------------------------------------------------------ -8=opsin, ------------------------------------------------------------ -9== ------------------------------------------------------------ -10== ------------------------------------------------------------ -11== ------------------------------------------------------------ -12== ------------------------------------------------------------ -13== ------------------------------------------------------------ -14== ------------------------------------------------------------ -15== 
------------------------------------------------------------ -16== ------------------------------------------------------------ -17== ------------------------------------------------------------ -18== ------------------------------------------------------------ -19== ------------------------------------------------------------ -20== ------------------------------------------------------------ -21== ------------------------------------------------------------ -22== ------------------------------------------------------------ -23== ------------------------------------------------------------ -24== ------------------------------------------------------------ -25== ------------------------------------------------------------ -26== ------------------------------------------------------------ -27== ------------------------------------------------------------ -28== ------------------------------------------------------------ -29== GFPRVE--------------------------------PDSVI----------ALNGIVK -30== GFPRVQ--------------------------------PESVI----------SLNGVVK -31=p GFPRVQ--------------------------------PESVI----------SLNGVVK -32== A-PQPKKS-VNGESGSRNWRLGVESKAGGAL-----C-----------------ANGAVR -33=p A-PPPKKS-LNGQPGSGDWRRCAENRAVGTP-----C-----------------TNGAVR -34== ASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGV-- -35=p ASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGV-- -36== PPPVPAGGALVAGGGSGGIAAAVVAVIGR---------PLPTISET-------TTTGFTN - - -1== ------------------------------------------------------------ -2== ------------------------------------------------------------ -3== ------------------------------------------------------------ -4=p ------------------------------------------------------------ -5=p ------------------------------------------------------------ -6== ------------------------------------------------------------ -7== ------------------------------------------------------------ -8=opsin, 
------------------------------------------------------------ -9== ------------------------------------------------------------ -10== ------------------------------------------------------------ -11== ------------------------------------------------------------ -12== ------------------------------------------------------------ -13== ------------------------------------------------------------ -14== ------------------------------------------------------------ -15== ------------------------------------------------------------ -16== ------------------------------------------------------------ -17== ------------------------------------------------------------ -18== ------------------------------------------------------------ -19== ------------------------------------------------------------ -20== ------------------------------------------------------------ -21== ------------------------------------------------------------ -22== ------------------------------------------------------------ -23== ------------------------------------------------------------ -24== ------------------------------------------------------------ -25== ------------------------------------------------------------ -26== ------------------------------------------------------------ -27== ------------------------------------------------------------ -28== ------------------------------------------------------------ -29== -------------LQKEVEEC-AN------------------LSRLLKHER--------- -30== -------------LQKEVEEC-AN------------------LSRLLKHER--------- -31=p -------------LQKEVEEC-AN------------------LSRLLKHER--------- -32== QGDDGAALEV-----IEVHRV-GNSKEHLPLPSEAGP--TPCAPASFERKNERN------ -33=p QGDDEATLEV-----IEVHRV-GNSKEHLPLPSESGS--NSYAPACLERKNERN------ -34== -NSNSSSSERLKQIQIETAEAFANGC------------AEEASIAMLERQCNNGKKISSN -35=p -NSNSSSSERLKQIQIETAEAFANGC------------AEEASIAMLERQCNNGKKISSN -36== VSSNNTSP--------EKQSC-ANGLEADPPTTGYGAVAAAYYPSLVRRKP--------- - - -1== 
--------SASTQKAEKEVTRMVVLMVIGFLVCWVPYASVAFYIFT--HQGSDFGAT-FM -2== --------SATTQKAEKEVTRMVIIMVIFFLICWLPYASVAMYIFT--HQGSNFGPI-FM -3== --------SATTQKAEKEVTRMVILMVLGFMLAWTPYAVVAFWIFT--NKGADFTAT-LM -4=p --------SASTQKAEREVTKMVILMVFGFLIAWTPYATVAAWIFF--NKGADFSAK-FM -5=p --------SASTQKAEREVTKMVILMVLGFLVAWTPYATVAAWIFF--NKGAAFSAQ-FM -6== --------SASTQKAEREVTKMVVVMVLGFLVCWAPYASFSLWIVS--HRGEEFDLR-MA -7== --------SATTQKAEREVSRMVVVMVGSFCVCYVPYAAFAMYMVN--NRNHGLDLR-LV -8=opsin, --------SESTQKAEKEVTRMVVVMVLAFC----------------------------- -9== --------SESTQKAEKEVTRMVVVMVLAFCFCWGPYAFFACFAAA--NPGYPFHPL-MA -10== --------SESTQKAEKEVTRMVVVMIFAYCVCWGPYTFFACFAAA--NPGYAFHPL-MA -11== --------SESTQKAEREVSRMVVVMIVAFCICWGPYASFVSFAAA--NPGYAFHPL-AA -12== --------SESTQKAEKEVSRMVVVMIVAYCFCWGPYTFFACFAAA--NPGYAFHPL-AA -13== --------ADTTQRAEREVTRMVIVMVMAFLLCWLPYSTFALVVAT--HKGIIIQPV-LA -14== --------DKNKETAEIRIAKAAITICFLFFCSWTPYGVMSLIGAF--GDKTLLTPG-AT -15== --------DKSKEAAEIRIAKAAITICFLFFASWTPYGVMSLIGAF--GDKTLLTPG-AT -16== --------DKSKETAEIRIAKAAITICFLFFVSWTPYGVMSLIGAF--GDKSLLTQG-AT -17== --------DKSKETAEIRIAKAAITICFLFFVSWTPYGVMSLIGAF--GDKSLLTPG-AT -18== --------EADAQRAEIRIAKTALVNVSLWFICWTPYALISLKGVM--GDTSGITPL-VS -19== --------EAETQRAEIRIAKTALVNVSLWFICWTPYAAITIQGLL--GNAEGITPL-LT -20== --------EDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGLF--KF-EGLTPL-NT -21== --------EDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGLF--KF-EGLTPL-NT -22== --------EDADKSAEGKLAKVALVTISLWFMAWTPYLVINCMGLF--KF-EGLTPL-NT -23== --------EDCDKSAEGKLAKVALTTISLWFMAWTPYLVICYFGLF--KI-DGLTPL-TT -24== --------EDCDKSAENKLAKVALTTISLWFMAWTPYLIICYFGLF--KI-DGLTPL-TT -25== --------ENQNTSAECKLAKVALMTISLWFMAWTPYLVINFSGIF--NL-VKISPL-FT -26== --------DQQKQSAECRLAKVAMMTVGLWFMAWTPYLIISWAGVF--SSGTRLTPL-AT -27== --------Q-AGASAEMKLAKISMVIITQFMLSWSPYAIIALLAQF--GPAEWVTPY-AA -28== --------Q-AGANAEMRLAKISIVIVSQFLLSWSPYAVVALLAQF--GPLEWVTPY-AA -29== -------KNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCSCIPLWVE -30== 
-------KNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCSCIPLWVE -31=p -------KNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCSCIPLWVE -32== ----AEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-CESSCHMPTL-LG -33=p ----AEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-CESSCHMPAL-LG -34== DTPYSRTREKLELKRERKAARTLAIITGAFLICWLPFFIIALIGPF--VDPEGIPPF-AR -35=p DTPYSRTREKLELKRERKAARTLAIITGAFLICWLPFFIIALIGPF--VDPEGIPPF-AR -36== -------KEAADSKRERKAAKTLAIITGAFVACWLPFFVLAILVPT-CD--CEVSPV-LT - * . : - -1== TLPAFFAKSSALYNPVIYILMNKQFRNCMITTL-----CC-----GKNPLGDDE-SGA-S -2== TLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSL-----CC-----GKNPLGDDE-ASA-T -3== AVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTI-----CC-----GKNPFGDEDVSSTVS -4=p AIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTI-----FC-----GKNPLGDDE-SSTVS -5=p AIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTL-----FC-----GKNPLGDEE-SSTVS -6== TIPSCLSKASTVYNPVIYVLMNKQFRSCMMK-M-----VC-----GKN-IEEDE-AST-S -7== TIPSFFSKSACIYNPIIYCFMNKQFQACIMK-M-----VC-----GKA-MTDES-DTC-S -8=opsin, ------------------------------------------------------------ -9== ALPAFFAKSATIYNPVIYVFMNRQFRNCILQ-L-----F------GKK-VDDGS-ELS-S -10== ALPAYFAKSATIYNPVIYVFMNRQFRNCILQ-L-----F------GKK-VDDGS-ELS-S -11== ALPAYFAKSATIYNPVIYVFMNRQFRNCIMQ-L-----F------GKK-VDDGS-EAS-T -12== ALPAYFAKSATIYNPIIYVFMNRQFRNCILQ-L-----F------GKK-VDDGS-EVS-T -13== SLPSYFSKTATVYNPIIYVFMNKQFQSCLLEML-----CC-----GYQPQRTGKASPGTP -14== MIPACACKMVACIDPFVYAISHPRYRMELQKRCPW---LALN---EK----APE-SSA-V -15== MIPACTCKMVACIDPFVYAISHPRYRMELQKRCPW---LAIS---EK----APE-SRA-A -16== MIPACTCKLVACIDPFVYAISHPRYRLELQKRCPW---LGVN---EK----SGE-ISS-A -17== MIPACTCKLVACIEPFVYAISHPRYRMELQKRCPW---LGVN---EK----SGE-ASS-A -18== TLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPW---FCVH---ET----ETK-SND-D -19== TLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPW---FCVH---EK----DPN-DVE-E -20== IWGACFAKSAACYNPIVYGISHPKYRLALKEKCP----CCVF---GKV--DDGK-SSD-A -21== IWGACFAKSAACYNPIVYGISHPKYRLALKEKCP----CCVF---GKV--DDGK-SSD-A -22== IWGACFAKSAACYNPIVYGISHPKYRLALKEKCP----CCVF---GKV--DDGK-SSE-A -23== 
IWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCP----MCVF---GNT--DEPK-PDA-P -24== IWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCP----MCVC---GTT--DEPK-PDA-P -25== IWGSLFAKANAVYNPIVYGISHPKYRAALFAKFP----SLAC---AA----EPS-SDA-V -26== IWGSVFAKANSCYNPIVYGISHPRYKAALYQRFP----SLAC---GS---GESG-SDV-K -27== ELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQF---DEK--ECED-AND-A -28== QLPVMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQF---DDK--ETED-DKD-A -29== RTFLWLGYANSLINPFIYAFFNRDLRTTYRSLL-----QCQYRNINRKL------SAAGM -30== RTCLWLGYANSLINPFIYAFFNRDLRPTSRSLL-----QCQYRNINRKL------SAAGM -31=p RTCLWLGYANSLINPFIYAFFNRDLRTTYRSLL-----QCQYRNINRKL------SAAGM -32== AIINWLGYSNSLLNPVIYAYFNKDFQNAFKKII-----KCKF---CRQ------------ -33=p AIINWLGYSNSLLNPVIYAYFNKDFQNAFKKII-----KCKF---CRR------------ -34== SFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKIL-----FGKYR-RGHR------------ -35=p SFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKIL-----FGKYR-RGHR------------ -36== SLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLL------CGRRVRRRR------------ - - -1== TSKTEVSSVSTS------------------------------------------------ -2== ASKTETSQVA-------------------------------------------------- -3== QSKTEVSSVSSS------------------------------------------------ -4=p TSKTEVSSVS-------------------------------------------------- -5=p TSKTEVSSVS-------------------------------------------------- -6== SQVTQVSSVA-------------------------------------------------- -7== SQKTEVSTVSST------------------------------------------------ -8=opsin, ------------------------------------------------------------ -9== ASKTEVSSVS-------------------------------------------------- -10== ASKTEVSSVS-------------------------------------------------- -11== TSRTEVSSVSNS------------------------------------------------ -12== -SRTEVSSVSNS------------------------------------------------ -13== GPHADVTAAGLR------------------------------------------------ -14== ASTS-TTQEPQ------------------------------------------------- -15== ISTS-TTQEQQ------------------------------------------------- -16== 
QSTT-TQEQ-Q------------------------------------------------- -17== QSTT-TQEQTQ------------------------------------------------- -18== SQSNSTVAQDK------------------------------------------------- -19== NQSSNTQTQEK------------------------------------------------- -20== QSQA-TASEAE------------------------------------------------- -21== QSQA-TASEAE------------------------------------------------- -22== QSQA-TTSEAE------------------------------------------------- -23== ASDTETTSEAD------------------------------------------------- -24== PSDTETTSEAE------------------------------------------------- -25== STTSGTTTVTDN------------------------------------------------ -26== SEASATTTMEEK------------------------------------------------ -27== EEEV-VASERG--GESRDAAQMKEMMAMMQKMQAQQAAYQPPPPPQGY--PPQGYPPQGA -28== ETEI-PAGESSDAAPSADAAQMKEMMAMMQKMQQQQAAY----PPQGYAPPPQGYPPQG- -29== HEALKLAERPERP-----------------EFVLQNA----------------------- -30== HEALKLAERPERS-----------------EFVLQNS----------------------- -31=p HEALKLAERPERS-----------------EFVLQNS----------------------- -32== ------------------------------------------------------------ -33=p ------------------------------------------------------------ -34== ------------------------------------------------------------ -35=p ------------------------------------------------------------ -36== ------------------------------------------------------------ - - -1== --------------------------------------------PVSP-A--- -2== -----------------------------------------------P-A--- -3== --------------------------------------------QVSP-A--- -4=p -----------------------------------------------P-A--- -5=p -----------------------------------------------P-A--- -6== -----------------------------------------------PEK--- -7== --------------------------------------------QVGP-N--- -8=opsin, ----------------------------------------------------- -9== --------------------------------------------SVSP-A--- -10== 
--------------------------------------------SVSP-A--- -11== --------------------------------------------SVAP-A--- -12== --------------------------------------------SVSP-A--- -13== -------------------------------------------NKVMP-AHPV -14== --------------------------------------------QTTA-A--- -15== --------------------------------------------QTTA-A--- -16== --------------------------------------------QTTA-A--- -17== --------------------------------------------QTSA-A--- -18== -------------------------------------------------A--- -19== -------------------------------------------------S--- -20== ----------------------------------------------SK-A--- -21== ----------------------------------------------SK-A--- -22== ----------------------------------------------SK-A--- -23== ----------------------------------------------SK-A--- -24== ----------------------------------------------SK-D--- -25== -------------------------------------------EK-SN-A--- -26== -------------------------------------------PKIPE-A--- -27== YPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQAYQ-A--- -28== YPP-QGYPPQGYPPQGYPP---PPQGAPPQ-GAPPAAPPQGVDNQAYQ-A--- -29== -------------------------------------------DYCRKKGHDS -30== -------------------------------------------DHCGKKGHDT -31=p -------------------------------------------DHCGKKGHDT -32== ----------------------------------------------------- -33=p ----------------------------------------------------- -34== ----------------------------------------------------- -35=p ----------------------------------------------------- -36== -------------------------------------------------A-PQ -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mafft_default.aln Wed Mar 20 07:34:52 2024 +0000 @@ -0,0 +1,468 @@ +> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91] +----------------------------------------MNG----------------T +E--G--DNFYVP----FSNKTGLARSPYEYPQY-------YLAEPWK---------YSAL +AAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLFG-FTVTMYTSM +N-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGN-FRFGNTHAIM +GVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSCGPDYYTLNPNFNNESYVVYMF +VVHFLVPFVIIFFCYGRLLCTV----KEAAAAQQ-------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------ESASTQK------AEKEVTRMVVLMVIGFLVCWVPYASVAFYIFT-H +QGS--DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLCC--------GKNPL +GDDE--SGASTSKTEVSSVS-TSPVSPA-------------------------------- +--------------------------------------------------------- +> 2== U22180 1 rat opsin <rod>[J.Mol.Neurosci.5(3),207-209'94] +----------------------------------------MNG----------------T +E--G--PNFYVP----FSNITGVVRSPFEQPQY-------YLAEPWQ---------FSML +AAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGG-FTTTLYTSL +H-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSN-FRFGENHAIM +GVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMF +VVHFTIPMIVIFFCYGQLVFTV----KEAAAQQQ-------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------ESATTQK------AEKEVTRMVIIMVIFFLICWLPYASVAMYIFT-H +QGS--NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLCC--------GKNPL +GDDE--ASATASKTE------TSQVAPA-------------------------------- +--------------------------------------------------------- +> 3== M92038 1 chicken green sensitive cone opsin <retina>[PNAS89,5932-5936'9 +----------------------------------------MNG----------------T +E--G--INFYVP----MSNKTGVVRSPFEYPQY-------YLAEPWK---------YRLV 
+CCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACFG-FTVTFYTAW +N-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGN-FRFSATHAMM +GIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSCGPDYYTHNPDYHNESYVLYMF +VIHFIIPVVVIFFSYGRLICKV----REAAAQQQ-------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------ESATTQK------AEKEVTRMVILMVLGFMLAWTPYAVVAFWIFT-N +KGA--DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTICC--------GKNPF +GDEDVSSTVSQSKTEVSSVS-SSQVSPA-------------------------------- +--------------------------------------------------------- +> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish +----------------------------------------MNG----------------T +E--G--KNFYVP----MSNRTGLVRSPFEYPQY-------YLAEPWQ---------FKIL +ALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCFG-FTVTFYTAI +N-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FKFSSSHAFA +GIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSCGPDYYTLNPDYNNESYVIYMF +VCHFILPVAVIFFTYGRLVCTV----KAAAAQQQ-------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------DSASTQK------AEREVTKMVILMVFGFLIAWTPYATVAAWIFF-N +KGA--DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIFC--------GKNPL +GDDE-SSTVSTSKTEVSS------VSPA-------------------------------- +--------------------------------------------------------- +> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish +----------------------------------------MNG----------------T +E--G--NNFYVP----LSNRTGLVRSPFEYPQY-------YLAEPWQ---------FKLL +AVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCFG-FTVTFYTAI +N-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FKFSSTHASA +GIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSCGPDYYTLNPEYNNESYVLYMF +ICHFILPVTIIFFTYGRLVCTV----KAAAAQQQ-------------------------- +------------------------------------------------------------ +------------------------------------------------------------ 
+-------------DSASTQK------AEREVTKMVILMVLGFLVAWTPYATVAAWIFF-N +KGA--AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLFC--------GKNPL +GDEE-SSTVSTSKTEVSS------VSPA-------------------------------- +--------------------------------------------------------- +> 6== L11864 1 Carassius auratus blue cone opsin <retina>[Biochemistry32,208- +----------------------------------------MKQ----------------V +PEFH--EDFYIPIPLDINNLS--AYSPFLVPQD-------HLGNQGI---------FMAM +SVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIANLFVAIFG-SPLSFYSFF +N-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGN-FTFKTPHAIA +GCILPWISALAA-SLPPLFG-W-----SRYIPEGLQCSCGPDWYTTNNKYNNESYVMFLF +CFCFAVPFGTIVFCYGQLLITL----KLAAKAQA-------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------DSASTQK------AEREVTKMVVVMVLGFLVCWAPYASFSLWIVS-H +RGE--EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMM-KMVC--------GKN-I +EEDE--ASTSSQVTQVSS------VAPEK------------------------------- +--------------------------------------------------------- +> 7== M13299 1 human BCP <>[Science232(4747),193-202'86] +----------------------------------------MRK----------------M +S--E--EEFYL-----FKNIS--SVGPWDGPQY-------HIAPVWA---------FYLQ +AAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIFS-VFPVFVASC +N-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGN-FRFSSKHALT +VVLATWTIGIGV-SIPPFFG-W-----SRFIPEGLQCSCGPDWYTVGTKYRSESYTWFLF +IFCFIVPLSLICFSYTQLLRAL----KAVAAQQQ-------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------ESATTQK------AEREVSRMVVVMVGSFCVCYVPYAAFAMYMVN-N +RNH--GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIM-KMVC--------GKA-M +TDES--DTCSSQKTEVSTVS-STQVGPN-------------------------------- +--------------------------------------------------------- +> 8=opsin, greensensitive human (fragment) S07060 +------------------------------------------------------------ 
+------------------------------------------------------------ +------------------------------------------DLAETVIA-STISIVNQV +S-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VRFDAKLAIV +GIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLM +VTCCITPLSIIVLCYLQVWLAI----RAVAKQQK-------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------ESESTQK------AEKEVTRMVVVMVLAFC----------------- +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------------------------------------- +> 9== K03494 1 human GCP <>[Science232(4747),193-202'86] +----------------------------------------MAQQWSLQRLAGRHPQDSYE +DSTQ--SSIFT-----YTNSNS-TRGPFEGPNY-------HIAPRWV---------YHLT +SVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISVVNQV +Y-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGN-VRFDAKLAIV +GIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLM +VTCCITPLSIIVLCYLQVWLAI----RAVAKQQK-------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------ESESTQK------AEKEVTRMVVVMVLAFCFCWGPYAFFACFAAA-N +PGY--PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCIL-QLF---------GKK-V +DDGS--ELSSASKTEVSSV---SSVSPA-------------------------------- +--------------------------------------------------------- +> 10== Z68193 1 human Red Opsin <>[] +----------------------------------------MAQQWSLQRLAGRHPQDSYE +DSTQ--SSIFT-----YTNSNS-TRGPFEGPNY-------HIAPRWV---------YHLT +SVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISIVNQV +S-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VRFDAKLAIV +GIAFSWIWSAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLM +VTCCIIPLAIIMLCYLQVWLAI----RAVAKQQK-------------------------- +------------------------------------------------------------ 
+------------------------------------------------------------ +-------------ESESTQK------AEKEVTRMVVVMIFAYCVCWGPYTFFACFAAA-N +PGY--AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCIL-QLF---------GKK-V +DDGS--ELSSASKTEVSSV---SSVSPA-------------------------------- +--------------------------------------------------------- +> 11== M92036 1 Gecko gecko P521 <retina>[PNAS89,6841-6845'92] +----------------------------------------MTEAWNVAVFAARRSRDD-D +DTTR--GSVFT-----YTNTNN-TRGPFEGPNY-------HIAPRWV---------YNLV +SFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVA-STISVFNQI +F-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGN-IKFDSKLAII +GIVFSWVWAWGW-SAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSVELGCQSFMLTLM +ITCCFLPLFIIIVCYLQVWMAI----RAVAAQQK-------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------ESESTQK------AEREVSRMVVVMIVAFCICWGPYASFVSFAAA-N +PGY--AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIM-QLF---------GKK-V +DDGS--EASTTSRTEVSSVS-NSSVAPA-------------------------------- +--------------------------------------------------------- +> 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90] +----------------------------------------MAA-WEAAFAARRRHEE--E +DTTR--DSVFT-----YTNSNN-TRGPFEGPNY-------HIAPRWV---------YNLT +SVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIA-STISVINQI +S-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGN-IKFDGKLAVA +GILFSWLWSCAW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSDPGVQSYMVVLM +VTCCFFPLAIIILCYLQVWLAI----RAVAAQQK-------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------ESESTQK------AEKEVSRMVVVMIVAYCFCWGPYTFFACFAAA-N +PGY--AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCIL-QLF---------GKK-V +DDGS--EVST-SRTEVSSVS-NSSVSPA-------------------------------- +--------------------------------------------------------- +> 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95] 
+----------------------------------------MS-----------------S +NSSQ--AP-----------PNG-TPGPFDGPQW------PYQAPQST---------YVGV +AVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLCG-SSVSLSNNI +N-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGD-FQFQRRHAVS +GCAFTWGWALLW-SAPPLLG-W-----SSYVPEGLRTSCGPNWYTGGSNN--NSYILSLF +VTCFVLPLSLILFSYTNLLLTL----RAAAAQQK-------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------EADTTQR------AEREVTRMVIVMVMAFLLCWLPYSTFALVVAT-H +KGI--IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLL-EMLCCGY----QPQR-T +GKAS--PGTPGPHADVTAAGLRNKVMPAHPV----------------------------- +--------------------------------------------------------- +> 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87] +----------MESGNVSS------------SLFGNVST-ALRP----------------E +ARLS--A----------ETRLLGWNVPPEELR--------HIPEHWLTYPEPPESMNYLL +GTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVK--TPIFIYNSF +H-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG--KMTHGKAIA +MIIFIYMYATPW-VVACYTETW-----GRFVPEGYLTSCTFDYLT--DNFDTRLFVACIF +FFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKK------------------------- +--------------------------------------------------MN-------- +--VESL------------------------------------------------------ +----------RSNVDKNKET------AEIRIAKAAITICFLFFCSWTPYGVMSLIGAF-G +DKT--LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWLAL---------N +EKAP--ESSAVASTSTTQEP--QQTTAA-------------------------------- +--------------------------------------------------------- +> 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92 +----------MEYHNVSS------------VL-GNVSS-VLRP----------------D +ARLS--A----------ESRLLGWNVPPDELR--------HIPEHWLIYPEPPESMNYLL +GTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIK--TPIFIYNSF +H-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG--KMTHGKAIA +MIIFIYLYATPW-VVACYTESW-----GRFVPEGYLTSCTFDYLT--DNFDTRLFVACIF +FFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKK------------------------- 
+--------------------------------------------------MN-------- +--VDSL------------------------------------------------------ +----------RSNVDKSKEA------AEIRIAKAAITICFLFFASWTPYGVMSLIGAF-G +DKT--LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWLAI---------S +EKAP--ESRAAISTSTTQEQ--QQTTAA-------------------------------- +--------------------------------------------------------- +> 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87] +----------ME------------------PL-CNASEPPLRP----------------E +AR-S--SG---N----GDLQFLGWNVPPDQIQ--------YIPEHWLTQLEPPASMHYML +GVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIF--NSF +H-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKAVI +MNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDTRLFVGTIF +FFSFVCPTLMILYYYSQIVGHVFSHEKALREQAKK------------------------- +--------------------------------------------------MN-------- +--VESL------------------------------------------------------ +----------RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-G +DKS--LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWLGV---------N +EKSG--EISSAQST-TTQEQ--QQTTAA-------------------------------- +--------------------------------------------------------- +> 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92 +----------MD------------------AL-CNASEPPLRP----------------E +ARMS--SG---S----DELQFLGWNVPPDQIQ--------YIPEHWLTQLEPPASMHYML +GVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIFIYNSF +H-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKAVI +MNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDTRLFVGTIF +LFSFVVPTLMILYYYSQIVGHVFNHEKALREQAKK------------------------- +--------------------------------------------------MN-------- +--VESL------------------------------------------------------ +----------RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-G +DKS--LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWLGV---------N +EKSG--EASSAQST-TTQEQ-TQQTSAA-------------------------------- 
+--------------------------------------------------------- +> 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 <compound eye>[J.Exp.Biol.1 +--------------------------------MTNATGPQMAY----------------Y +GAAS--MD-FGY----PEGVSIVDFVRPEIKP--------YVHQHWYNYPPVNPMWHYLL +GVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTN-VPFFTYNCF +SGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNG-PKLTTGKAVV +FALISWVIAIGC-ALPPFFG-W-----GNYILEGILDSCSYDYLT--QDFNTFSYNIFIF +VFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQAKK------------------------- +--------------------------------------------------MN-------- +--VSTL------------------------------------------------------ +----------RS-NEADAQR------AEIRIAKTALVNVSLWFICWTPYALISLKGVM-G +DTS--GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWFCV------HE-T +ETKS--NDDSQSNSTVAQDK-A-------------------------------------- +--------------------------------------------------------- +> 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 <compound eye>[J.Exp.Biol.1 +--------------------------------MANVTGPQMAF----------------Y +GSGA--AT-FGY----PEGMTVADFVPDRVKH--------MVLDHWYNYPPVNPMWHYLL +GVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTN-FPPFCYNCF +SGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNG-PKLTQGKATF +MCGLAWVISVGW-SLPPFFG-W-----GSYTLEGILDSCSYDYFT--RDMNTITYNICIF +IFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQAKK------------------------- +--------------------------------------------------MN-------- +--VTNL------------------------------------------------------ +----------RS-NEAETQR------AEIRIAKTALVNVSLWFICWTPYAAITIQGLL-G +NAE--GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWFCV------HE-K +DPND--VEENQSSNTQTQEK-S-------------------------------------- +--------------------------------------------------------- +> 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85] +----------ME----SF------------AVAAAQLGPHFAP----------------L +S-----------------NGSVVDKVTPDMAH--------LISPYWNQFPAMDPIWAKIL +TAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLY 
+F-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLALG +KM---------------------------YVPEGNLTSCGIDYLE--RDWNPRSYLIFYS +IFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK------------------------- +--------------------------------------------------MN-------- +--VKSL------------------------------------------------------ +----------RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMGLF-K +F-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GK-V +DDGK--SSDAQSQA-TASEA-ESKA----------------------------------- +--------------------------------------------------------- +> 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85] +----------ME----SF------------AVAAAQLGPHFAP----------------L +S-----------------NGSVVDKVTPDMAH--------LISPYWNQFPAMDPIWAKIL +TAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLY +F-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLALG +KIAYIWFMSSIW-CLAPAFG-W-----SRYVPEGNLTSCGIDYLE--RDWNPRSYLIFYS +IFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK------------------------- +--------------------------------------------------MN-------- +--VKSL------------------------------------------------------ +----------RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMGLF-K +F-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GK-V +DDGK--SSDAQSQA-TASEA-ESKA----------------------------------- +--------------------------------------------------------- +> 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204' +----------MD----SF------------AAVATQLGPQFAA----------------P +S-----------------NGSVVDKVTPDMAH--------LISPYWDQFPAMDPIWAKIL +TAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLY +F-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLALG +KIAYIWFMSTIWCCLAPVFG-W-----SRYVPEGNLTSCGIDYLE--RDWNPRSYLIFYS +IFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK------------------------- +--------------------------------------------------MN-------- +--VKSL------------------------------------------------------ +----------RS-SEDADKS------AEGKLAKVALVTISLWFMAWTPYLVINCMGLF-K 
+F-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GK-V +DDGK--SSEAQSQA-TTSEA-ESKA----------------------------------- +--------------------------------------------------------- +> 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86] +-----MERSHLP----ET------------PFDLAHSGPRFQA----------------Q +SSG---------------NGSVLDNVLPDMAH--------LVNPYWSRFAPMDPMMSKIL +GLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINFY +Y-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGING-TPMTIKTSIM +KILFIWMMAVFW-TVMPLIG-W-----SAYVPEGNLTACSIDYMT--RMWNPRSYLITYS +LFVYYTPLFLICYSYWFIIAAVAAHEKAMREQAKK------------------------- +--------------------------------------------------MN-------- +--VKSL------------------------------------------------------ +----------RS-SEDCDKS------AEGKLAKVALTTISLWFMAWTPYLVICYFGLF-K +I-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMCVF------GN-T +DEPK--PDAPASDTETTSEA-DSKA----------------------------------- +--------------------------------------------------------- +> 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92 +-----MERSLLP----EP------------PLAMALLGPRFEA----------------Q +TGG---------------NRSVLDNVLPDMAP--------LVNPHWSRFAPMDPTMSKIL +GLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINFY +Y-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGING-TPMTIKTSIM +KIAFIWMMAVFW-TIMPLIG-W-----SSYVPEGNLTACSIDYMT--RQWNPRSYLITYS +LFVYYTPLFMICYSYWFIIATVAAHEKAMRDQAKK------------------------- +--------------------------------------------------MN-------- +--VKSL------------------------------------------------------ +----------RS-SEDCDKS------AENKLAKVALTTISLWFMAWTPYLIICYFGLF-K +I-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMCVC------GT-T +DEPK--PDAPPSDTETTSEA-ESKD----------------------------------- +--------------------------------------------------------- +> 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[] +--------------------------------MIAVSGPSYEA----------------F +SYGG--QARF-------NNQTVVDKVPPDMLH--------LIDANWYQYPPLNPMWHGIL 
+GFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCM-SPPMVINCY +Y-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSG-KPLSINGALI +RIIAIWLFSLGW-TIAPMFG-W-----NRYVPEGNMTACGTDYFN--RGLLSASYLVCYG +IWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQAKK------------------------- +--------------------------------------------------MN-------- +--VASL------------------------------------------------------ +----------RS-SENQNTS------AECKLAKVALMTISLWFMAWTPYLVINFSGIF-N +L-V--KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSLAC-------A-A +EPSS--DAVSTTSGTTTVTD-NEKSNA--------------------------------- +--------------------------------------------------------- +> 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93] +----------------------------------MANQLSYSS----------------L +GWPY--QP----------NASVVDTMPKEMLY--------MIHEHWYAFPPMNPLWYSIL +GVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFM-MPTMTSNCF +A-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAA-APLTHKKATL +LLLFVWIWSGGW-TILPFFG-W-----SRYVPEGNLTSCTVDYLT--KDWSSASYVVIYG +LAVYFLPLITMIYCYFFIVHAVAEHEKQLREQAKK------------------------- +--------------------------------------------------MN-------- +--VASL------------------------------------------------------ +----------RANADQQKQS------AECRLAKVAMMTVGLWFMAWTPYLIISWAGVF-S +SGT--RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSLAC------GS-G +ESGS--DVKSEASATTTMEE-KPKIPEA-------------------------------- +--------------------------------------------------------- +> 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88] +---------------------------------------MVES----------------T +TLVN--QT-WWY------NPTVD------------------IHPHWAKFDPIPDAVYYSV +GIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAINGFPLKTISAF +M-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAASKKMSHRRAFL +MIIFVWMWSIVW-SVGPVFN-W-----GAYVPEGILTSCSFDYLS--TDPSTRSFILCMY +FCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAKR------------------------- +--------------------------------------------------LN-------- +--AKEL------------------------------------------------------ 
+----------R--KAQAGAS------AEMKLAKISMVIITQFMLSWSPYAIIALLAQF-G +PAE--WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQFDEKE-C +EDAN--DAEEEVVASER--G-GESRDAAQMKEMMAMMQKMQAQQAAYQPPPPPQGY--PP +QGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQAYQA +> 28== X70498 1 Todarodes pacificus rhodopsin <retina>[FEBS317(1-2),5-11'93] +----------------------------------------MGR----------------D +LRDN--ET-WWY------NPSIV------------------VHPHWREFDQVPDAVYYSL +GIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLVNGFPLMTISCF +L-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAASKKMSHRRAFI +MIIFVWLWSVLW-AIGPIFG-W-----GAYTLEGVLCNCSFDYIS--RDSTTRSNILCMF +ILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKR------------------------- +--------------------------------------------------LN-------- +--AKEL------------------------------------------------------ +----------R--KAQAGAN------AEMRLAKISIVIVSQFLLSWSPYAVVALLAQF-G +PLE--WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQFDDKE-T +EDDK--DAETEIPAGESSDA-APSADAAQMKEMMAMMQKMQQQQAAY----PPQGYAPPP +QGYPPQGY--PPQGYPPQGYPPQGYPP---PPQGAPPQ-GAPPAAPPQGVDNQAYQA +> 29== L21195 1 human serotonin 5-HT7 receptor protein <placenta and fetal bra +---------MMDVNSSGRPDLYGHLRSFL-LPEVGRGLPDLSPDGGA------------D +PVAGSWAPHLLS------EVTASPAPTWDAPPDNASGCGEQIN--------YGRVEKVVI +GSILTLITLLTIAGNCLVVISVCFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDL +IGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAK +MILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GYTIYST +AVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF---------------------- +---------------------------------PGFPRV----EPDSVIALNG------- +--IVKL----------------------QK---------EVEECAN-------------- +----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFIC +GTSCSCIPLWVERTFLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYR----NINR-- +------KLSAAGMHEALKLAERPERPEF------------VLQNADY------------- +--------------------------------------------CRKKGHDS----- +> 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93] 
+------------------------------------------------------------ +------MPHLLS---GFLEVTASPAPTWDAPPDNVSGCGEQIN--------YGRVEKVVI +GSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDL +IGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAK +MILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GYTIYST +AVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF---------------------- +---------------------------------PGFPRV----QPESVISLNG------- +--VVKL----------------------QK---------EVEECAN-------------- +----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFIC +GTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQCQYR----NINR-- +------KLSAAGMHEALKLAERPERSEF------------VLQNSDH------------- +--------------------------------------------CGKKGHDT----- +> 31=p A47425 serotonin receptor 5HT-7 - rat +------------------------------------------------------------ +------MPHLLS---GFLEVTASPAPTWDAPPDNVSGCGEQIN--------YGRVEKVVI +GSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDL +IGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAK +MILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GYTIYST +AVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF---------------------- +---------------------------------PGFPRV----QPESVISLNG------- +--VVKL----------------------QK---------EVEECAN-------------- +----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFIC +GTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYR----NINR-- +------KLSAAGMHEALKLAERPERSEF------------VLQNSDH------------- +--------------------------------------------CGKKGHDT----- +> 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92] +----------MDVLSP-------------------------------------------- +---------------GQGNNTTSPPAPFET-GGNTTGISDVT---------VSYQ--VIT +SLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQV +L-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAA +LISLTWLIGFLI-SIPPMLG-WRTPEDRSDPD---ACTISKDH----------GYTIYST +FGAFYIPLLLMLVLYGRIF-------RAARFRIRK------------------------- 
+--------------TVKKVEKTGADTRHGASPAPQPKKS-----------VNGESGSR-- +-------NWRLGVESKAGGALCANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAG-- +PTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-C +ESSC-HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC----RQ---- +------------------------------------------------------------ +--------------------------------------------------------- +> 33=p A35181 serotonin receptor class 1A - rat +----------MDVFSF-------------------------------------------- +---------------GQGNNTTASQEPFGT-GGNVTSISDVT---------FSYQ--VIT +SLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQV +L-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAA +LISLTWLIGFLI-SIPPMLG-WRTPEDRSDPD---ACTISKDH----------GYTIYST +FGAFYIPLLLMLVLYGRIF-------RAARFRIRK------------------------- +--------------TVRKVEKKGAGTSLGTSSAPPPKKS-----------LNGQPGSG-- +-------DWRRCAENRAVGTPCTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESG-- +SNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-C +ESSC-HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC----RR---- +------------------------------------------------------------ +--------------------------------------------------------- +> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93] +MANFTFGDLALDVARMG-----GLASTPSGLRSTGLTTPGLSPTGLV------------T +SDFN--DSYGLT---GQFINGSHSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLT +SVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEI +S-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILL +MIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK----------GYTIFST +VGAFYLPMLVMMIIYIRIW-------LVARSRIRKDKFQMTKARLKTEETTLVASPKTEY +SVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS--- +-------SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS-- +-----------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF-V +DPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR----RGHR-- +------------------------------------------------------------ +--------------------------------------------------------- +> 35=p A47174 serotonin 
receptor, 5HTlym receptor - great pond snail +MANFTFGDLALDVARMG-----GLASTPSGLRSTGLTTPGLSPTGLV------------T +SDFN--DSYGLT---GQFINGSHSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLT +SVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEI +S-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILL +MIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK----------GYTIFST +VGAFYLPMLVMMIIYIRIW-------LVARSRIRKDKFQMTKARLKTEETTLVASPKTEY +SVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS--- +-------SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS-- +-----------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF-V +DPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR----RGHR-- +------------------------------------------------------------ +--------------------------------------------------------- +> 36== X95604 1 Bombyx mori serotonin receptor <antennae>[InsectBiochem.Mol.Bi +-MEGAEGQEELDWEAL-------YLRLP--LQNCSWNSTGWEPNWNV------------T +VVPN--TTWW------------QASAPFDTPAALVRAAAK-------------------- +AVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLV-MPLGAVYEV +V-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDYIHASTAKRVGM +MIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCVVSQDV----------GYQIFAT +ASSFYVPVLIILILYWRIY-------QTARKRIR-------------------------- +-------------------RRRGATARGGVGPPP---------VPAGGALVAGGGSGGIA +AAVVAVIGRPLPTISETTTTGFTNVSS----NNTSP---EKQSCANGLEADPPTTGYGAV +AAAYYPSLVRRKPKEAADSK------RERKAAKTLAIITGAFVACWLPFFVLAILVPT-C +DCE---VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLLCGRRV----RRRR-- +------A---------------PQ------------------------------------ +---------------------------------------------------------
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mafft_explicit_amino_blosum80.clustal.aln Wed Mar 20 07:34:52 2024 +0000 @@ -0,0 +1,458 @@ +CLUSTAL format alignment by MAFFT FFT-NS-2 (v7.520) + + +1== ----------------------------------------MN------------------ +2== ----------------------------------------MN------------------ +3== ----------------------------------------MN------------------ +4=p ----------------------------------------MN------------------ +5=p ----------------------------------------MN------------------ +6== ----------------------------------------MK------------------ +7== ----------------------------------------MR------------------ +8=opsin, ------------------------------------------------------------ +9== ----------------------------------------MAQQWSLQRLAGRHPQDS-- +10== ----------------------------------------MAQQWSLQRLAGRHPQDS-- +11== ----------------------------------------MTEAWNVAVFAARRSRDD-- +12== ----------------------------------------MA-AWEAAFAARRRHEE--- +13== ----------------------------------------MS------------------ +14== ----------MESGNVS-------------SSLFGNVSTALR------------------ +15== ----------MEYHNVS-------------SVL-GNVSSVLR------------------ +16== ----------ME-------------------PLCNASEPPLR------------------ +17== ----------MD-------------------ALCNASEPPLR------------------ +18== --------------------------------MTNATGPQMA------------------ +19== --------------------------------MANVTGPQMA------------------ +20== ----------ME---SF-------------AVAAAQLGPHFA------------------ +21== ----------ME---SF-------------AVAAAQLGPHFA------------------ +22== ----------MD---SF-------------AAVATQLGPQFA------------------ +23== -----MERSHLP---ET-------------PFDLAHSGPRFQ------------------ +24== -----MERSLLP---EP-------------PLAMALLGPRFE------------------ +25== --------------------------------MIAVSGPSYE------------------ +26== ----------------------------------MANQLSYS------------------ +27== 
---------------------------------------MVE------------------ +28== ----------------------------------------MG------------------ +29== ---------MMDVNSSGRPDLYGHLRSFL-LPEVGRGLPDLSPDGGADPVAG-SWAPHLL +30== -------------------------------------------------------MPHLL +31=p -------------------------------------------------------MPHLL +32== ----------MDVLSP-------------------------------------------- +33=p ----------MDVFSF-------------------------------------------- +34== MANFTFGDLALDVARMG-----GLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYG---L +35=p MANFTFGDLALDVARMG-----GLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYG---L +36== -MEGAEGQEELDWEAL-------YLRLP--LQNCSWNSTGWEPNWNVTVVPNTTW----- + + +1== -----GTE--GDN-FYVP----FSNKTGLARSPYEYPQY-YLAEPWK-----------YS +2== -----GTE--GPN-FYVP----FSNITGVVRSPFEQPQY-YLAEPWQ-----------FS +3== -----GTE--GIN-FYVP----MSNKTGVVRSPFEYPQY-YLAEPWK-----------YR +4=p -----GTE--GKN-FYVP----MSNRTGLVRSPFEYPQY-YLAEPWQ-----------FK +5=p -----GTE--GNN-FYVP----LSNRTGLVRSPFEYPQY-YLAEPWQ-----------FK +6== -----QVPEFHED-FYIPIPLDINNLSAY--SPFLVPQD-HLGNQGI-----------FM +7== -----KMS--EEE-FYL-----FKNISSV--GPWDGPQY-HIAPVWA-----------FY +8=opsin, ------------------------------------------------------------ +9== ----YEDSTQSSI-FT------YTNSNST-RGPFEGPNY-HIAPRWV-----------YH +10== ----YEDSTQSSI-FT------YTNSNST-RGPFEGPNY-HIAPRWV-----------YH +11== -----DDTTRGSV-FT------YTNTNNT-RGPFEGPNY-HIAPRWV-----------YN +12== -----EDTTRDSV-FT------YTNSNNT-RGPFEGPNY-HIAPRWV-----------YN +13== -----SNSSQAP-------------PNGT-PGPFDGPQWPYQAPQST-----------YV +14== -----PEARLSA-----------ETRLLGWNVPPEELR--HIPEHWLT--YPEPPESMNY +15== -----PDARLSA-----------ESRLLGWNVPPDELR--HIPEHWLI--YPEPPESMNY +16== -----PEAR-SSG---N-----GDLQFLGWNVPPDQIQ--YIPEHWLT--QLEPPASMHY +17== -----PEARMSSG---S-----DELQFLGWNVPPDQIQ--YIPEHWLT--QLEPPASMHY +18== -----YYGAASMD-FGY-----PEGVSIVDFVRPEIKP--YVHQHWYN--YPPVNPMWHY +19== -----FYGSGAAT-FGY-----PEGMTVADFVPDRVKH--MVLDHWYN--YPPVNPMWHY +20== 
-----PLS----------------NGSVVDKVTPDMAH--LISPYWNQ--FPAMDPIWAK +21== -----PLS----------------NGSVVDKVTPDMAH--LISPYWNQ--FPAMDPIWAK +22== -----APS----------------NGSVVDKVTPDMAH--LISPYWDQ--FPAMDPIWAK +23== -----AQSSG--------------NGSVLDNVLPDMAH--LVNPYWSR--FAPMDPMMSK +24== -----AQTGG--------------NRSVLDNVLPDMAP--LVNPHWSR--FAPMDPTMSK +25== -----AFSYGGQARF--------NNQTVVDKVPPDMLH--LIDANWYQ--YPPLNPMWHG +26== -----SLGWPYQP-----------NASVVDTMPKEMLY--MIHEHWYA--FPPMNPLWYS +27== -----STTLVNQT-WWY-------NPTVD------------IHPHWAK--FDPIPDAVYY +28== -----RDLRDNET-WWY-------NPSIV------------VHPHWRE--FDQVPDAVYY +29== S---EVTASPAPT-WDAP----PDNASGCGEQIN----------------YGRVE---KV +30== SGFLEVTASPAPT-WDAP----PDNVSGCGEQIN----------------YGRVE---KV +31=p SGFLEVTASPAPT-WDAP----PDNVSGCGEQIN----------------YGRVE---KV +32== -GQGNNTTSPPAP-FET-----GGNTTGISDVT-----------------VSYQ-----V +33=p -GQGNNTTASQEP-FGT-----GGNVTSISDVT-----------------FSYQ-----V +34== TGQFINGSHSSRS-RDNA----SANDTSATNMTDD--------RYWSLTVYSHEH---LV +35=p TGQFINGSHSSRS-RDNA----SANDTSATNMTDD--------RYWSLTVYSHEH---LV +36== -------WQASAP-FDTP----AALVRAAAK----------------------------- + + +1== ALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLFG-FTVTMYT +2== MLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGG-FTTTLYT +3== LVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACFG-FTVTFYT +4=p ILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCFG-FTVTFYT +5=p LLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCFG-FTVTFYT +6== AMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIANLFVAIFG-SPLSFYS +7== LQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIFS-VFPVFVA +8=opsin, --------------------------------------------DLAETVIA-STISIVN +9== LTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISVVN +10== LTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISIVN +11== LVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVA-STISVFN +12== LTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIA-STISVIN +13== 
GVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLCG-SSVSLSN +14== LLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVK--TPIFIYN +15== LLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIK--TPIFIYN +16== MLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIF--N +17== MLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIFIYN +18== LLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTN-VPFFTYN +19== LLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTN-FPPFCYN +20== ILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGIN +21== ILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGIN +22== ILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGIN +23== ILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIIN +24== ILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIIN +25== ILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCM-SPPMVIN +26== ILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFM-MPTMTSN +27== SVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAINGFPLKTIS +28== SLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLVNGFPLMTIS +29== VIGSILTLITLLTIAGNCLVVISVCFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVT +30== VIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVT +31=p VIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVT +32== ITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALY +33=p ITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALY +34== LTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVS +35=p LTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVS +36== --AVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLV-MPLGAVY + . 
+ +1== SMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGN-FRFGNTHA +2== SLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSN-FRFGENHA +3== AWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGN-FRFSATHA +4=p AIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FKFSSSHA +5=p AIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FKFSSTHA +6== FFN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGN-FTFKTPHA +7== SCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGN-FRFSSKHA +8=opsin, QVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VRFDAKLA +9== QVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGN-VRFDAKLA +10== QVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VRFDAKLA +11== QIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGN-IKFDSKLA +12== QIS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGN-IKFDGKLA +13== NIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGD-FQFQRRHA +14== SFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG--KMTHGKA +15== SFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG--KMTHGKA +16== SFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKA +17== SFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKA +18== CFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNG-PKLTTGKA +19== CFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNG-PKLTQGKA +20== LYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLA +21== LYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLA +22== LYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLA +23== FYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGING-TPMTIKTS +24== FYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGING-TPMTIKTS +25== CYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSG-KPLSINGA +26== CFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAA-APLTHKKA +27== AFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAASKKMSHRRA +28== CFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAASKKMSHRRA +29== DLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCM +30== 
DLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCM +31=p DLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCM +32== QVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRA +33=p QVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRA +34== EIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRI +35=p EIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRI +36== EVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDYIHASTAKRV + : : * : : :*: : : + +1== IMGVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSCGPDYYTLNPNFNNESYVVY +2== IMGVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSCGIDYYTLKPEVNNESFVIY +3== MMGIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSCGPDYYTHNPDYHNESYVLY +4=p FAGIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSCGPDYYTLNPDYNNESYVIY +5=p SAGIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSCGPDYYTLNPEYNNESYVLY +6== IAGCILPWISALAA-SLPPLFG-W-----SRYIPEGLQCSCGPDWYTTNNKYNNESYVMF +7== LTVVLATWTIGIGV-SIPPFFG-W-----SRFIPEGLQCSCGPDWYTVGTKYRSESYTWF +8=opsin, IVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYMIV +9== IVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYMIV +10== IVGIAFSWIWSAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYMIV +11== IIGIVFSWVWAWGW-SAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSVELGCQSFMLT +12== VAGILFSWLWSCAW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSDPGVQSYMVV +13== VSGCAFTWGWALLW-SAPPLLG-W-----SSYVPEGLRTSCGPNWYTGGSNN--NSYILS +14== IAMIIFIYMYATPW-VVACYTETW-----GRFVPEGYLTSCTFDYLT--DNFDTRLFVAC +15== IAMIIFIYLYATPW-VVACYTESW-----GRFVPEGYLTSCTFDYLT--DNFDTRLFVAC +16== VIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDTRLFVGT +17== VIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDTRLFVGT +18== VVFALISWVIAIGC-ALPPFFG-W-----GNYILEGILDSCSYDYLT--QDFNTFSYNIF +19== TFMCGLAWVISVGW-SLPPFFG-W-----GSYTLEGILDSCSYDYFT--RDMNTITYNIC +20== LGKM---------------------------YVPEGNLTSCGIDYLE--RDWNPRSYLIF +21== LGKIAYIWFMSSIW-CLAPAFG-W-----SRYVPEGNLTSCGIDYLE--RDWNPRSYLIF +22== LGKIAYIWFMSTIWCCLAPVFG-W-----SRYVPEGNLTSCGIDYLE--RDWNPRSYLIF +23== 
IMKILFIWMMAVFW-TVMPLIG-W-----SAYVPEGNLTACSIDYMT--RMWNPRSYLIT +24== IMKIAFIWMMAVFW-TIMPLIG-W-----SSYVPEGNLTACSIDYMT--RQWNPRSYLIT +25== LIRIIAIWLFSLGW-TIAPMFG-W-----NRYVPEGNMTACGTDYFN--RGLLSASYLVC +26== TLLLLFVWIWSGGW-TILPFFG-W-----SRYVPEGNLTSCTVDYLT--KDWSSASYVVI +27== FLMIIFVWMWSIVW-SVGPVFN-W-----GAYVPEGILTSCSFDYLS--TDPSTRSFILC +28== FIMIIFVWLWSVLW-AIGPIFG-W-----GAYTLEGVLCNCSFDYIS--RDSTTRSNILC +29== AKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GYTIY +30== AKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GYTIY +31=p AKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GYTIY +32== AALISLTWLIGFLI-SIPPMLG-WRTPEDRSDPD---ACTISKDH----------GYTIY +33=p AALISLTWLIGFLI-SIPPMLG-WRTPEDRSDPD---ACTISKDH----------GYTIY +34== LLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK----------GYTIF +35=p LLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK----------GYTIF +36== GMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCVVSQDV----------GYQIF + : + +1== MFVVHFLVPFVIIFFCYGRLLCTV----KE------------------------------ +2== MFVVHFTIPMIVIFFCYGQLVFTV----KE------------------------------ +3== MFVIHFIIPVVVIFFSYGRLICKV----RE------------------------------ +4=p MFVCHFILPVAVIFFTYGRLVCTV----KA------------------------------ +5=p MFICHFILPVTIIFFTYGRLVCTV----KA------------------------------ +6== LFCFCFAVPFGTIVFCYGQLLITL----KL------------------------------ +7== LFIFCFIVPLSLICFSYTQLLRAL----KA------------------------------ +8=opsin, LMVTCCITPLSIIVLCYLQVWLAI----RA------------------------------ +9== LMVTCCITPLSIIVLCYLQVWLAI----RA------------------------------ +10== LMVTCCIIPLAIIMLCYLQVWLAI----RA------------------------------ +11== LMITCCFLPLFIIIVCYLQVWMAI----RA------------------------------ +12== LMVTCCFFPLAIIILCYLQVWLAI----RA------------------------------ +13== LFVTCFVLPLSLILFSYTNLLLTL----RA------------------------------ +14== IFFFSFVCPTTMITYYYSQIVGHVFSHEKA------------------------------ +15== IFFFSFVCPTTMITYYYSQIVGHVFSHEKA------------------------------ +16== 
IFFFSFVCPTLMILYYYSQIVGHVFSHEKA------------------------------ +17== IFLFSFVVPTLMILYYYSQIVGHVFNHEKA------------------------------ +18== IFVFDYFLPAAIIVFSYVFIVKAIFAHEAA------------------------------ +19== IFIFDFFLPASVIVFSYVFIVKAIFAHEAA------------------------------ +20== YSIFVYYIPLFLICYSYWFIIAAVSAHEKA------------------------------ +21== YSIFVYYIPLFLICYSYWFIIAAVSAHEKA------------------------------ +22== YSIFVYYIPLFLICYSYWFIIAAVSAHEKA------------------------------ +23== YSLFVYYTPLFLICYSYWFIIAAVAAHEKA------------------------------ +24== YSLFVYYTPLFMICYSYWFIIATVAAHEKA------------------------------ +25== YGIWVYFVPLFLIIYSYWFIIQAVAAHEKN------------------------------ +26== YGLAVYFLPLITMIYCYFFIVHAVAEHEKQ------------------------------ +27== MYFCGFMLPIIIIAFCYFNIVMSVSNHEKE------------------------------ +28== MFILGFFGPILIIFFCYFNIVMSVSNHEKE------------------------------ +29== STAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF-----------------PGF------- +30== STAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF-----------------PGF------- +31=p STAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF-----------------PGF------- +32== STFGAFYIPLLLMLVLYGRIFRAARFRIRK------------------------------ +33=p STFGAFYIPLLLMLVLYGRIFRAARFRIRK------------------------------ +34== STVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTEYSVVSD +35=p STVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTEYSVVSD +36== ATASSFYVPVLIILILYWRIYQTARKRIR------------------------------- + * : * : + +1== --------------------------AAAAQQ---------------------------- +2== --------------------------AAAQQQ---------------------------- +3== --------------------------AAAQQQ---------------------------- +4=p --------------------------AAAQQQ---------------------------- +5=p --------------------------AAAQQQ---------------------------- +6== --------------------------AAKAQA---------------------------- +7== --------------------------VAAQQQ---------------------------- +8=opsin, --------------------------VAKQQK---------------------------- +9== 
--------------------------VAKQQK---------------------------- +10== --------------------------VAKQQK---------------------------- +11== --------------------------VAAQQK---------------------------- +12== --------------------------VAAQQK---------------------------- +13== --------------------------AAAQQK---------------------------- +14== --------------------------LRDQAKKM----------------------NVES +15== --------------------------LRDQAKKM----------------------NVDS +16== --------------------------LREQAKKM----------------------NVES +17== --------------------------LREQAKKM----------------------NVES +18== --------------------------MRAQAKKM----------------------NVST +19== --------------------------MRAQAKKM----------------------NVTN +20== --------------------------MREQAKKM----------------------NVKS +21== --------------------------MREQAKKM----------------------NVKS +22== --------------------------MREQAKKM----------------------NVKS +23== --------------------------MREQAKKM----------------------NVKS +24== --------------------------MRDQAKKM----------------------NVKS +25== --------------------------MREQAKKM----------------------NVAS +26== --------------------------LREQAKKM----------------------NVAS +27== --------------------------MAAMAKRL----------------------NAKE +28== --------------------------MAAMAKRL----------------------NAKE +29== --------------------------PRVEPD------------------------SVIA +30== --------------------------PRVQPE------------------------SVIS +31=p --------------------------PRVQPE------------------------SVIS +32== ---------TVKKVEKTGADTRHGASPAPQPKKS-----------VNGESGSR------- +33=p ---------TVRKVEKKGAGTSLGTSSAPPPKKS-----------LNGQPGSG------- +34== CNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS-------- +35=p CNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS-------- +36== --------------RRRGATARGGVGPPP---------VPAGGALVAGGGSGGIAAAVVA + + +1== ------------------------------------------------------------ +2== 
------------------------------------------------------------ +3== ------------------------------------------------------------ +4=p ------------------------------------------------------------ +5=p ------------------------------------------------------------ +6== ------------------------------------------------------------ +7== ------------------------------------------------------------ +8=opsin, ------------------------------------------------------------ +9== ------------------------------------------------------------ +10== ------------------------------------------------------------ +11== ------------------------------------------------------------ +12== ------------------------------------------------------------ +13== ------------------------------------------------------------ +14== L----------------------------------------------------------- +15== L----------------------------------------------------------- +16== L----------------------------------------------------------- +17== L----------------------------------------------------------- +18== L----------------------------------------------------------- +19== L----------------------------------------------------------- +20== L----------------------------------------------------------- +21== L----------------------------------------------------------- +22== L----------------------------------------------------------- +23== L----------------------------------------------------------- +24== L----------------------------------------------------------- +25== L----------------------------------------------------------- +26== L----------------------------------------------------------- +27== L----------------------------------------------------------- +28== L----------------------------------------------------------- +29== L-----------------NGIVKLQ--------KEVEECAN------------------L +30== L-----------------NGVVKLQ--------KEVEECAN------------------L +31=p 
L-----------------NGVVKLQ--------KEVEECAN------------------L +32== --NWRLGVESKAGGALCANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAGPTPCA-- +33=p --DWRRCAENRAVGTPCTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESGSNSYA-- +34== --SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS------- +35=p --SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS------- +36== VIGRPLPTISETTTTGFTNVSS----NNTS---PEKQSCANGLEADPPTTGYGAVAAAYY + + +1== --------ESASTQK------AEKEVTRMVVLMVIGFLVCWVPYASVAFYIFT-HQGS-- +2== --------ESATTQK------AEKEVTRMVIIMVIFFLICWLPYASVAMYIFT-HQGS-- +3== --------ESATTQK------AEKEVTRMVILMVLGFMLAWTPYAVVAFWIFT-NKGA-- +4=p --------DSASTQK------AEREVTKMVILMVFGFLIAWTPYATVAAWIFF-NKGA-- +5=p --------DSASTQK------AEREVTKMVILMVLGFLVAWTPYATVAAWIFF-NKGA-- +6== --------DSASTQK------AEREVTKMVVVMVLGFLVCWAPYASFSLWIVS-HRGE-- +7== --------ESATTQK------AEREVSRMVVVMVGSFCVCYVPYAAFAMYMVN-NRNH-- +8=opsin, --------ESESTQK------AEKEVTRMVVVMVLAFC---------------------- +9== --------ESESTQK------AEKEVTRMVVVMVLAFCFCWGPYAFFACFAAA-NPGY-- +10== --------ESESTQK------AEKEVTRMVVVMIFAYCVCWGPYTFFACFAAA-NPGY-- +11== --------ESESTQK------AEREVSRMVVVMIVAFCICWGPYASFVSFAAA-NPGY-- +12== --------ESESTQK------AEKEVSRMVVVMIVAYCFCWGPYTFFACFAAA-NPGY-- +13== --------EADTTQR------AEREVTRMVIVMVMAFLLCWLPYSTFALVVAT-HKGI-- +14== -----RSNVDKNKET------AEIRIAKAAITICFLFFCSWTPYGVMSLIGAF-GDKT-- +15== -----RSNVDKSKEA------AEIRIAKAAITICFLFFASWTPYGVMSLIGAF-GDKT-- +16== -----RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-GDKS-- +17== -----RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-GDKS-- +18== -----RS-NEADAQR------AEIRIAKTALVNVSLWFICWTPYALISLKGVM-GDTS-- +19== -----RS-NEAETQR------AEIRIAKTALVNVSLWFICWTPYAAITIQGLL-GNAE-- +20== -----RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMGLF-KF-E-- +21== -----RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMGLF-KF-E-- +22== -----RS-SEDADKS------AEGKLAKVALVTISLWFMAWTPYLVINCMGLF-KF-E-- +23== -----RS-SEDCDKS------AEGKLAKVALTTISLWFMAWTPYLVICYFGLF-KI-D-- +24== 
-----RS-SEDCDKS------AENKLAKVALTTISLWFMAWTPYLIICYFGLF-KI-D-- +25== -----RS-SENQNTS------AECKLAKVALMTISLWFMAWTPYLVINFSGIF-NL-V-- +26== -----RANADQQKQS------AECRLAKVAMMTVGLWFMAWTPYLIISWAGVF-SSGT-- +27== -----R--KAQAGAS------AEMKLAKISMVIITQFMLSWSPYAIIALLAQF-GPAE-- +28== -----R--KAQAGAN------AEMRLAKISIVIVSQFLLSWSPYAVVALLAQF-GPLE-- +29== SRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCS +30== SRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCS +31=p SRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCS +32== PASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-CESSC- +33=p PACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-CESSC- +34== ------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF-VDPE-- +35=p ------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF-VDPE-- +36== PSLVRRKPKEAADSK------RERKAAKTLAIITGAFVACWLPFFVLAILVPT-CDCE-- + * . : + +1== DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLCC--------GKNPLGDDE- +2== NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLCC--------GKNPLGDDE- +3== DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTICC--------GKNPFGDEDV +4=p DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIFC--------GKNPLGDDE- +5=p AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLFC--------GKNPLGDEE- +6== EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMMKMV-C--------GKN-IEEDE- +7== GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIMKMV-C--------GKA-MTDES- +8=opsin, ------------------------------------------------------------ +9== PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCILQLF----------GKK-VDDGS- +10== AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCILQLF----------GKK-VDDGS- +11== AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIMQLF----------GKK-VDDGS- +12== AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCILQLF----------GKK-VDDGS- +13== IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLLEMLCCG-----YQPQR-TGKAS- +14== LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWLAL---------NEKAP- +15== LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWLAI---------SEKAP- +16== LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWLGV---------NEKSG- +17== 
LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWLGV---------NEKSG- +18== GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWFCV------HE-TETKS- +19== GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWFCV------HE-KDPND- +20== GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GK-VDDGK- +21== GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GK-VDDGK- +22== GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GK-VDDGK- +23== GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMCVF------GN-TDEPK- +24== GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMCVC------GT-TDEPK- +25== KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSLAC-------A-AEPSS- +26== RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSLAC------GS-GESGS- +27== WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQFDEKE-CEDAN- +28== WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQFDDKE-TEDDK- +29== CIPLWVERTFLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYR--------------- +30== CIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQCQYR--------------- +31=p CIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYR--------------- +32== HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC--------------- +33=p HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC--------------- +34== GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR--------------- +35=p GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR--------------- +36== -VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLLCGRRV--------------- + + +1== -SGASTSKTEVSSVS-TSPVSPA------------------------------------- +2== -ASATASKTE------TSQVAPA------------------------------------- +3== SSTVSQSKTEVSSVS-SSQVSPA------------------------------------- +4=p SSTVSTSKTEVSS------VSPA------------------------------------- +5=p SSTVSTSKTEVSS------VSPA------------------------------------- +6== -ASTSSQVTQVSS------VAPEK------------------------------------ +7== -DTCSSQKTEVSTVS-STQVGPN------------------------------------- +8=opsin, ------------------------------------------------------------ +9== -ELSSASKTEVSSV---SSVSPA------------------------------------- +10== 
-ELSSASKTEVSSV---SSVSPA------------------------------------- +11== -EASTTSRTEVSSVS-NSSVAPA------------------------------------- +12== -EVST-SRTEVSSVS-NSSVSPA------------------------------------- +13== -PGTPGPHADVTAAGLRNKVMPAHPV---------------------------------- +14== -ESSAVASTSTTQEP--QQTTAA------------------------------------- +15== -ESRAAISTSTTQEQ--QQTTAA------------------------------------- +16== -EISSAQST-TTQEQ--QQTTAA------------------------------------- +17== -EASSAQST-TTQEQ-TQQTSAA------------------------------------- +18== -NDDSQSNSTVAQDK-A------------------------------------------- +19== -VEENQSSNTQTQEK-S------------------------------------------- +20== -SSDAQSQA-TASEA-ESKA---------------------------------------- +21== -SSDAQSQA-TASEA-ESKA---------------------------------------- +22== -SSEAQSQA-TTSEA-ESKA---------------------------------------- +23== -PDAPASDTETTSEA-DSKA---------------------------------------- +24== -PDAPPSDTETTSEA-ESKD---------------------------------------- +25== -DAVSTTSGTTTVTD-NEKSNA-------------------------------------- +26== -DVKSEASATTTMEE-KPKIPEA------------------------------------- +27== -DAEEEVVASER--G-GESRDAAQMKEMMAMMQKMQAQQAAYQPPPPPQGY--PPQGYPP +28== -DAETEIPAGESSDA-APSADAAQMKEMMAMMQKMQQQQAAY----PPQGYAPPPQGYPP +29== -NINRKLSAAGMHEALKLAERPERPEFVL--------QNADY------------------ +30== -NINRKLSAAGMHEALKLAERPERSEFVL--------QNSDH------------------ +31=p -NINRKLSAAGMHEALKLAERPERSEFVL--------QNSDH------------------ +32== -RQ--------------------------------------------------------- +33=p -RR--------------------------------------------------------- +34== -RGHR------------------------------------------------------- +35=p -RGHR------------------------------------------------------- +36== -RRRRA---------------PQ------------------------------------- + + +1== ---------------------------------------------------- +2== ---------------------------------------------------- +3== ---------------------------------------------------- +4=p 
---------------------------------------------------- +5=p ---------------------------------------------------- +6== ---------------------------------------------------- +7== ---------------------------------------------------- +8=opsin, ---------------------------------------------------- +9== ---------------------------------------------------- +10== ---------------------------------------------------- +11== ---------------------------------------------------- +12== ---------------------------------------------------- +13== ---------------------------------------------------- +14== ---------------------------------------------------- +15== ---------------------------------------------------- +16== ---------------------------------------------------- +17== ---------------------------------------------------- +18== ---------------------------------------------------- +19== ---------------------------------------------------- +20== ---------------------------------------------------- +21== ---------------------------------------------------- +22== ---------------------------------------------------- +23== ---------------------------------------------------- +24== ---------------------------------------------------- +25== ---------------------------------------------------- +26== ---------------------------------------------------- +27== QGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQAYQA +28== QGY--PPQGYPPQGYPPQGYPP---PPQGAPPQ-GAPPAAPPQGVDNQAYQA +29== ---------------------------------------CRKKGHDS----- +30== ---------------------------------------CGKKGHDT----- +31=p ---------------------------------------CGKKGHDT----- +32== ---------------------------------------------------- +33=p ---------------------------------------------------- +34== ---------------------------------------------------- +35=p ---------------------------------------------------- +36== ---------------------------------------------------- +
--- a/test-data/mafft_fftns_result.aln Tue Oct 31 15:48:53 2023 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,468 +0,0 @@ -> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91] -----------------------------------------MNG----------------T -E--G--DNFYVP----FSNKTGLARSPYEYPQY-------YLAEPWK---------YSAL -AAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLFG-FTVTMYTSM -N-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGN-FRFGNTHAIM -GVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSCGPDYYTLNPNFNNESYVVYMF -VVHFLVPFVIIFFCYGRLLCTV----KEAAAAQQ-------------------------- ------------------------------------------------------------- ------------------------------------------------------------- --------------ESASTQK------AEKEVTRMVVLMVIGFLVCWVPYASVAFYIFT-H -QGS--DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLCC--------GKNPL -GDDE--SGASTSKTEVSSVS-TSPVSPA-------------------------------- ---------------------------------------------------------- -> 2== U22180 1 rat opsin <rod>[J.Mol.Neurosci.5(3),207-209'94] -----------------------------------------MNG----------------T -E--G--PNFYVP----FSNITGVVRSPFEQPQY-------YLAEPWQ---------FSML -AAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGG-FTTTLYTSL -H-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSN-FRFGENHAIM -GVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMF -VVHFTIPMIVIFFCYGQLVFTV----KEAAAQQQ-------------------------- ------------------------------------------------------------- ------------------------------------------------------------- --------------ESATTQK------AEKEVTRMVIIMVIFFLICWLPYASVAMYIFT-H -QGS--NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLCC--------GKNPL -GDDE--ASATASKTE------TSQVAPA-------------------------------- ---------------------------------------------------------- -> 3== M92038 1 chicken green sensitive cone opsin <retina>[PNAS89,5932-5936'9 -----------------------------------------MNG----------------T -E--G--INFYVP----MSNKTGVVRSPFEYPQY-------YLAEPWK---------YRLV 
-CCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACFG-FTVTFYTAW -N-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGN-FRFSATHAMM -GIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSCGPDYYTHNPDYHNESYVLYMF -VIHFIIPVVVIFFSYGRLICKV----REAAAQQQ-------------------------- ------------------------------------------------------------- ------------------------------------------------------------- --------------ESATTQK------AEKEVTRMVILMVLGFMLAWTPYAVVAFWIFT-N -KGA--DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTICC--------GKNPF -GDEDVSSTVSQSKTEVSSVS-SSQVSPA-------------------------------- ---------------------------------------------------------- -> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish -----------------------------------------MNG----------------T -E--G--KNFYVP----MSNRTGLVRSPFEYPQY-------YLAEPWQ---------FKIL -ALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCFG-FTVTFYTAI -N-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FKFSSSHAFA -GIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSCGPDYYTLNPDYNNESYVIYMF -VCHFILPVAVIFFTYGRLVCTV----KAAAAQQQ-------------------------- ------------------------------------------------------------- ------------------------------------------------------------- --------------DSASTQK------AEREVTKMVILMVFGFLIAWTPYATVAAWIFF-N -KGA--DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIFC--------GKNPL -GDDE-SSTVSTSKTEVSS------VSPA-------------------------------- ---------------------------------------------------------- -> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish -----------------------------------------MNG----------------T -E--G--NNFYVP----LSNRTGLVRSPFEYPQY-------YLAEPWQ---------FKLL -AVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCFG-FTVTFYTAI -N-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FKFSSTHASA -GIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSCGPDYYTLNPEYNNESYVLYMF -ICHFILPVTIIFFTYGRLVCTV----KAAAAQQQ-------------------------- ------------------------------------------------------------- ------------------------------------------------------------- 
--------------DSASTQK------AEREVTKMVILMVLGFLVAWTPYATVAAWIFF-N -KGA--AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLFC--------GKNPL -GDEE-SSTVSTSKTEVSS------VSPA-------------------------------- ---------------------------------------------------------- -> 6== L11864 1 Carassius auratus blue cone opsin <retina>[Biochemistry32,208- -----------------------------------------MKQ----------------V -PEFH--EDFYIPIPLDINNLS--AYSPFLVPQD-------HLGNQGI---------FMAM -SVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIANLFVAIFG-SPLSFYSFF -N-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGN-FTFKTPHAIA -GCILPWISALAA-SLPPLFG-W-----SRYIPEGLQCSCGPDWYTTNNKYNNESYVMFLF -CFCFAVPFGTIVFCYGQLLITL----KLAAKAQA-------------------------- ------------------------------------------------------------- ------------------------------------------------------------- --------------DSASTQK------AEREVTKMVVVMVLGFLVCWAPYASFSLWIVS-H -RGE--EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMM-KMVC--------GKN-I -EEDE--ASTSSQVTQVSS------VAPEK------------------------------- ---------------------------------------------------------- -> 7== M13299 1 human BCP <>[Science232(4747),193-202'86] -----------------------------------------MRK----------------M -S--E--EEFYL-----FKNIS--SVGPWDGPQY-------HIAPVWA---------FYLQ -AAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIFS-VFPVFVASC -N-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGN-FRFSSKHALT -VVLATWTIGIGV-SIPPFFG-W-----SRFIPEGLQCSCGPDWYTVGTKYRSESYTWFLF -IFCFIVPLSLICFSYTQLLRAL----KAVAAQQQ-------------------------- ------------------------------------------------------------- ------------------------------------------------------------- --------------ESATTQK------AEREVSRMVVVMVGSFCVCYVPYAAFAMYMVN-N -RNH--GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIM-KMVC--------GKA-M -TDES--DTCSSQKTEVSTVS-STQVGPN-------------------------------- ---------------------------------------------------------- -> 8=opsin, greensensitive human (fragment) S07060 ------------------------------------------------------------- 
------------------------------------------------------------- -------------------------------------------DLAETVIA-STISIVNQV -S-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VRFDAKLAIV -GIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLM -VTCCITPLSIIVLCYLQVWLAI----RAVAKQQK-------------------------- ------------------------------------------------------------- ------------------------------------------------------------- --------------ESESTQK------AEKEVTRMVVVMVLAFC----------------- ------------------------------------------------------------- ------------------------------------------------------------- ---------------------------------------------------------- -> 9== K03494 1 human GCP <>[Science232(4747),193-202'86] -----------------------------------------MAQQWSLQRLAGRHPQDSYE -DSTQ--SSIFT-----YTNSNS-TRGPFEGPNY-------HIAPRWV---------YHLT -SVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISVVNQV -Y-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGN-VRFDAKLAIV -GIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLM -VTCCITPLSIIVLCYLQVWLAI----RAVAKQQK-------------------------- ------------------------------------------------------------- ------------------------------------------------------------- --------------ESESTQK------AEKEVTRMVVVMVLAFCFCWGPYAFFACFAAA-N -PGY--PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCIL-QLF---------GKK-V -DDGS--ELSSASKTEVSSV---SSVSPA-------------------------------- ---------------------------------------------------------- -> 10== Z68193 1 human Red Opsin <>[] -----------------------------------------MAQQWSLQRLAGRHPQDSYE -DSTQ--SSIFT-----YTNSNS-TRGPFEGPNY-------HIAPRWV---------YHLT -SVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISIVNQV -S-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VRFDAKLAIV -GIAFSWIWSAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLM -VTCCIIPLAIIMLCYLQVWLAI----RAVAKQQK-------------------------- ------------------------------------------------------------- 
------------------------------------------------------------- --------------ESESTQK------AEKEVTRMVVVMIFAYCVCWGPYTFFACFAAA-N -PGY--AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCIL-QLF---------GKK-V -DDGS--ELSSASKTEVSSV---SSVSPA-------------------------------- ---------------------------------------------------------- -> 11== M92036 1 Gecko gecko P521 <retina>[PNAS89,6841-6845'92] -----------------------------------------MTEAWNVAVFAARRSRDD-D -DTTR--GSVFT-----YTNTNN-TRGPFEGPNY-------HIAPRWV---------YNLV -SFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVA-STISVFNQI -F-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGN-IKFDSKLAII -GIVFSWVWAWGW-SAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSVELGCQSFMLTLM -ITCCFLPLFIIIVCYLQVWMAI----RAVAAQQK-------------------------- ------------------------------------------------------------- ------------------------------------------------------------- --------------ESESTQK------AEREVSRMVVVMIVAFCICWGPYASFVSFAAA-N -PGY--AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIM-QLF---------GKK-V -DDGS--EASTTSRTEVSSVS-NSSVAPA-------------------------------- ---------------------------------------------------------- -> 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90] -----------------------------------------MAA-WEAAFAARRRHEE--E -DTTR--DSVFT-----YTNSNN-TRGPFEGPNY-------HIAPRWV---------YNLT -SVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIA-STISVINQI -S-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGN-IKFDGKLAVA -GILFSWLWSCAW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSDPGVQSYMVVLM -VTCCFFPLAIIILCYLQVWLAI----RAVAAQQK-------------------------- ------------------------------------------------------------- ------------------------------------------------------------- --------------ESESTQK------AEKEVSRMVVVMIVAYCFCWGPYTFFACFAAA-N -PGY--AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCIL-QLF---------GKK-V -DDGS--EVST-SRTEVSSVS-NSSVSPA-------------------------------- ---------------------------------------------------------- -> 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95] 
-----------------------------------------MS-----------------S -NSSQ--AP-----------PNG-TPGPFDGPQW------PYQAPQST---------YVGV -AVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLCG-SSVSLSNNI -N-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGD-FQFQRRHAVS -GCAFTWGWALLW-SAPPLLG-W-----SSYVPEGLRTSCGPNWYTGGSNN--NSYILSLF -VTCFVLPLSLILFSYTNLLLTL----RAAAAQQK-------------------------- ------------------------------------------------------------- ------------------------------------------------------------- --------------EADTTQR------AEREVTRMVIVMVMAFLLCWLPYSTFALVVAT-H -KGI--IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLL-EMLCCGY----QPQR-T -GKAS--PGTPGPHADVTAAGLRNKVMPAHPV----------------------------- ---------------------------------------------------------- -> 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87] -----------MESGNVSS------------SLFGNVST-ALRP----------------E -ARLS--A----------ETRLLGWNVPPEELR--------HIPEHWLTYPEPPESMNYLL -GTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVK--TPIFIYNSF -H-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG--KMTHGKAIA -MIIFIYMYATPW-VVACYTETW-----GRFVPEGYLTSCTFDYLT--DNFDTRLFVACIF -FFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKK------------------------- ---------------------------------------------------MN-------- ---VESL------------------------------------------------------ -----------RSNVDKNKET------AEIRIAKAAITICFLFFCSWTPYGVMSLIGAF-G -DKT--LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWLAL---------N -EKAP--ESSAVASTSTTQEP--QQTTAA-------------------------------- ---------------------------------------------------------- -> 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92 -----------MEYHNVSS------------VL-GNVSS-VLRP----------------D -ARLS--A----------ESRLLGWNVPPDELR--------HIPEHWLIYPEPPESMNYLL -GTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIK--TPIFIYNSF -H-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG--KMTHGKAIA -MIIFIYLYATPW-VVACYTESW-----GRFVPEGYLTSCTFDYLT--DNFDTRLFVACIF -FFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKK------------------------- 
---------------------------------------------------MN-------- ---VDSL------------------------------------------------------ -----------RSNVDKSKEA------AEIRIAKAAITICFLFFASWTPYGVMSLIGAF-G -DKT--LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWLAI---------S -EKAP--ESRAAISTSTTQEQ--QQTTAA-------------------------------- ---------------------------------------------------------- -> 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87] -----------ME------------------PL-CNASEPPLRP----------------E -AR-S--SG---N----GDLQFLGWNVPPDQIQ--------YIPEHWLTQLEPPASMHYML -GVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIF--NSF -H-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKAVI -MNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDTRLFVGTIF -FFSFVCPTLMILYYYSQIVGHVFSHEKALREQAKK------------------------- ---------------------------------------------------MN-------- ---VESL------------------------------------------------------ -----------RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-G -DKS--LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWLGV---------N -EKSG--EISSAQST-TTQEQ--QQTTAA-------------------------------- ---------------------------------------------------------- -> 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92 -----------MD------------------AL-CNASEPPLRP----------------E -ARMS--SG---S----DELQFLGWNVPPDQIQ--------YIPEHWLTQLEPPASMHYML -GVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIFIYNSF -H-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKAVI -MNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDTRLFVGTIF -LFSFVVPTLMILYYYSQIVGHVFNHEKALREQAKK------------------------- ---------------------------------------------------MN-------- ---VESL------------------------------------------------------ -----------RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-G -DKS--LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWLGV---------N -EKSG--EASSAQST-TTQEQ-TQQTSAA-------------------------------- 
---------------------------------------------------------- -> 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 <compound eye>[J.Exp.Biol.1 ---------------------------------MTNATGPQMAY----------------Y -GAAS--MD-FGY----PEGVSIVDFVRPEIKP--------YVHQHWYNYPPVNPMWHYLL -GVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTN-VPFFTYNCF -SGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNG-PKLTTGKAVV -FALISWVIAIGC-ALPPFFG-W-----GNYILEGILDSCSYDYLT--QDFNTFSYNIFIF -VFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQAKK------------------------- ---------------------------------------------------MN-------- ---VSTL------------------------------------------------------ -----------RS-NEADAQR------AEIRIAKTALVNVSLWFICWTPYALISLKGVM-G -DTS--GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWFCV------HE-T -ETKS--NDDSQSNSTVAQDK-A-------------------------------------- ---------------------------------------------------------- -> 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 <compound eye>[J.Exp.Biol.1 ---------------------------------MANVTGPQMAF----------------Y -GSGA--AT-FGY----PEGMTVADFVPDRVKH--------MVLDHWYNYPPVNPMWHYLL -GVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTN-FPPFCYNCF -SGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNG-PKLTQGKATF -MCGLAWVISVGW-SLPPFFG-W-----GSYTLEGILDSCSYDYFT--RDMNTITYNICIF -IFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQAKK------------------------- ---------------------------------------------------MN-------- ---VTNL------------------------------------------------------ -----------RS-NEAETQR------AEIRIAKTALVNVSLWFICWTPYAAITIQGLL-G -NAE--GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWFCV------HE-K -DPND--VEENQSSNTQTQEK-S-------------------------------------- ---------------------------------------------------------- -> 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85] -----------ME----SF------------AVAAAQLGPHFAP----------------L -S-----------------NGSVVDKVTPDMAH--------LISPYWNQFPAMDPIWAKIL -TAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLY 
-F-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLALG -KM---------------------------YVPEGNLTSCGIDYLE--RDWNPRSYLIFYS -IFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK------------------------- ---------------------------------------------------MN-------- ---VKSL------------------------------------------------------ -----------RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMGLF-K -F-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GK-V -DDGK--SSDAQSQA-TASEA-ESKA----------------------------------- ---------------------------------------------------------- -> 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85] -----------ME----SF------------AVAAAQLGPHFAP----------------L -S-----------------NGSVVDKVTPDMAH--------LISPYWNQFPAMDPIWAKIL -TAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLY -F-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLALG -KIAYIWFMSSIW-CLAPAFG-W-----SRYVPEGNLTSCGIDYLE--RDWNPRSYLIFYS -IFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK------------------------- ---------------------------------------------------MN-------- ---VKSL------------------------------------------------------ -----------RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMGLF-K -F-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GK-V -DDGK--SSDAQSQA-TASEA-ESKA----------------------------------- ---------------------------------------------------------- -> 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204' -----------MD----SF------------AAVATQLGPQFAA----------------P -S-----------------NGSVVDKVTPDMAH--------LISPYWDQFPAMDPIWAKIL -TAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLY -F-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLALG -KIAYIWFMSTIWCCLAPVFG-W-----SRYVPEGNLTSCGIDYLE--RDWNPRSYLIFYS -IFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK------------------------- ---------------------------------------------------MN-------- ---VKSL------------------------------------------------------ -----------RS-SEDADKS------AEGKLAKVALVTISLWFMAWTPYLVINCMGLF-K 
-F-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GK-V -DDGK--SSEAQSQA-TTSEA-ESKA----------------------------------- ---------------------------------------------------------- -> 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86] ------MERSHLP----ET------------PFDLAHSGPRFQA----------------Q -SSG---------------NGSVLDNVLPDMAH--------LVNPYWSRFAPMDPMMSKIL -GLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINFY -Y-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGING-TPMTIKTSIM -KILFIWMMAVFW-TVMPLIG-W-----SAYVPEGNLTACSIDYMT--RMWNPRSYLITYS -LFVYYTPLFLICYSYWFIIAAVAAHEKAMREQAKK------------------------- ---------------------------------------------------MN-------- ---VKSL------------------------------------------------------ -----------RS-SEDCDKS------AEGKLAKVALTTISLWFMAWTPYLVICYFGLF-K -I-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMCVF------GN-T -DEPK--PDAPASDTETTSEA-DSKA----------------------------------- ---------------------------------------------------------- -> 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92 ------MERSLLP----EP------------PLAMALLGPRFEA----------------Q -TGG---------------NRSVLDNVLPDMAP--------LVNPHWSRFAPMDPTMSKIL -GLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINFY -Y-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGING-TPMTIKTSIM -KIAFIWMMAVFW-TIMPLIG-W-----SSYVPEGNLTACSIDYMT--RQWNPRSYLITYS -LFVYYTPLFMICYSYWFIIATVAAHEKAMRDQAKK------------------------- ---------------------------------------------------MN-------- ---VKSL------------------------------------------------------ -----------RS-SEDCDKS------AENKLAKVALTTISLWFMAWTPYLIICYFGLF-K -I-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMCVC------GT-T -DEPK--PDAPPSDTETTSEA-ESKD----------------------------------- ---------------------------------------------------------- -> 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[] ---------------------------------MIAVSGPSYEA----------------F -SYGG--QARF-------NNQTVVDKVPPDMLH--------LIDANWYQYPPLNPMWHGIL 
-GFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCM-SPPMVINCY -Y-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSG-KPLSINGALI -RIIAIWLFSLGW-TIAPMFG-W-----NRYVPEGNMTACGTDYFN--RGLLSASYLVCYG -IWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQAKK------------------------- ---------------------------------------------------MN-------- ---VASL------------------------------------------------------ -----------RS-SENQNTS------AECKLAKVALMTISLWFMAWTPYLVINFSGIF-N -L-V--KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSLAC-------A-A -EPSS--DAVSTTSGTTTVTD-NEKSNA--------------------------------- ---------------------------------------------------------- -> 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93] -----------------------------------MANQLSYSS----------------L -GWPY--QP----------NASVVDTMPKEMLY--------MIHEHWYAFPPMNPLWYSIL -GVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFM-MPTMTSNCF -A-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAA-APLTHKKATL -LLLFVWIWSGGW-TILPFFG-W-----SRYVPEGNLTSCTVDYLT--KDWSSASYVVIYG -LAVYFLPLITMIYCYFFIVHAVAEHEKQLREQAKK------------------------- ---------------------------------------------------MN-------- ---VASL------------------------------------------------------ -----------RANADQQKQS------AECRLAKVAMMTVGLWFMAWTPYLIISWAGVF-S -SGT--RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSLAC------GS-G -ESGS--DVKSEASATTTMEE-KPKIPEA-------------------------------- ---------------------------------------------------------- -> 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88] ----------------------------------------MVES----------------T -TLVN--QT-WWY------NPTVD------------------IHPHWAKFDPIPDAVYYSV -GIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAINGFPLKTISAF -M-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAASKKMSHRRAFL -MIIFVWMWSIVW-SVGPVFN-W-----GAYVPEGILTSCSFDYLS--TDPSTRSFILCMY -FCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAKR------------------------- ---------------------------------------------------LN-------- ---AKEL------------------------------------------------------ 
-----------R--KAQAGAS------AEMKLAKISMVIITQFMLSWSPYAIIALLAQF-G -PAE--WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQFDEKE-C -EDAN--DAEEEVVASER--G-GESRDAAQMKEMMAMMQKMQAQQAAYQPPPPPQGY--PP -QGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQAYQA -> 28== X70498 1 Todarodes pacificus rhodopsin <retina>[FEBS317(1-2),5-11'93] -----------------------------------------MGR----------------D -LRDN--ET-WWY------NPSIV------------------VHPHWREFDQVPDAVYYSL -GIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLVNGFPLMTISCF -L-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAASKKMSHRRAFI -MIIFVWLWSVLW-AIGPIFG-W-----GAYTLEGVLCNCSFDYIS--RDSTTRSNILCMF -ILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKR------------------------- ---------------------------------------------------LN-------- ---AKEL------------------------------------------------------ -----------R--KAQAGAN------AEMRLAKISIVIVSQFLLSWSPYAVVALLAQF-G -PLE--WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQFDDKE-T -EDDK--DAETEIPAGESSDA-APSADAAQMKEMMAMMQKMQQQQAAY----PPQGYAPPP -QGYPPQGY--PPQGYPPQGYPPQGYPP---PPQGAPPQ-GAPPAAPPQGVDNQAYQA -> 29== L21195 1 human serotonin 5-HT7 receptor protein <placenta and fetal bra ----------MMDVNSSGRPDLYGHLRSFL-LPEVGRGLPDLSPDGGA------------D -PVAGSWAPHLLS------EVTASPAPTWDAPPDNASGCGEQIN--------YGRVEKVVI -GSILTLITLLTIAGNCLVVISVCFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDL -IGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAK -MILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GYTIYST -AVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF---------------------- ----------------------------------PGFPRV----EPDSVIALNG------- ---IVKL----------------------QK---------EVEECAN-------------- -----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFIC -GTSCSCIPLWVERTFLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYR----NINR-- -------KLSAAGMHEALKLAERPERPEF------------VLQNADY------------- ---------------------------------------------CRKKGHDS----- -> 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93] 
------------------------------------------------------------- -------MPHLLS---GFLEVTASPAPTWDAPPDNVSGCGEQIN--------YGRVEKVVI -GSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDL -IGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAK -MILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GYTIYST -AVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF---------------------- ----------------------------------PGFPRV----QPESVISLNG------- ---VVKL----------------------QK---------EVEECAN-------------- -----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFIC -GTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQCQYR----NINR-- -------KLSAAGMHEALKLAERPERSEF------------VLQNSDH------------- ---------------------------------------------CGKKGHDT----- -> 31=p A47425 serotonin receptor 5HT-7 - rat ------------------------------------------------------------- -------MPHLLS---GFLEVTASPAPTWDAPPDNVSGCGEQIN--------YGRVEKVVI -GSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDL -IGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAK -MILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GYTIYST -AVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF---------------------- ----------------------------------PGFPRV----QPESVISLNG------- ---VVKL----------------------QK---------EVEECAN-------------- -----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFIC -GTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYR----NINR-- -------KLSAAGMHEALKLAERPERSEF------------VLQNSDH------------- ---------------------------------------------CGKKGHDT----- -> 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92] -----------MDVLSP-------------------------------------------- ----------------GQGNNTTSPPAPFET-GGNTTGISDVT---------VSYQ--VIT -SLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQV -L-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAA -LISLTWLIGFLI-SIPPMLG-WRTPEDRSDPD---ACTISKDH----------GYTIYST -FGAFYIPLLLMLVLYGRIF-------RAARFRIRK------------------------- 
---------------TVKKVEKTGADTRHGASPAPQPKKS-----------VNGESGSR-- --------NWRLGVESKAGGALCANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAG-- -PTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-C -ESSC-HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC----RQ---- ------------------------------------------------------------- ---------------------------------------------------------- -> 33=p A35181 serotonin receptor class 1A - rat -----------MDVFSF-------------------------------------------- ----------------GQGNNTTASQEPFGT-GGNVTSISDVT---------FSYQ--VIT -SLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQV -L-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAA -LISLTWLIGFLI-SIPPMLG-WRTPEDRSDPD---ACTISKDH----------GYTIYST -FGAFYIPLLLMLVLYGRIF-------RAARFRIRK------------------------- ---------------TVRKVEKKGAGTSLGTSSAPPPKKS-----------LNGQPGSG-- --------DWRRCAENRAVGTPCTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESG-- -SNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-C -ESSC-HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC----RR---- ------------------------------------------------------------- ---------------------------------------------------------- -> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93] -MANFTFGDLALDVARMG-----GLASTPSGLRSTGLTTPGLSPTGLV------------T -SDFN--DSYGLT---GQFINGSHSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLT -SVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEI -S-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILL -MIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK----------GYTIFST -VGAFYLPMLVMMIIYIRIW-------LVARSRIRKDKFQMTKARLKTEETTLVASPKTEY -SVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS--- --------SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS-- ------------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF-V -DPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR----RGHR-- ------------------------------------------------------------- ---------------------------------------------------------- -> 35=p A47174 serotonin 
receptor, 5HTlym receptor - great pond snail -MANFTFGDLALDVARMG-----GLASTPSGLRSTGLTTPGLSPTGLV------------T -SDFN--DSYGLT---GQFINGSHSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLT -SVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEI -S-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILL -MIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK----------GYTIFST -VGAFYLPMLVMMIIYIRIW-------LVARSRIRKDKFQMTKARLKTEETTLVASPKTEY -SVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS--- --------SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS-- ------------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF-V -DPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR----RGHR-- ------------------------------------------------------------- ---------------------------------------------------------- -> 36== X95604 1 Bombyx mori serotonin receptor <antennae>[InsectBiochem.Mol.Bi --MEGAEGQEELDWEAL-------YLRLP--LQNCSWNSTGWEPNWNV------------T -VVPN--TTWW------------QASAPFDTPAALVRAAAK-------------------- -AVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLV-MPLGAVYEV -V-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDYIHASTAKRVGM -MIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCVVSQDV----------GYQIFAT -ASSFYVPVLIILILYWRIY-------QTARKRIR-------------------------- --------------------RRRGATARGGVGPPP---------VPAGGALVAGGGSGGIA -AAVVAVIGRPLPTISETTTTGFTNVSS----NNTSP---EKQSCANGLEADPPTTGYGAV -AAAYYPSLVRRKPKEAADSK------RERKAAKTLAIITGAFVACWLPFFVLAILVPT-C -DCE---VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLLCGRRV----RRRR-- -------A---------------PQ------------------------------------ ----------------------------------------------------------
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mafft_kimura40.phylip.aln Wed Mar 20 07:34:52 2024 +0000 @@ -0,0 +1,77 @@ + 3 948 +MZ681498.1 atgcatgtat aagtataacc tgccagacag ggaaactgcg gacggctcat +MZ681497.1 ---------- ---------- ---------- -----ctgcg gacggctcat +ON855043.1 ---------- ---------- -ggccgtca- -----ctccg gggggacaac + + tacaacagcc ttaatttact tgaccttgac aacctacttg gataactgcg + tacaacagcc ttaatttact tgaccttgac aacctacttg gataactgcg + acaaacgccc tgagggcttt actcgttggg gtgcaaactg ga-------- + + gtaattctgg agctaataca tgcaccaaag ctccgatccc tcgtggagag + gtaattctgg agctaataca tgcaccaaag ctccgatccc tcgtggagag + ---------- ---------- ---------- ---------- ---tcgagtg + + gagcgcattt gttcgccaca aaaccaaacg ccccacaagg gcgttcagtg + gagcgcattt gttcgccaca aaaccaaacg ccccacaagg gcgttcagtg + gcgcacacat cccttccacg caa--agacc tgctgaagag gtcggaggcg + + ttgactcaga ataact---- ---------- -------aag ctgatcgcac + ttgactcaga ataact---- ---------- -------aag ctgatcgcac + atgagtccga gcaaccccac aagcaaccag gttggggaag ctg--cacac + + ggtctt---- ---------- ----gcaccg gcgacgtgtc tttcaagtgt + ggtctt---- ---------- ----gcaccg gcgacgtgtc tttcaagtgt + gatactggga tgcacgcccc cagggcacct aacggctgcc gctggcgtct + + ctgccttatc aactttcgat ggtagtgtat ctgcctacca tggttgtgac + ctgccttatc aactttcgat ggtagtgtat ctgcctacca tggttgtgac + gtgcgtcgtt ga---gcagt tgttgcgcac ttgctt---- ttgtcggagc + + gggtaacgga ggataagggt tcgactccgg agaaggggcc tgagaaatgg + gggtaacgga ggataagggt tcgactccgg agaaggggcc tgagaaatgg + tgtactcgga gcatgctggc atggacccac acaaaag--- ---------- + + ccactacgtc taaggatggc agcaggcgcg caaattaccc actctcaaca + ccactacgtc taaggatggc agcaggcgcg caaattaccc actctcaaca + ---------- --tgtgtggc agcggccaca ca------cc cctgtccatg + + cgctgaggag gtagtgaaga gaaataacga gaccgttctc acatgaggcc + cgctgaggag gtagtgaaga gaaataacga gaccgttctc acatgaggcc + tcctacgga- ---------- -ccgtagcta gggcgtgct- ---------- + + ggtcatcgga atgggtacaa cttaaaccct ttaacgagta tctatgagag + ggtcatcgga atgggtacaa cttaaaccct 
ttaacgagta tctatgagag + ---------- ---------- ---------- ---------- ---------- + + ggcaagtctg gtgccagcag ccgcggtaat tccagctctc aaaatgcata + ggcaagtctg gtgccagcag ccgcggtaat tccagctctc aaaatgcata + ---------- ---------- -ctgggtttc ttcggctggc agtgttgcta + + gaattattgc tgcggttaaa aagctcgtag ttggatctgt gctggccgcc + gaattattgc tgcggttaaa aagctcgtag ttggatctgt gctggccgcc + cgtccgtggc tgtgatgaga cgacgcg--- ---------- ---------- + + cggtctgctc gctgagcacg cactggtgcg gtcggctttc ctgcccggta + cggtctgctc gctgagcacg cactggtgcg gtcggctttc ctgcccggta + ---------- -----gtagg gccttgtgcg atgcgcct-- ---------- + + cctccccggc gttggccttc accggtcggc gtcggtggcc gggcgagttt + cctccccggc gttggccttc accggtcggc gtcggtggcc gggcgagttt + --------gc acttggctta a--------- ------gact tgatgagctc + + actttgaaca aatcagag-- -----tgctt caaacaggcg tttcgcttga + actttgaaca aatcagag-- -----tgctt caaacaggcg tttcgcttga + actgcgaaga gccgccagca accttttttt catatacatt ttttacaggc + + atgttcgtgc atggaataat agaagaggat ttcggtccga ttttgttggt + atgttcgtgc atggaataat agaagaggat ttcggtccga ttttgttggt + acacttgtgt gctgatgaac aaaa------ ---------- ---------- + + tttgctgacc gagataatgg ttaacagaga caaacggggc cattcgtatt + tttgctgacc gagataatgg ttaacagaga caaacggggc cattcgtatt + ---------- ---------- ---------- ---------- -------att + + gctacgtgag aggtg----- ---------- ---------- -------- + gctacgtgag aggtgaaatt cttggaccgt agcaagacgg actacagc + ctagccttat cggtggatca ctcggctcgt aggtcgatg- -------- +
--- a/test-data/mafft_nwns_result.aln Tue Oct 31 15:48:53 2023 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,458 +0,0 @@ -CLUSTAL format alignment by MAFFT NW-NS-2 (v7.455) - - -1== ----------------------------------------MNG----------------T -2== ----------------------------------------MNG----------------T -3== ----------------------------------------MNG----------------T -4=p ----------------------------------------MNG----------------T -5=p ----------------------------------------MNG----------------T -6== ----------------------------------------MKQ----------------V -7== ----------------------------------------MRK----------------M -8=opsin, ------------------------------------------------------------ -9== ----------------------------------------MAQQWSLQRLAGRHPQDSYE -10== ----------------------------------------MAQQWSLQRLAGRHPQDSYE -11== ----------------------------------------MTEAWNVAVFAARRSRDD-D -12== ----------------------------------------MAA-WEAAFAARRRHEE--E -13== ----------------------------------------MS-----------------S -14== ----------MESGNVSS------------SLFGNVST-ALRP----------------E -15== ----------MEYHNVSS------------VL-GNVSS-VLRP----------------D -16== ----------ME------------------PL-CNASEPPLRP----------------E -17== ----------MD------------------AL-CNASEPPLRP----------------E -18== --------------------------------MTNATGPQMAY----------------Y -19== --------------------------------MANVTGPQMAF----------------Y -20== ----------ME----SF------------AVAAAQLGPHFAP----------------L -21== ----------ME----SF------------AVAAAQLGPHFAP----------------L -22== ----------MD----SF------------AAVATQLGPQFAA----------------P -23== -----MERSHLP----ET------------PFDLAHSGPRFQA----------------Q -24== -----MERSLLP----EP------------PLAMALLGPRFEA----------------Q -25== --------------------------------MIAVSGPSYEA----------------F -26== ----------------------------------MANQLSYSS----------------L -27== ---------------------------------------MVES----------------T -28== 
----------------------------------------MGR----------------D -29== ---------MMDVNSSGRPDLYGHLRSFL-LPEVGRGLPDLSPDGGA------------D -30== ------------------------------------------------------------ -31=p ------------------------------------------------------------ -32== ----------MDVLSP-------------------------------------------- -33=p ----------MDVFSF-------------------------------------------- -34== MANFTFGDLALDVARMG-----GLASTPSGLRSTGLTTPGLSPTGLV------------T -35=p MANFTFGDLALDVARMG-----GLASTPSGLRSTGLTTPGLSPTGLV------------T -36== -MEGAEGQEELDWEAL-------YLRLP--LQNCSWNSTGWEPNWNV------------T - - -1== E--G--DNFYVP----FSNKTGLARSPYEYPQY-------YLAEPWK---------YSAL -2== E--G--PNFYVP----FSNITGVVRSPFEQPQY-------YLAEPWQ---------FSML -3== E--G--INFYVP----MSNKTGVVRSPFEYPQY-------YLAEPWK---------YRLV -4=p E--G--KNFYVP----MSNRTGLVRSPFEYPQY-------YLAEPWQ---------FKIL -5=p E--G--NNFYVP----LSNRTGLVRSPFEYPQY-------YLAEPWQ---------FKLL -6== PEFH--EDFYIPIPLDINNLS--AYSPFLVPQD-------HLGNQGI---------FMAM -7== S--E--EEFYL-----FKNIS--SVGPWDGPQY-------HIAPVWA---------FYLQ -8=opsin, ------------------------------------------------------------ -9== DSTQ--SSIFT-----YTNSNS-TRGPFEGPNY-------HIAPRWV---------YHLT -10== DSTQ--SSIFT-----YTNSNS-TRGPFEGPNY-------HIAPRWV---------YHLT -11== DTTR--GSVFT-----YTNTNN-TRGPFEGPNY-------HIAPRWV---------YNLV -12== DTTR--DSVFT-----YTNSNN-TRGPFEGPNY-------HIAPRWV---------YNLT -13== NSSQ--AP-----------PNG-TPGPFDGPQW------PYQAPQST---------YVGV -14== ARLS--A----------ETRLLGWNVPPEELR--------HIPEHWLTYPEPPESMNYLL -15== ARLS--A----------ESRLLGWNVPPDELR--------HIPEHWLIYPEPPESMNYLL -16== AR-S--SG---N----GDLQFLGWNVPPDQIQ--------YIPEHWLTQLEPPASMHYML -17== ARMS--SG---S----DELQFLGWNVPPDQIQ--------YIPEHWLTQLEPPASMHYML -18== GAAS--MD-FGY----PEGVSIVDFVRPEIKP--------YVHQHWYNYPPVNPMWHYLL -19== GSGA--AT-FGY----PEGMTVADFVPDRVKH--------MVLDHWYNYPPVNPMWHYLL -20== S-----------------NGSVVDKVTPDMAH--------LISPYWNQFPAMDPIWAKIL -21== 
S-----------------NGSVVDKVTPDMAH--------LISPYWNQFPAMDPIWAKIL -22== S-----------------NGSVVDKVTPDMAH--------LISPYWDQFPAMDPIWAKIL -23== SSG---------------NGSVLDNVLPDMAH--------LVNPYWSRFAPMDPMMSKIL -24== TGG---------------NRSVLDNVLPDMAP--------LVNPHWSRFAPMDPTMSKIL -25== SYGG--QARF-------NNQTVVDKVPPDMLH--------LIDANWYQYPPLNPMWHGIL -26== GWPY--QP----------NASVVDTMPKEMLY--------MIHEHWYAFPPMNPLWYSIL -27== TLVN--QT-WWY------NPTVD------------------IHPHWAKFDPIPDAVYYSV -28== LRDN--ET-WWY------NPSIV------------------VHPHWREFDQVPDAVYYSL -29== PVAGSWAPHLLS------EVTASPAPTWDAPPDNASGCGEQIN--------YGRVEKVVI -30== ------MPHLLS---GFLEVTASPAPTWDAPPDNVSGCGEQIN--------YGRVEKVVI -31=p ------MPHLLS---GFLEVTASPAPTWDAPPDNVSGCGEQIN--------YGRVEKVVI -32== ---------------GQGNNTTSPPAPFET-GGNTTGISDVT---------VSYQ--VIT -33=p ---------------GQGNNTTASQEPFGT-GGNVTSISDVT---------FSYQ--VIT -34== SDFN--DSYGLT---GQFINGSHSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLT -35=p SDFN--DSYGLT---GQFINGSHSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLT -36== VVPN--TTWW------------QASAPFDTPAALVRAAAK-------------------- - - -1== AAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLFG-FTVTMYTSM -2== AAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGG-FTTTLYTSL -3== CCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACFG-FTVTFYTAW -4=p ALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCFG-FTVTFYTAI -5=p AVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCFG-FTVTFYTAI -6== SVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIANLFVAIFG-SPLSFYSFF -7== AAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIFS-VFPVFVASC -8=opsin, ------------------------------------------DLAETVIA-STISIVNQV -9== SVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISVVNQV -10== SVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISIVNQV -11== SFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVA-STISVFNQI -12== SVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIA-STISVINQI -13== AVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLCG-SSVSLSNNI -14== 
GTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVK--TPIFIYNSF -15== GTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIK--TPIFIYNSF -16== GVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIF--NSF -17== GVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIFIYNSF -18== GVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTN-VPFFTYNCF -19== GVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTN-FPPFCYNCF -20== TAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLY -21== TAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLY -22== TAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLY -23== GLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINFY -24== GLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINFY -25== GFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCM-SPPMVINCY -26== GVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFM-MPTMTSNCF -27== GIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAINGFPLKTISAF -28== GIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLVNGFPLMTISCF -29== GSILTLITLLTIAGNCLVVISVCFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDL -30== GSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDL -31=p GSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDL -32== SLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQV -33=p SLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQV -34== SVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEI -35=p SVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEI -36== AVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLV-MPLGAVYEV - . 
- -1== N-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGN-FRFGNTHAIM -2== H-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSN-FRFGENHAIM -3== N-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGN-FRFSATHAMM -4=p N-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FKFSSSHAFA -5=p N-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FKFSSTHASA -6== N-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGN-FTFKTPHAIA -7== N-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGN-FRFSSKHALT -8=opsin, S-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VRFDAKLAIV -9== Y-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGN-VRFDAKLAIV -10== S-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VRFDAKLAIV -11== F-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGN-IKFDSKLAII -12== S-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGN-IKFDGKLAVA -13== N-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGD-FQFQRRHAVS -14== H-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG--KMTHGKAIA -15== H-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG--KMTHGKAIA -16== H-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKAVI -17== H-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKAVI -18== SGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNG-PKLTTGKAVV -19== SGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNG-PKLTQGKATF -20== F-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLALG -21== F-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLALG -22== F-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLALG -23== Y-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGING-TPMTIKTSIM -24== Y-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGING-TPMTIKTSIM -25== Y-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSG-KPLSINGALI -26== A-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAA-APLTHKKATL -27== M-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAASKKMSHRRAFL -28== L-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAASKKMSHRRAFI -29== IGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAK -30== 
IGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAK -31=p IGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAK -32== L-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAA -33=p L-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAA -34== S-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILL -35=p S-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILL -36== V-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDYIHASTAKRVGM - : : * : : :*: : : - -1== GVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSCGPDYYTLNPNFNNESYVVYMF -2== GVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMF -3== GIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSCGPDYYTHNPDYHNESYVLYMF -4=p GIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSCGPDYYTLNPDYNNESYVIYMF -5=p GIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSCGPDYYTLNPEYNNESYVLYMF -6== GCILPWISALAA-SLPPLFG-W-----SRYIPEGLQCSCGPDWYTTNNKYNNESYVMFLF -7== VVLATWTIGIGV-SIPPFFG-W-----SRFIPEGLQCSCGPDWYTVGTKYRSESYTWFLF -8=opsin, GIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLM -9== GIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLM -10== GIAFSWIWSAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLM -11== GIVFSWVWAWGW-SAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSVELGCQSFMLTLM -12== GILFSWLWSCAW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSDPGVQSYMVVLM -13== GCAFTWGWALLW-SAPPLLG-W-----SSYVPEGLRTSCGPNWYTGGSNN--NSYILSLF -14== MIIFIYMYATPW-VVACYTETW-----GRFVPEGYLTSCTFDYLT--DNFDTRLFVACIF -15== MIIFIYLYATPW-VVACYTESW-----GRFVPEGYLTSCTFDYLT--DNFDTRLFVACIF -16== MNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDTRLFVGTIF -17== MNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDTRLFVGTIF -18== FALISWVIAIGC-ALPPFFG-W-----GNYILEGILDSCSYDYLT--QDFNTFSYNIFIF -19== MCGLAWVISVGW-SLPPFFG-W-----GSYTLEGILDSCSYDYFT--RDMNTITYNICIF -20== KM---------------------------YVPEGNLTSCGIDYLE--RDWNPRSYLIFYS -21== KIAYIWFMSSIW-CLAPAFG-W-----SRYVPEGNLTSCGIDYLE--RDWNPRSYLIFYS -22== KIAYIWFMSTIWCCLAPVFG-W-----SRYVPEGNLTSCGIDYLE--RDWNPRSYLIFYS -23== 
KILFIWMMAVFW-TVMPLIG-W-----SAYVPEGNLTACSIDYMT--RMWNPRSYLITYS -24== KIAFIWMMAVFW-TIMPLIG-W-----SSYVPEGNLTACSIDYMT--RQWNPRSYLITYS -25== RIIAIWLFSLGW-TIAPMFG-W-----NRYVPEGNMTACGTDYFN--RGLLSASYLVCYG -26== LLLFVWIWSGGW-TILPFFG-W-----SRYVPEGNLTSCTVDYLT--KDWSSASYVVIYG -27== MIIFVWMWSIVW-SVGPVFN-W-----GAYVPEGILTSCSFDYLS--TDPSTRSFILCMY -28== MIIFVWLWSVLW-AIGPIFG-W-----GAYTLEGVLCNCSFDYIS--RDSTTRSNILCMF -29== MILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GYTIYST -30== MILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GYTIYST -31=p MILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GYTIYST -32== LISLTWLIGFLI-SIPPMLG-WRTPEDRSDPD---ACTISKDH----------GYTIYST -33=p LISLTWLIGFLI-SIPPMLG-WRTPEDRSDPD---ACTISKDH----------GYTIYST -34== MIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK----------GYTIFST -35=p MIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK----------GYTIFST -36== MIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCVVSQDV----------GYQIFAT - : - -1== VVHFLVPFVIIFFCYGRLLCTV----KEAAAAQQ-------------------------- -2== VVHFTIPMIVIFFCYGQLVFTV----KEAAAQQQ-------------------------- -3== VIHFIIPVVVIFFSYGRLICKV----REAAAQQQ-------------------------- -4=p VCHFILPVAVIFFTYGRLVCTV----KAAAAQQQ-------------------------- -5=p ICHFILPVTIIFFTYGRLVCTV----KAAAAQQQ-------------------------- -6== CFCFAVPFGTIVFCYGQLLITL----KLAAKAQA-------------------------- -7== IFCFIVPLSLICFSYTQLLRAL----KAVAAQQQ-------------------------- -8=opsin, VTCCITPLSIIVLCYLQVWLAI----RAVAKQQK-------------------------- -9== VTCCITPLSIIVLCYLQVWLAI----RAVAKQQK-------------------------- -10== VTCCIIPLAIIMLCYLQVWLAI----RAVAKQQK-------------------------- -11== ITCCFLPLFIIIVCYLQVWMAI----RAVAAQQK-------------------------- -12== VTCCFFPLAIIILCYLQVWLAI----RAVAAQQK-------------------------- -13== VTCFVLPLSLILFSYTNLLLTL----RAAAAQQK-------------------------- -14== FFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKK------------------------- -15== FFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKK------------------------- -16== 
FFSFVCPTLMILYYYSQIVGHVFSHEKALREQAKK------------------------- -17== LFSFVVPTLMILYYYSQIVGHVFNHEKALREQAKK------------------------- -18== VFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQAKK------------------------- -19== IFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQAKK------------------------- -20== IFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK------------------------- -21== IFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK------------------------- -22== IFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK------------------------- -23== LFVYYTPLFLICYSYWFIIAAVAAHEKAMREQAKK------------------------- -24== LFVYYTPLFMICYSYWFIIATVAAHEKAMRDQAKK------------------------- -25== IWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQAKK------------------------- -26== LAVYFLPLITMIYCYFFIVHAVAEHEKQLREQAKK------------------------- -27== FCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAKR------------------------- -28== ILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKR------------------------- -29== AVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF---------------------- -30== AVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF---------------------- -31=p AVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF---------------------- -32== FGAFYIPLLLMLVLYGRIF-------RAARFRIRK------------------------- -33=p FGAFYIPLLLMLVLYGRIF-------RAARFRIRK------------------------- -34== VGAFYLPMLVMMIIYIRIW-------LVARSRIRKDKFQMTKARLKTEETTLVASPKTEY -35=p VGAFYLPMLVMMIIYIRIW-------LVARSRIRKDKFQMTKARLKTEETTLVASPKTEY -36== ASSFYVPVLIILILYWRIY-------QTARKRIR-------------------------- - * : * : - -1== ------------------------------------------------------------ -2== ------------------------------------------------------------ -3== ------------------------------------------------------------ -4=p ------------------------------------------------------------ -5=p ------------------------------------------------------------ -6== ------------------------------------------------------------ -7== ------------------------------------------------------------ -8=opsin, ------------------------------------------------------------ -9== 
------------------------------------------------------------ -10== ------------------------------------------------------------ -11== ------------------------------------------------------------ -12== ------------------------------------------------------------ -13== ------------------------------------------------------------ -14== --------------------------------------------------MN-------- -15== --------------------------------------------------MN-------- -16== --------------------------------------------------MN-------- -17== --------------------------------------------------MN-------- -18== --------------------------------------------------MN-------- -19== --------------------------------------------------MN-------- -20== --------------------------------------------------MN-------- -21== --------------------------------------------------MN-------- -22== --------------------------------------------------MN-------- -23== --------------------------------------------------MN-------- -24== --------------------------------------------------MN-------- -25== --------------------------------------------------MN-------- -26== --------------------------------------------------MN-------- -27== --------------------------------------------------LN-------- -28== --------------------------------------------------LN-------- -29== ---------------------------------PGFPRV----EPDSVIALNG------- -30== ---------------------------------PGFPRV----QPESVISLNG------- -31=p ---------------------------------PGFPRV----QPESVISLNG------- -32== --------------TVKKVEKTGADTRHGASPAPQPKKS-----------VNGESGSR-- -33=p --------------TVRKVEKKGAGTSLGTSSAPPPKKS-----------LNGQPGSG-- -34== SVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS--- -35=p SVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS--- -36== -------------------RRRGATARGGVGPPP---------VPAGGALVAGGGSGGIA - - -1== ------------------------------------------------------------ -2== 
------------------------------------------------------------ -3== ------------------------------------------------------------ -4=p ------------------------------------------------------------ -5=p ------------------------------------------------------------ -6== ------------------------------------------------------------ -7== ------------------------------------------------------------ -8=opsin, ------------------------------------------------------------ -9== ------------------------------------------------------------ -10== ------------------------------------------------------------ -11== ------------------------------------------------------------ -12== ------------------------------------------------------------ -13== ------------------------------------------------------------ -14== --VESL------------------------------------------------------ -15== --VDSL------------------------------------------------------ -16== --VESL------------------------------------------------------ -17== --VESL------------------------------------------------------ -18== --VSTL------------------------------------------------------ -19== --VTNL------------------------------------------------------ -20== --VKSL------------------------------------------------------ -21== --VKSL------------------------------------------------------ -22== --VKSL------------------------------------------------------ -23== --VKSL------------------------------------------------------ -24== --VKSL------------------------------------------------------ -25== --VASL------------------------------------------------------ -26== --VASL------------------------------------------------------ -27== --AKEL------------------------------------------------------ -28== --AKEL------------------------------------------------------ -29== --IVKL----------------------QK---------EVEECAN-------------- -30== --VVKL----------------------QK---------EVEECAN-------------- -31=p 
--VVKL----------------------QK---------EVEECAN-------------- -32== -------NWRLGVESKAGGALCANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAG-- -33=p -------DWRRCAENRAVGTPCTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESG-- -34== -------SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS-- -35=p -------SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS-- -36== AAVVAVIGRPLPTISETTTTGFTNVSS----NNTSP---EKQSCANGLEADPPTTGYGAV - - -1== -------------ESASTQK------AEKEVTRMVVLMVIGFLVCWVPYASVAFYIFT-H -2== -------------ESATTQK------AEKEVTRMVIIMVIFFLICWLPYASVAMYIFT-H -3== -------------ESATTQK------AEKEVTRMVILMVLGFMLAWTPYAVVAFWIFT-N -4=p -------------DSASTQK------AEREVTKMVILMVFGFLIAWTPYATVAAWIFF-N -5=p -------------DSASTQK------AEREVTKMVILMVLGFLVAWTPYATVAAWIFF-N -6== -------------DSASTQK------AEREVTKMVVVMVLGFLVCWAPYASFSLWIVS-H -7== -------------ESATTQK------AEREVSRMVVVMVGSFCVCYVPYAAFAMYMVN-N -8=opsin, -------------ESESTQK------AEKEVTRMVVVMVLAFC----------------- -9== -------------ESESTQK------AEKEVTRMVVVMVLAFCFCWGPYAFFACFAAA-N -10== -------------ESESTQK------AEKEVTRMVVVMIFAYCVCWGPYTFFACFAAA-N -11== -------------ESESTQK------AEREVSRMVVVMIVAFCICWGPYASFVSFAAA-N -12== -------------ESESTQK------AEKEVSRMVVVMIVAYCFCWGPYTFFACFAAA-N -13== -------------EADTTQR------AEREVTRMVIVMVMAFLLCWLPYSTFALVVAT-H -14== ----------RSNVDKNKET------AEIRIAKAAITICFLFFCSWTPYGVMSLIGAF-G -15== ----------RSNVDKSKEA------AEIRIAKAAITICFLFFASWTPYGVMSLIGAF-G -16== ----------RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-G -17== ----------RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-G -18== ----------RS-NEADAQR------AEIRIAKTALVNVSLWFICWTPYALISLKGVM-G -19== ----------RS-NEAETQR------AEIRIAKTALVNVSLWFICWTPYAAITIQGLL-G -20== ----------RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMGLF-K -21== ----------RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMGLF-K -22== ----------RS-SEDADKS------AEGKLAKVALVTISLWFMAWTPYLVINCMGLF-K -23== ----------RS-SEDCDKS------AEGKLAKVALTTISLWFMAWTPYLVICYFGLF-K -24== 
----------RS-SEDCDKS------AENKLAKVALTTISLWFMAWTPYLIICYFGLF-K -25== ----------RS-SENQNTS------AECKLAKVALMTISLWFMAWTPYLVINFSGIF-N -26== ----------RANADQQKQS------AECRLAKVAMMTVGLWFMAWTPYLIISWAGVF-S -27== ----------R--KAQAGAS------AEMKLAKISMVIITQFMLSWSPYAIIALLAQF-G -28== ----------R--KAQAGAN------AEMRLAKISIVIVSQFLLSWSPYAVVALLAQF-G -29== ----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFIC -30== ----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFIC -31=p ----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFIC -32== PTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-C -33=p SNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-C -34== -----------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF-V -35=p -----------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF-V -36== AAAYYPSLVRRKPKEAADSK------RERKAAKTLAIITGAFVACWLPFFVLAILVPT-C - * . : - -1== QGS--DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLCC--------GKNPL -2== QGS--NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLCC--------GKNPL -3== KGA--DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTICC--------GKNPF -4=p KGA--DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIFC--------GKNPL -5=p KGA--AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLFC--------GKNPL -6== RGE--EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMM-KMVC--------GKN-I -7== RNH--GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIM-KMVC--------GKA-M -8=opsin, ------------------------------------------------------------ -9== PGY--PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCIL-QLF---------GKK-V -10== PGY--AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCIL-QLF---------GKK-V -11== PGY--AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIM-QLF---------GKK-V -12== PGY--AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCIL-QLF---------GKK-V -13== KGI--IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLL-EMLCCGY----QPQR-T -14== DKT--LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWLAL---------N -15== DKT--LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWLAI---------S -16== DKS--LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWLGV---------N -17== 
DKS--LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWLGV---------N -18== DTS--GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWFCV------HE-T -19== NAE--GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWFCV------HE-K -20== F-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GK-V -21== F-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GK-V -22== F-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GK-V -23== I-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMCVF------GN-T -24== I-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMCVC------GT-T -25== L-V--KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSLAC-------A-A -26== SGT--RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSLAC------GS-G -27== PAE--WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQFDEKE-C -28== PLE--WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQFDDKE-T -29== GTSCSCIPLWVERTFLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYR----NINR-- -30== GTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQCQYR----NINR-- -31=p GTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYR----NINR-- -32== ESSC-HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC----RQ---- -33=p ESSC-HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC----RR---- -34== DPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR----RGHR-- -35=p DPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR----RGHR-- -36== DCE---VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLLCGRRV----RRRR-- - - -1== GDDE--SGASTSKTEVSSVS-TSPVSPA-------------------------------- -2== GDDE--ASATASKTE------TSQVAPA-------------------------------- -3== GDEDVSSTVSQSKTEVSSVS-SSQVSPA-------------------------------- -4=p GDDE-SSTVSTSKTEVSS------VSPA-------------------------------- -5=p GDEE-SSTVSTSKTEVSS------VSPA-------------------------------- -6== EEDE--ASTSSQVTQVSS------VAPEK------------------------------- -7== TDES--DTCSSQKTEVSTVS-STQVGPN-------------------------------- -8=opsin, ------------------------------------------------------------ -9== DDGS--ELSSASKTEVSSV---SSVSPA-------------------------------- -10== 
DDGS--ELSSASKTEVSSV---SSVSPA-------------------------------- -11== DDGS--EASTTSRTEVSSVS-NSSVAPA-------------------------------- -12== DDGS--EVST-SRTEVSSVS-NSSVSPA-------------------------------- -13== GKAS--PGTPGPHADVTAAGLRNKVMPAHPV----------------------------- -14== EKAP--ESSAVASTSTTQEP--QQTTAA-------------------------------- -15== EKAP--ESRAAISTSTTQEQ--QQTTAA-------------------------------- -16== EKSG--EISSAQST-TTQEQ--QQTTAA-------------------------------- -17== EKSG--EASSAQST-TTQEQ-TQQTSAA-------------------------------- -18== ETKS--NDDSQSNSTVAQDK-A-------------------------------------- -19== DPND--VEENQSSNTQTQEK-S-------------------------------------- -20== DDGK--SSDAQSQA-TASEA-ESKA----------------------------------- -21== DDGK--SSDAQSQA-TASEA-ESKA----------------------------------- -22== DDGK--SSEAQSQA-TTSEA-ESKA----------------------------------- -23== DEPK--PDAPASDTETTSEA-DSKA----------------------------------- -24== DEPK--PDAPPSDTETTSEA-ESKD----------------------------------- -25== EPSS--DAVSTTSGTTTVTD-NEKSNA--------------------------------- -26== ESGS--DVKSEASATTTMEE-KPKIPEA-------------------------------- -27== EDAN--DAEEEVVASER--G-GESRDAAQMKEMMAMMQKMQAQQAAYQPPPPPQGY--PP -28== EDDK--DAETEIPAGESSDA-APSADAAQMKEMMAMMQKMQQQQAAY----PPQGYAPPP -29== ------KLSAAGMHEALKLAERPERPEF------------VLQNADY------------- -30== ------KLSAAGMHEALKLAERPERSEF------------VLQNSDH------------- -31=p ------KLSAAGMHEALKLAERPERSEF------------VLQNSDH------------- -32== ------------------------------------------------------------ -33=p ------------------------------------------------------------ -34== ------------------------------------------------------------ -35=p ------------------------------------------------------------ -36== ------A---------------PQ------------------------------------ - - -1== --------------------------------------------------------- -2== --------------------------------------------------------- -3== --------------------------------------------------------- 
-4=p --------------------------------------------------------- -5=p --------------------------------------------------------- -6== --------------------------------------------------------- -7== --------------------------------------------------------- -8=opsin, --------------------------------------------------------- -9== --------------------------------------------------------- -10== --------------------------------------------------------- -11== --------------------------------------------------------- -12== --------------------------------------------------------- -13== --------------------------------------------------------- -14== --------------------------------------------------------- -15== --------------------------------------------------------- -16== --------------------------------------------------------- -17== --------------------------------------------------------- -18== --------------------------------------------------------- -19== --------------------------------------------------------- -20== --------------------------------------------------------- -21== --------------------------------------------------------- -22== --------------------------------------------------------- -23== --------------------------------------------------------- -24== --------------------------------------------------------- -25== --------------------------------------------------------- -26== --------------------------------------------------------- -27== QGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQAYQA -28== QGYPPQGY--PPQGYPPQGYPPQGYPP---PPQGAPPQ-GAPPAAPPQGVDNQAYQA -29== --------------------------------------------CRKKGHDS----- -30== --------------------------------------------CGKKGHDT----- -31=p --------------------------------------------CGKKGHDT----- -32== --------------------------------------------------------- -33=p --------------------------------------------------------- -34== --------------------------------------------------------- -35=p 
--------------------------------------------------------- -36== --------------------------------------------------------- -
--- a/test-data/sample.fa Tue Oct 31 15:48:53 2023 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,285 +0,0 @@ -> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91] -MNGTEGDNFYVPFSNKTGLARSPYEYPQYYLAEPWKYSALAAYMFFLILVGFPVNFLTLF -VTVQHKKLRTPLNYILLNLAMANLFMVLFGFTVTMYTSMNGYFVFGPTMCSIEGFFATLG -GEVALWSLVVLAIERYIVICKPMGNFRFGNTHAIMGVAFTWIMALACAAPPLVGWSRYIP -EGMQCSCGPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQES -ASTQKAEKEVTRMVVLMVIGFLVCWVPYASVAFYIFTHQGSDFGATFMTLPAFFAKSSAL -YNPVIYILMNKQFRNCMITTLCCGKNPLGDDESGASTSKTEVSSVSTSPVSPA -> 2== U22180 1 rat opsin <rod>[J.Mol.Neurosci.5(3),207-209'94] -MNGTEGPNFYVPFSNITGVVRSPFEQPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY -VTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLG -GEIGLWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIP -EGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQES -ATTQKAEKEVTRMVIIMVIFFLICWLPYASVAMYIFTHQGSNFGPIFMTLPAFFAKTASI -YNPIIYIMMNKQFRNCMLTSLCCGKNPLGDDEASATASKTETSQVAPA -> 3== M92038 1 chicken green sensitive cone opsin <retina>[PNAS89,5932-5936'9 -MNGTEGINFYVPMSNKTGVVRSPFEYPQYYLAEPWKYRLVCCYIFFLISTGLPINLLTLL -VTFKHKKLRQPLNYILVNLAVADLFMACFGFTVTFYTAWNGYFVFGPVGCAVEGFFATLG -GQVALWSLVVLAIERYIVVCKPMGNFRFSATHAMMGIAFTWVMAFSCAAPPLFGWSRYMP -EGMQCSCGPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQES -ATTQKAEKEVTRMVILMVLGFMLAWTPYAVVAFWIFTNKGADFTATLMAVPAFFSKSSSL -YNPIIYVLMNKQFRNCMITTICCGKNPFGDEDVSSTVSQSKTEVSSVSSSQVSPA -> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish -MNGTEGKNFYVPMSNRTGLVRSPFEYPQYYLAEPWQFKILALYLFFLMSMGLPINGLTLV -VTAQHKKLRQPLNFILVNLAVAGTIMVCFGFTVTFYTAINGYFVLGPTGCAVEGFMATLG -GEVALWSLVVLAIERYIVVCKPMGSFKFSSSHAFAGIAFTWVMALACAAPPLFGWSRYIP -EGMQCSCGPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQDS -ASTQKAEREVTKMVILMVFGFLIAWTPYATVAAWIFFNKGADFSAKFMAIPAFFSKSSAL -YNPVIYVLLNKQFRNCMLTTIFCGKNPLGDDESSTVSTSKTEVSSVSPA -> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish -MNGTEGNNFYVPLSNRTGLVRSPFEYPQYYLAEPWQFKLLAVYMFFLICLGLPINGLTLI 
-CTAQHKKLRQPLNFILVNLAVAGAIMVCFGFTVTFYTAINGYFALGPTGCAVEGFMATLG -GEVALWSLVVLAIERYIVVCKPMGSFKFSSTHASAGIAFTWVMAMACAAPPLVGWSRYIP -EGIQCSCGPDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQDS -ASTQKAEREVTKMVILMVLGFLVAWTPYATVAAWIFFNKGAAFSAQFMAIPAFFSKTSAL -YNPVIYVLLNKQFRSCMLTTLFCGKNPLGDEESSTVSTSKTEVSSVSPA -> 6== L11864 1 Carassius auratus blue cone opsin <retina>[Biochemistry32,208- -MKQVPEFHEDFYIPIPLDINNLSAYSPFLVPQDHLGNQGIFMAMSVFMFFIFIGGASINI -LTILCTIQFKKLRSHLNYILVNLSIANLFVAIFGSPLSFYSFFNRYFIFGATACKIEGFL -ATLGGMVGLWSLAVVAFERWLVICKPLGNFTFKTPHAIAGCILPWISALAASLPPLFGWS -RYIPEGLQCSCGPDWYTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITLKLAAKA -QADSASTQKAEREVTKMVVVMVLGFLVCWAPYASFSLWIVSHRGEEFDLRMATIPSCLSK -ASTVYNPVIYVLMNKQFRSCMMKMVCGKNIEEDEASTSSQVTQVSSVAPEK -> 7== M13299 1 human BCP <>[Science232(4747),193-202'86] -MRKMSEEEFYLFKNISSVGPWDGPQYHIAPVWAFYLQAAFMGTVFLIGFPLNAMVLVATL -RYKKLRQPLNYILVNVSFGGFLLCIFSVFPVFVASCNGYFVFGRHVCALEGFLGTVAGLV -TGWSLAFLAFERYIVICKPFGNFRFSSKHALTVVLATWTIGIGVSIPPFFGWSRFIPEGL -QCSCGPDWYTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRALKAVAAQQQESATT -QKAEREVSRMVVVMVGSFCVCYVPYAAFAMYMVNNRNHGLDLRLVTIPSFFSKSACIYNP -IIYCFMNKQFQACIMKMVCGKAMTDESDTCSSQKTEVSTVSSTQVGPN -> 8=opsin, greensensitive human (fragment) S07060 -DLAETVIASTISIVNQVSGYFVLGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKP -FGNVRFDAKLAIVGIAFSWIWAAVWTAPPIFGWSRYWPHGLKTSCGPDVFSGSSYPGVQS -YMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESESTQKAEKEVTRMVVVMVLAFC -> 9== K03494 1 human GCP <>[Science232(4747),193-202'86] -MAQQWSLQRLAGRHPQDSYEDSTQSSIFTYTNSNSTRGPFEGPNYHIAPRWVYHLTSVWM -IFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIASTISVVNQVYGYFV -LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGNVRFDAKLAIVGIAFSWIWA -AVWTAPPIFGWSRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYL -QVWLAIRAVAKQQKESESTQKAEKEVTRMVVVMVLAFCFCWGPYAFFACFAAANPGYPFH -PLMAALPAFFAKSATIYNPVIYVFMNRQFRNCILQLFGKKVDDGSELSSASKTEVSSVSS -VSPA -> 10== Z68193 1 human Red Opsin <>[] -MAQQWSLQRLAGRHPQDSYEDSTQSSIFTYTNSNSTRGPFEGPNYHIAPRWVYHLTSVWM -IFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIASTISIVNQVSGYFV 
-LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGNVRFDAKLAIVGIAFSWIWS -AVWTAPPIFGWSRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYL -QVWLAIRAVAKQQKESESTQKAEKEVTRMVVVMIFAYCVCWGPYTFFACFAAANPGYAFH -PLMAALPAYFAKSATIYNPVIYVFMNRQFRNCILQLFGKKVDDGSELSSASKTEVSSVSS -VSPA -> 11== M92036 1 Gecko gecko P521 <retina>[PNAS89,6841-6845'92] -MTEAWNVAVFAARRSRDDDDTTRGSVFTYTNTNNTRGPFEGPNYHIAPRWVYNLVSFFMI -IVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVASTISVFNQIFGYFIL -GHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGNIKFDSKLAIIGIVFSWVWAW -GWSAPPIFGWSRYWPHGLKTSCGPDVFSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQ -VWMAIRAVAAQQKESESTQKAEREVSRMVVVMIVAFCICWGPYASFVSFAAANPGYAFHP -LAAALPAYFAKSATIYNPVIYVFMNRQFRNCIMQLFGKKVDDGSEASTTSRTEVSSVSNS -SVAPA -> 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90] -MAAWEAAFAARRRHEEEDTTRDSVFTYTNSNNTRGPFEGPNYHIAPRWVYNLTSVWMIFV -VAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIASTISVINQISGYFILGH -PMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGNIKFDGKLAVAGILFSWLWSCAW -TAPPIFGWSRYWPHGLKTSCGPDVFSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVW -LAIRAVAAQQKESESTQKAEKEVSRMVVVMIVAYCFCWGPYTFFACFAAANPGYAFHPLA -AALPAYFAKSATIYNPIIYVFMNRQFRNCILQLFGKKVDDGSEVSTSRTEVSSVSNSSVS -PA -> 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95] -MSSNSSQAPPNGTPGPFDGPQWPYQAPQSTYVGVAVLMGTVVACASVVNGLVIVVSICYK -KLRSPLNYILVNLAVADLLVTLCGSSVSLSNNINGFFVFGRRMCELEGFMVSLTGIVGLW -SLAILALERYVVVCKPLGDFQFQRRHAVSGCAFTWGWALLWSAPPLLGWSSYVPEGLRTS -CGPNWYTGGSNNNSYILSLFVTCFVLPLSLILFSYTNLLLTLRAAAAQQKEADTTQRAER -EVTRMVIVMVMAFLLCWLPYSTFALVVATHKGIIIQPVLASLPSYFSKTATVYNPIIYVF -MNKQFQSCLLEMLCCGYQPQRTGKASPGTPGPHADVTAAGLRNKVMPAHPV -> 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87] -MESGNVSSSLFGNVSTALRPEARLSAETRLLGWNVPPEELRHIPEHWLTYPEPPESMNYL -LGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVKTPIFIYNSFH -QGYALGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEGKMTHGKAIAMIIFIY -MYATPWVVACYTETWGRFVPEGYLTSCTFDYLTDNFDTRLFVACIFFFSFVCPTTMITYY -YSQIVGHVFSHEKALRDQAKKMNVESLRSNVDKNKETAEIRIAKAAITICFLFFCSWTPY 
-GVMSLIGAFGDKTLLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWLALNE -KAPESSAVASTSTTQEPQQTTAA -> 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92 -MEYHNVSSVLGNVSSVLRPDARLSAESRLLGWNVPPDELRHIPEHWLIYPEPPESMNYLL -GTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIKTPIFIYNSFHQ -GYALGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEGKMTHGKAIAMIIFIYL -YATPWVVACYTESWGRFVPEGYLTSCTFDYLTDNFDTRLFVACIFFFSFVCPTTMITYYY -SQIVGHVFSHEKALRDQAKKMNVDSLRSNVDKSKEAAEIRIAKAAITICFLFFASWTPYG -VMSLIGAFGDKTLLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWLAISEK -APESRAAISTSTTQEQQQTTAA -> 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87] -MEPLCNASEPPLRPEARSSGNGDLQFLGWNVPPDQIQYIPEHWLTQLEPPASMHYMLGVF -YIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLKAPIFNSFHRGFAIY -LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNRNMTFTKAVIMNIIIWLYCT -PWVVLPLTQFWDRFVPEGYLTSCSFDYLSDNFDTRLFVGTIFFFSFVCPTLMILYYYSQI -VGHVFSHEKALREQAKKMNVESLRSNVDKSKETAEIRIAKAAITICFLFFVSWTPYGVMS -LIGAFGDKSLLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWLGVNEKSGE -ISSAQSTTTQEQQQTTAA -> 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92 -MDALCNASEPPLRPEARMSSGSDELQFLGWNVPPDQIQYIPEHWLTQLEPPASMHYMLGV -FYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLKAPIFIYNSFHRGF -ALGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNRNMTFTKAVIMNIIIWLYC -TPWVVLPLTQFWDRFVPEGYLTSCSFDYLSDNFDTRLFVGTIFLFSFVVPTLMILYYYSQ -IVGHVFNHEKALREQAKKMNVESLRSNVDKSKETAEIRIAKAAITICFLFFVSWTPYGVM -SLIGAFGDKSLLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWLGVNEKSG -EASSAQSTTTQEQTQQTSAA -> 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 <compound eye>[J.Exp.Biol.1 -MTNATGPQMAYYGAASMDFGYPEGVSIVDFVRPEIKPYVHQHWYNYPPVNPMWHYLLGVI -YLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTNVPFFTYNCFSGGV -WMFSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNGPKLTTGKAVVFALISWV -IAIGCALPPFFGWGNYILEGILDSCSYDYLTQDFNTFSYNIFIFVFDYFLPAAIIVFSYV -FIVKAIFAHEAAMRAQAKKMNVSTLRSNEADAQRAEIRIAKTALVNVSLWFICWTPYALI -SLKGVMGDTSGITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWFCVHETET -KSNDDSQSNSTVAQDKA -> 19== D50583 1 
Hemigrapsus sanguineus opsin BcRh1 <compound eye>[J.Exp.Biol.1 -MANVTGPQMAFYGSGAATFGYPEGMTVADFVPDRVKHMVLDHWYNYPPVNPMWHYLLGVV -YLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTNFPPFCYNCFSGGR -WMFSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNGPKLTQGKATFMCGLAWV -ISVGWSLPPFFGWGSYTLEGILDSCSYDYFTRDMNTITYNICIFIFDFFLPASVIVFSYV -FIVKAIFAHEAAMRAQAKKMNVTNLRSNEAETQRAEIRIAKTALVNVSLWFICWTPYAAI -TIQGLLGNAEGITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWFCVHEKDP -NDVEENQSSNTQTQEKS -> 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85] -MESFAVAAAQLGPHFAPLSNGSVVDKVTPDMAHLISPYWNQFPAMDPIWAKILTAYMIMI -GMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITNTPMMGINLYFETWVLGP -MMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGRPMTIPLALGKMYVPEGNLTSC -GIDYLERDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKSL -RSSEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGLFKFEGLTPLNTIWGACFAKS -AACYNPIVYGISHPKYRLALKEKCPCCVFGKVDDGKSSDAQSQATASEAESKA -> 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85] -MESFAVAAAQLGPHFAPLSNGSVVDKVTPDMAHLISPYWNQFPAMDPIWAKILTAYMIMI -GMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITNTPMMGINLYFETWVLGP -MMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGRPMTIPLALGKIAYIWFMSSIW -CLAPAFGWSRYVPEGNLTSCGIDYLERDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAA -VSAHEKAMREQAKKMNVKSLRSSEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGL -FKFEGLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVFGKVDDGKSSDA -QSQATASEAESKA -> 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204' -MDSFAAVATQLGPQFAAPSNGSVVDKVTPDMAHLISPYWDQFPAMDPIWAKILTAYMIII -GMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITNTPMMGINLYFETWVLGP -MMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGRPMTIPLALGKIAYIWFMSTIW -CCLAPVFGWSRYVPEGNLTSCGIDYLERDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIA -AVSAHEKAMREQAKKMNVKSLRSSEDADKSAEGKLAKVALVTISLWFMAWTPYLVINCMG -LFKFEGLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVFGKVDDGKSSE -AQSQATTSEAESKA -> 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86] -MERSHLPETPFDLAHSGPRFQAQSSGNGSVLDNVLPDMAHLVNPYWSRFAPMDPMMSKIL -GLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQSPVMIINFYY 
-ETWVLGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGINGTPMTIKTSIMKILFI -WMMAVFWTVMPLIGWSAYVPEGNLTACSIDYMTRMWNPRSYLITYSLFVYYTPLFLICYS -YWFIIAAVAAHEKAMREQAKKMNVKSLRSSEDCDKSAEGKLAKVALTTISLWFMAWTPYL -VICYFGLFKIDGLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMCVFGNTD -EPKPDAPASDTETTSEADSKA -> 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92 -MERSLLPEPPLAMALLGPRFEAQTGGNRSVLDNVLPDMAPLVNPHWSRFAPMDPTMSKIL -GLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQSPVMIINFYY -ETWVLGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGINGTPMTIKTSIMKIAFI -WMMAVFWTIMPLIGWSSYVPEGNLTACSIDYMTRQWNPRSYLITYSLFVYYTPLFMICYS -YWFIIATVAAHEKAMRDQAKKMNVKSLRSSEDCDKSAENKLAKVALTTISLWFMAWTPYL -IICYFGLFKIDGLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMCVCGTTD -EPKPDAPPSDTETTSEAESKD -> 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[] -MIAVSGPSYEAFSYGGQARFNNQTVVDKVPPDMLHLIDANWYQYPPLNPMWHGILGFVIG -MLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCMSPPMVINCYYETWVL -GPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSGKPLSINGALIRIIAIWLFSL -GWTIAPMFGWNRYVPEGNMTACGTDYFNRGLLSASYLVCYGIWVYFVPLFLIIYSYWFII -QAVAAHEKNMREQAKKMNVASLRSSENQNTSAECKLAKVALMTISLWFMAWTPYLVINFS -GIFNLVKISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSLACAAEPSSDAV -STTSGTTTVTDNEKSNA -> 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93] -MANQLSYSSLGWPYQPNASVVDTMPKEMLYMIHEHWYAFPPMNPLWYSILGVAMIILGII -CVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFMMPTMTSNCFAETWILGPFMC -EVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAAAPLTHKKATLLLLFVWIWSGGWTIL -PFFGWSRYVPEGNLTSCTVDYLTKDWSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAE -HEKQLREQAKKMNVASLRANADQQKQSAECRLAKVAMMTVGLWFMAWTPYLIISWAGVFS -SGTRLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSLACGSGESGSDVKSE -ASATTTMEEKPKIPEA -> 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88] -MVESTTLVNQTWWYNPTVDIHPHWAKFDPIPDAVYYSVGIFIGVVGIIGILGNGVVIYLF -SKTKSLQTPANMFIINLAMSDLSFSAINGFPLKTISAFMKKWIFGKVACQLYGLLGGIFG -FMSINTMAMISIDRYNVIGRPMAASKKMSHRRAFLMIIFVWMWSIVWSVGPVFNWGAYVP -EGILTSCSFDYLSTDPSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAK 
-RLNAKELRKAQAGASAEMKLAKISMVIITQFMLSWSPYAIIALLAQFGPAEWVTPYAAEL -PVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQFDEKECEDANDAEEEVVASER -GGESRDAAQMKEMMAMMQKMQAQQAAYQPPPPPQGYPPQGYPPQGAYPPPQGYPPQGYPP -QGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQAYQA -> 28== X70498 1 Todarodes pacificus rhodopsin <retina>[FEBS317(1-2),5-11'93] -MGRDLRDNETWWYNPSIVVHPHWREFDQVPDAVYYSLGIFIGICGIIGCGGNGIVIYLFT -KTKSLQTPANMFIINLAFSDFTFSLVNGFPLMTISCFLKKWIFGFAACKVYGFIGGIFGF -MSIMTMAMISIDRYNVIGRPMAASKKMSHRRAFIMIIFVWLWSVLWAIGPIFGWGAYTLE -GVLCNCSFDYISRDSTTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKR -LNAKELRKAQAGANAEMRLAKISIVIVSQFLLSWSPYAVVALLAQFGPLEWVTPYAAQLP -VMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQFDDKETEDDKDAETEIPAGESS -DAAPSADAAQMKEMMAMMQKMQQQQAAYPPQGYAPPPQGYPPQGYPPQGYPPQGYPPQGY -PPPPQGAPPQGAPPAAPPQGVDNQAYQA -> 29== L21195 1 human serotonin 5-HT7 receptor protein <placenta and fetal bra -MMDVNSSGRPDLYGHLRSFLLPEVGRGLPDLSPDGGADPVAGSWAPHLLSEVTASPAPTW -DAPPDNASGCGEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVCFVKKLRQPSNYLI -VSLALADLSVAVAVMPFVSVTDLIGGKWIFGHFFCNVFIAMDVMCCTASIMTLCVISIDR -YLGITRPLTYPVRQNGKCMAKMILSVWLLSASITLPPLFGWAQNVNDDKVCLISQDFGYT -IYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPGFPRVEPDSVIALNGIVKLQKEVE -ECANLSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCSC -IPLWVERTFLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYRNINRKLSAAGMHEALK -LAERPERPEFVLQNADYCRKKGHDS -> 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93] -MPHLLSGFLEVTASPAPTWDAPPDNVSGCGEQINYGRVEKVVIGSILTLITLLTIAGNCL -VVISVSFVKKLRQPSNYLIVSLALADLSVAVAVMPFVSVTDLIGGKWIFGHFFCNVFIAM -DVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASITLPPLFGW -AQNVNDDKVCLISQDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPGFPRV -QPESVISLNGVVKLQKEVEECANLSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWL -PFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQC -QYRNINRKLSAAGMHEALKLAERPERSEFVLQNSDHCGKKGHDT -> 31=p A47425 serotonin receptor 5HT-7 - rat -MPHLLSGFLEVTASPAPTWDAPPDNVSGCGEQINYGRVEKVVIGSILTLITLLTIAGNCL -VVISVSFVKKLRQPSNYLIVSLALADLSVAVAVMPFVSVTDLIGGKWIFGHFFCNVFIAM 
-DVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASITLPPLFGW -AQNVNDDKVCLISQDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPGFPRV -QPESVISLNGVVKLQKEVEECANLSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWL -PFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQC -QYRNINRKLSAAGMHEALKLAERPERSEFVLQNSDHCGKKGHDT -> 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92] -MDVLSPGQGNNTTSPPAPFETGGNTTGISDVTVSYQVITSLLLGTLIFCAVLGNACVVAA -IALERSLQNVANYLIGSLAVTDLMVSVLVLPMAALYQVLNKWTLGQVTCDLFIALDVLCC -TSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLISIPPMLGWRTPED -RSDPDACTISKDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKKVEKTGADT -RHGASPAPQPKKSVNGESGSRNWRLGVESKAGGALCANGAVRQGDDGAALEVIEVHRVGN -SKEHLPLPSEAGPTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLP -FFIVALVLPFCESSCHMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC -RQ -> 33=p A35181 serotonin receptor class 1A - rat -MDVFSFGQGNNTTASQEPFGTGGNVTSISDVTFSYQVITSLLLGTLIFCAVLGNACVVAA -IALERSLQNVANYLIGSLAVTDLMVSVLVLPMAALYQVLNKWTLGQVTCDLFIALDVLCC -TSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLISIPPMLGWRTPED -RSDPDACTISKDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRKVEKKGAGT -SLGTSSAPPPKKSLNGQPGSGDWRRCAENRAVGTPCTNGAVRQGDDEATLEVIEVHRVGN -SKEHLPLPSESGSNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLP -FFIVALVLPFCESSCHMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC -RR -> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93] -MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYGLTGQFINGS -HSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAV -MLERSLHNVANYLILSLAVADLMVAVLVMPLSVVSEISKVWFLHSEVCDMWISVDVLCCT -ASILHLVAIAMDRYWAVTSIDYIRRRSARRILLMIMVVWIVALFISIPPLFGWRDPNNDP -DKTGTCIISQDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTE -ETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENA -NGVNSNSSSSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSNDTPYSRTREK -LELKRERKAARTLAIITGAFLICWLPFFIIALIGPFVDPEGIPPFARSFVLWLGYFNSLL -NPIIYTIFSPEFRSAFQKILFGKYRRGHR -> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail 
-MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYGLTGQFINGS -HSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAV -MLERSLHNVANYLILSLAVADLMVAVLVMPLSVVSEISKVWFLHSEVCDMWISVDVLCCT -ASILHLVAIAMDRYWAVTSIDYIRRRSARRILLMIMVVWIVALFISIPPLFGWRDPNNDP -DKTGTCIISQDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTE -ETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENA -NGVNSNSSSSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSNDTPYSRTREK -LELKRERKAARTLAIITGAFLICWLPFFIIALIGPFVDPEGIPPFARSFVLWLGYFNSLL -NPIIYTIFSPEFRSAFQKILFGKYRRGHR -> 36== X95604 1 Bombyx mori serotonin receptor <antennae>[InsectBiochem.Mol.Bi -MEGAEGQEELDWEALYLRLPLQNCSWNSTGWEPNWNVTVVPNTTWWQASAPFDTPAALVR -AAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLVMPLGAV -YEVVQRWTLGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTNIDYIHASTAKRVGMM -IACVWTVSFFVCIAQLLGWKDPDWNQRVSEDLRCVVSQDVGYQIFATASSFYVPVLIILI -LYWRIYQTARKRIRRRRGATARGGVGPPPVPAGGALVAGGGSGGIAAAVVAVIGRPLPTI -SETTTTGFTNVSSNNTSPEKQSCANGLEADPPTTGYGAVAAAYYPSLVRRKPKEAADSKR -ERKAAKTLAIITGAFVACWLPFFVLAILVPTCDCEVSPVLTSLSLWLGYFNSTLNPVIYT -VFSPEFRHAFQRLLCGRRVRRRRAPQ
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample_amino.fa Wed Mar 20 07:34:52 2024 +0000 @@ -0,0 +1,285 @@ +> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91] +MNGTEGDNFYVPFSNKTGLARSPYEYPQYYLAEPWKYSALAAYMFFLILVGFPVNFLTLF +VTVQHKKLRTPLNYILLNLAMANLFMVLFGFTVTMYTSMNGYFVFGPTMCSIEGFFATLG +GEVALWSLVVLAIERYIVICKPMGNFRFGNTHAIMGVAFTWIMALACAAPPLVGWSRYIP +EGMQCSCGPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQES +ASTQKAEKEVTRMVVLMVIGFLVCWVPYASVAFYIFTHQGSDFGATFMTLPAFFAKSSAL +YNPVIYILMNKQFRNCMITTLCCGKNPLGDDESGASTSKTEVSSVSTSPVSPA +> 2== U22180 1 rat opsin <rod>[J.Mol.Neurosci.5(3),207-209'94] +MNGTEGPNFYVPFSNITGVVRSPFEQPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY +VTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLG +GEIGLWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIP +EGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQES +ATTQKAEKEVTRMVIIMVIFFLICWLPYASVAMYIFTHQGSNFGPIFMTLPAFFAKTASI +YNPIIYIMMNKQFRNCMLTSLCCGKNPLGDDEASATASKTETSQVAPA +> 3== M92038 1 chicken green sensitive cone opsin <retina>[PNAS89,5932-5936'9 +MNGTEGINFYVPMSNKTGVVRSPFEYPQYYLAEPWKYRLVCCYIFFLISTGLPINLLTLL +VTFKHKKLRQPLNYILVNLAVADLFMACFGFTVTFYTAWNGYFVFGPVGCAVEGFFATLG +GQVALWSLVVLAIERYIVVCKPMGNFRFSATHAMMGIAFTWVMAFSCAAPPLFGWSRYMP +EGMQCSCGPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQES +ATTQKAEKEVTRMVILMVLGFMLAWTPYAVVAFWIFTNKGADFTATLMAVPAFFSKSSSL +YNPIIYVLMNKQFRNCMITTICCGKNPFGDEDVSSTVSQSKTEVSSVSSSQVSPA +> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish +MNGTEGKNFYVPMSNRTGLVRSPFEYPQYYLAEPWQFKILALYLFFLMSMGLPINGLTLV +VTAQHKKLRQPLNFILVNLAVAGTIMVCFGFTVTFYTAINGYFVLGPTGCAVEGFMATLG +GEVALWSLVVLAIERYIVVCKPMGSFKFSSSHAFAGIAFTWVMALACAAPPLFGWSRYIP +EGMQCSCGPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQDS +ASTQKAEREVTKMVILMVFGFLIAWTPYATVAAWIFFNKGADFSAKFMAIPAFFSKSSAL +YNPVIYVLLNKQFRNCMLTTIFCGKNPLGDDESSTVSTSKTEVSSVSPA +> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish +MNGTEGNNFYVPLSNRTGLVRSPFEYPQYYLAEPWQFKLLAVYMFFLICLGLPINGLTLI 
+CTAQHKKLRQPLNFILVNLAVAGAIMVCFGFTVTFYTAINGYFALGPTGCAVEGFMATLG +GEVALWSLVVLAIERYIVVCKPMGSFKFSSTHASAGIAFTWVMAMACAAPPLVGWSRYIP +EGIQCSCGPDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQDS +ASTQKAEREVTKMVILMVLGFLVAWTPYATVAAWIFFNKGAAFSAQFMAIPAFFSKTSAL +YNPVIYVLLNKQFRSCMLTTLFCGKNPLGDEESSTVSTSKTEVSSVSPA +> 6== L11864 1 Carassius auratus blue cone opsin <retina>[Biochemistry32,208- +MKQVPEFHEDFYIPIPLDINNLSAYSPFLVPQDHLGNQGIFMAMSVFMFFIFIGGASINI +LTILCTIQFKKLRSHLNYILVNLSIANLFVAIFGSPLSFYSFFNRYFIFGATACKIEGFL +ATLGGMVGLWSLAVVAFERWLVICKPLGNFTFKTPHAIAGCILPWISALAASLPPLFGWS +RYIPEGLQCSCGPDWYTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITLKLAAKA +QADSASTQKAEREVTKMVVVMVLGFLVCWAPYASFSLWIVSHRGEEFDLRMATIPSCLSK +ASTVYNPVIYVLMNKQFRSCMMKMVCGKNIEEDEASTSSQVTQVSSVAPEK +> 7== M13299 1 human BCP <>[Science232(4747),193-202'86] +MRKMSEEEFYLFKNISSVGPWDGPQYHIAPVWAFYLQAAFMGTVFLIGFPLNAMVLVATL +RYKKLRQPLNYILVNVSFGGFLLCIFSVFPVFVASCNGYFVFGRHVCALEGFLGTVAGLV +TGWSLAFLAFERYIVICKPFGNFRFSSKHALTVVLATWTIGIGVSIPPFFGWSRFIPEGL +QCSCGPDWYTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRALKAVAAQQQESATT +QKAEREVSRMVVVMVGSFCVCYVPYAAFAMYMVNNRNHGLDLRLVTIPSFFSKSACIYNP +IIYCFMNKQFQACIMKMVCGKAMTDESDTCSSQKTEVSTVSSTQVGPN +> 8=opsin, greensensitive human (fragment) S07060 +DLAETVIASTISIVNQVSGYFVLGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKP +FGNVRFDAKLAIVGIAFSWIWAAVWTAPPIFGWSRYWPHGLKTSCGPDVFSGSSYPGVQS +YMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESESTQKAEKEVTRMVVVMVLAFC +> 9== K03494 1 human GCP <>[Science232(4747),193-202'86] +MAQQWSLQRLAGRHPQDSYEDSTQSSIFTYTNSNSTRGPFEGPNYHIAPRWVYHLTSVWM +IFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIASTISVVNQVYGYFV +LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGNVRFDAKLAIVGIAFSWIWA +AVWTAPPIFGWSRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYL +QVWLAIRAVAKQQKESESTQKAEKEVTRMVVVMVLAFCFCWGPYAFFACFAAANPGYPFH +PLMAALPAFFAKSATIYNPVIYVFMNRQFRNCILQLFGKKVDDGSELSSASKTEVSSVSS +VSPA +> 10== Z68193 1 human Red Opsin <>[] +MAQQWSLQRLAGRHPQDSYEDSTQSSIFTYTNSNSTRGPFEGPNYHIAPRWVYHLTSVWM +IFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIASTISIVNQVSGYFV 
+LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGNVRFDAKLAIVGIAFSWIWS +AVWTAPPIFGWSRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYL +QVWLAIRAVAKQQKESESTQKAEKEVTRMVVVMIFAYCVCWGPYTFFACFAAANPGYAFH +PLMAALPAYFAKSATIYNPVIYVFMNRQFRNCILQLFGKKVDDGSELSSASKTEVSSVSS +VSPA +> 11== M92036 1 Gecko gecko P521 <retina>[PNAS89,6841-6845'92] +MTEAWNVAVFAARRSRDDDDTTRGSVFTYTNTNNTRGPFEGPNYHIAPRWVYNLVSFFMI +IVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVASTISVFNQIFGYFIL +GHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGNIKFDSKLAIIGIVFSWVWAW +GWSAPPIFGWSRYWPHGLKTSCGPDVFSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQ +VWMAIRAVAAQQKESESTQKAEREVSRMVVVMIVAFCICWGPYASFVSFAAANPGYAFHP +LAAALPAYFAKSATIYNPVIYVFMNRQFRNCIMQLFGKKVDDGSEASTTSRTEVSSVSNS +SVAPA +> 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90] +MAAWEAAFAARRRHEEEDTTRDSVFTYTNSNNTRGPFEGPNYHIAPRWVYNLTSVWMIFV +VAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIASTISVINQISGYFILGH +PMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGNIKFDGKLAVAGILFSWLWSCAW +TAPPIFGWSRYWPHGLKTSCGPDVFSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVW +LAIRAVAAQQKESESTQKAEKEVSRMVVVMIVAYCFCWGPYTFFACFAAANPGYAFHPLA +AALPAYFAKSATIYNPIIYVFMNRQFRNCILQLFGKKVDDGSEVSTSRTEVSSVSNSSVS +PA +> 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95] +MSSNSSQAPPNGTPGPFDGPQWPYQAPQSTYVGVAVLMGTVVACASVVNGLVIVVSICYK +KLRSPLNYILVNLAVADLLVTLCGSSVSLSNNINGFFVFGRRMCELEGFMVSLTGIVGLW +SLAILALERYVVVCKPLGDFQFQRRHAVSGCAFTWGWALLWSAPPLLGWSSYVPEGLRTS +CGPNWYTGGSNNNSYILSLFVTCFVLPLSLILFSYTNLLLTLRAAAAQQKEADTTQRAER +EVTRMVIVMVMAFLLCWLPYSTFALVVATHKGIIIQPVLASLPSYFSKTATVYNPIIYVF +MNKQFQSCLLEMLCCGYQPQRTGKASPGTPGPHADVTAAGLRNKVMPAHPV +> 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87] +MESGNVSSSLFGNVSTALRPEARLSAETRLLGWNVPPEELRHIPEHWLTYPEPPESMNYL +LGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVKTPIFIYNSFH +QGYALGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEGKMTHGKAIAMIIFIY +MYATPWVVACYTETWGRFVPEGYLTSCTFDYLTDNFDTRLFVACIFFFSFVCPTTMITYY +YSQIVGHVFSHEKALRDQAKKMNVESLRSNVDKNKETAEIRIAKAAITICFLFFCSWTPY 
+GVMSLIGAFGDKTLLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWLALNE +KAPESSAVASTSTTQEPQQTTAA +> 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92 +MEYHNVSSVLGNVSSVLRPDARLSAESRLLGWNVPPDELRHIPEHWLIYPEPPESMNYLL +GTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIKTPIFIYNSFHQ +GYALGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEGKMTHGKAIAMIIFIYL +YATPWVVACYTESWGRFVPEGYLTSCTFDYLTDNFDTRLFVACIFFFSFVCPTTMITYYY +SQIVGHVFSHEKALRDQAKKMNVDSLRSNVDKSKEAAEIRIAKAAITICFLFFASWTPYG +VMSLIGAFGDKTLLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWLAISEK +APESRAAISTSTTQEQQQTTAA +> 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87] +MEPLCNASEPPLRPEARSSGNGDLQFLGWNVPPDQIQYIPEHWLTQLEPPASMHYMLGVF +YIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLKAPIFNSFHRGFAIY +LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNRNMTFTKAVIMNIIIWLYCT +PWVVLPLTQFWDRFVPEGYLTSCSFDYLSDNFDTRLFVGTIFFFSFVCPTLMILYYYSQI +VGHVFSHEKALREQAKKMNVESLRSNVDKSKETAEIRIAKAAITICFLFFVSWTPYGVMS +LIGAFGDKSLLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWLGVNEKSGE +ISSAQSTTTQEQQQTTAA +> 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92 +MDALCNASEPPLRPEARMSSGSDELQFLGWNVPPDQIQYIPEHWLTQLEPPASMHYMLGV +FYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLKAPIFIYNSFHRGF +ALGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNRNMTFTKAVIMNIIIWLYC +TPWVVLPLTQFWDRFVPEGYLTSCSFDYLSDNFDTRLFVGTIFLFSFVVPTLMILYYYSQ +IVGHVFNHEKALREQAKKMNVESLRSNVDKSKETAEIRIAKAAITICFLFFVSWTPYGVM +SLIGAFGDKSLLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWLGVNEKSG +EASSAQSTTTQEQTQQTSAA +> 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 <compound eye>[J.Exp.Biol.1 +MTNATGPQMAYYGAASMDFGYPEGVSIVDFVRPEIKPYVHQHWYNYPPVNPMWHYLLGVI +YLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTNVPFFTYNCFSGGV +WMFSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNGPKLTTGKAVVFALISWV +IAIGCALPPFFGWGNYILEGILDSCSYDYLTQDFNTFSYNIFIFVFDYFLPAAIIVFSYV +FIVKAIFAHEAAMRAQAKKMNVSTLRSNEADAQRAEIRIAKTALVNVSLWFICWTPYALI +SLKGVMGDTSGITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWFCVHETET +KSNDDSQSNSTVAQDKA +> 19== D50583 1 
Hemigrapsus sanguineus opsin BcRh1 <compound eye>[J.Exp.Biol.1 +MANVTGPQMAFYGSGAATFGYPEGMTVADFVPDRVKHMVLDHWYNYPPVNPMWHYLLGVV +YLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTNFPPFCYNCFSGGR +WMFSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNGPKLTQGKATFMCGLAWV +ISVGWSLPPFFGWGSYTLEGILDSCSYDYFTRDMNTITYNICIFIFDFFLPASVIVFSYV +FIVKAIFAHEAAMRAQAKKMNVTNLRSNEAETQRAEIRIAKTALVNVSLWFICWTPYAAI +TIQGLLGNAEGITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWFCVHEKDP +NDVEENQSSNTQTQEKS +> 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85] +MESFAVAAAQLGPHFAPLSNGSVVDKVTPDMAHLISPYWNQFPAMDPIWAKILTAYMIMI +GMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITNTPMMGINLYFETWVLGP +MMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGRPMTIPLALGKMYVPEGNLTSC +GIDYLERDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKSL +RSSEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGLFKFEGLTPLNTIWGACFAKS +AACYNPIVYGISHPKYRLALKEKCPCCVFGKVDDGKSSDAQSQATASEAESKA +> 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85] +MESFAVAAAQLGPHFAPLSNGSVVDKVTPDMAHLISPYWNQFPAMDPIWAKILTAYMIMI +GMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITNTPMMGINLYFETWVLGP +MMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGRPMTIPLALGKIAYIWFMSSIW +CLAPAFGWSRYVPEGNLTSCGIDYLERDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAA +VSAHEKAMREQAKKMNVKSLRSSEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGL +FKFEGLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVFGKVDDGKSSDA +QSQATASEAESKA +> 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204' +MDSFAAVATQLGPQFAAPSNGSVVDKVTPDMAHLISPYWDQFPAMDPIWAKILTAYMIII +GMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITNTPMMGINLYFETWVLGP +MMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGRPMTIPLALGKIAYIWFMSTIW +CCLAPVFGWSRYVPEGNLTSCGIDYLERDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIA +AVSAHEKAMREQAKKMNVKSLRSSEDADKSAEGKLAKVALVTISLWFMAWTPYLVINCMG +LFKFEGLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVFGKVDDGKSSE +AQSQATTSEAESKA +> 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86] +MERSHLPETPFDLAHSGPRFQAQSSGNGSVLDNVLPDMAHLVNPYWSRFAPMDPMMSKIL +GLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQSPVMIINFYY 
+ETWVLGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGINGTPMTIKTSIMKILFI +WMMAVFWTVMPLIGWSAYVPEGNLTACSIDYMTRMWNPRSYLITYSLFVYYTPLFLICYS +YWFIIAAVAAHEKAMREQAKKMNVKSLRSSEDCDKSAEGKLAKVALTTISLWFMAWTPYL +VICYFGLFKIDGLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMCVFGNTD +EPKPDAPASDTETTSEADSKA +> 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92 +MERSLLPEPPLAMALLGPRFEAQTGGNRSVLDNVLPDMAPLVNPHWSRFAPMDPTMSKIL +GLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQSPVMIINFYY +ETWVLGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGINGTPMTIKTSIMKIAFI +WMMAVFWTIMPLIGWSSYVPEGNLTACSIDYMTRQWNPRSYLITYSLFVYYTPLFMICYS +YWFIIATVAAHEKAMRDQAKKMNVKSLRSSEDCDKSAENKLAKVALTTISLWFMAWTPYL +IICYFGLFKIDGLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMCVCGTTD +EPKPDAPPSDTETTSEAESKD +> 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[] +MIAVSGPSYEAFSYGGQARFNNQTVVDKVPPDMLHLIDANWYQYPPLNPMWHGILGFVIG +MLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCMSPPMVINCYYETWVL +GPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSGKPLSINGALIRIIAIWLFSL +GWTIAPMFGWNRYVPEGNMTACGTDYFNRGLLSASYLVCYGIWVYFVPLFLIIYSYWFII +QAVAAHEKNMREQAKKMNVASLRSSENQNTSAECKLAKVALMTISLWFMAWTPYLVINFS +GIFNLVKISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSLACAAEPSSDAV +STTSGTTTVTDNEKSNA +> 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93] +MANQLSYSSLGWPYQPNASVVDTMPKEMLYMIHEHWYAFPPMNPLWYSILGVAMIILGII +CVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFMMPTMTSNCFAETWILGPFMC +EVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAAAPLTHKKATLLLLFVWIWSGGWTIL +PFFGWSRYVPEGNLTSCTVDYLTKDWSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAE +HEKQLREQAKKMNVASLRANADQQKQSAECRLAKVAMMTVGLWFMAWTPYLIISWAGVFS +SGTRLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSLACGSGESGSDVKSE +ASATTTMEEKPKIPEA +> 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88] +MVESTTLVNQTWWYNPTVDIHPHWAKFDPIPDAVYYSVGIFIGVVGIIGILGNGVVIYLF +SKTKSLQTPANMFIINLAMSDLSFSAINGFPLKTISAFMKKWIFGKVACQLYGLLGGIFG +FMSINTMAMISIDRYNVIGRPMAASKKMSHRRAFLMIIFVWMWSIVWSVGPVFNWGAYVP +EGILTSCSFDYLSTDPSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAK 
+RLNAKELRKAQAGASAEMKLAKISMVIITQFMLSWSPYAIIALLAQFGPAEWVTPYAAEL +PVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQFDEKECEDANDAEEEVVASER +GGESRDAAQMKEMMAMMQKMQAQQAAYQPPPPPQGYPPQGYPPQGAYPPPQGYPPQGYPP +QGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQAYQA +> 28== X70498 1 Todarodes pacificus rhodopsin <retina>[FEBS317(1-2),5-11'93] +MGRDLRDNETWWYNPSIVVHPHWREFDQVPDAVYYSLGIFIGICGIIGCGGNGIVIYLFT +KTKSLQTPANMFIINLAFSDFTFSLVNGFPLMTISCFLKKWIFGFAACKVYGFIGGIFGF +MSIMTMAMISIDRYNVIGRPMAASKKMSHRRAFIMIIFVWLWSVLWAIGPIFGWGAYTLE +GVLCNCSFDYISRDSTTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKR +LNAKELRKAQAGANAEMRLAKISIVIVSQFLLSWSPYAVVALLAQFGPLEWVTPYAAQLP +VMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQFDDKETEDDKDAETEIPAGESS +DAAPSADAAQMKEMMAMMQKMQQQQAAYPPQGYAPPPQGYPPQGYPPQGYPPQGYPPQGY +PPPPQGAPPQGAPPAAPPQGVDNQAYQA +> 29== L21195 1 human serotonin 5-HT7 receptor protein <placenta and fetal bra +MMDVNSSGRPDLYGHLRSFLLPEVGRGLPDLSPDGGADPVAGSWAPHLLSEVTASPAPTW +DAPPDNASGCGEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVCFVKKLRQPSNYLI +VSLALADLSVAVAVMPFVSVTDLIGGKWIFGHFFCNVFIAMDVMCCTASIMTLCVISIDR +YLGITRPLTYPVRQNGKCMAKMILSVWLLSASITLPPLFGWAQNVNDDKVCLISQDFGYT +IYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPGFPRVEPDSVIALNGIVKLQKEVE +ECANLSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCSC +IPLWVERTFLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYRNINRKLSAAGMHEALK +LAERPERPEFVLQNADYCRKKGHDS +> 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93] +MPHLLSGFLEVTASPAPTWDAPPDNVSGCGEQINYGRVEKVVIGSILTLITLLTIAGNCL +VVISVSFVKKLRQPSNYLIVSLALADLSVAVAVMPFVSVTDLIGGKWIFGHFFCNVFIAM +DVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASITLPPLFGW +AQNVNDDKVCLISQDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPGFPRV +QPESVISLNGVVKLQKEVEECANLSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWL +PFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQC +QYRNINRKLSAAGMHEALKLAERPERSEFVLQNSDHCGKKGHDT +> 31=p A47425 serotonin receptor 5HT-7 - rat +MPHLLSGFLEVTASPAPTWDAPPDNVSGCGEQINYGRVEKVVIGSILTLITLLTIAGNCL +VVISVSFVKKLRQPSNYLIVSLALADLSVAVAVMPFVSVTDLIGGKWIFGHFFCNVFIAM 
+DVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASITLPPLFGW +AQNVNDDKVCLISQDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPGFPRV +QPESVISLNGVVKLQKEVEECANLSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWL +PFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQC +QYRNINRKLSAAGMHEALKLAERPERSEFVLQNSDHCGKKGHDT +> 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92] +MDVLSPGQGNNTTSPPAPFETGGNTTGISDVTVSYQVITSLLLGTLIFCAVLGNACVVAA +IALERSLQNVANYLIGSLAVTDLMVSVLVLPMAALYQVLNKWTLGQVTCDLFIALDVLCC +TSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLISIPPMLGWRTPED +RSDPDACTISKDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKKVEKTGADT +RHGASPAPQPKKSVNGESGSRNWRLGVESKAGGALCANGAVRQGDDGAALEVIEVHRVGN +SKEHLPLPSEAGPTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLP +FFIVALVLPFCESSCHMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC +RQ +> 33=p A35181 serotonin receptor class 1A - rat +MDVFSFGQGNNTTASQEPFGTGGNVTSISDVTFSYQVITSLLLGTLIFCAVLGNACVVAA +IALERSLQNVANYLIGSLAVTDLMVSVLVLPMAALYQVLNKWTLGQVTCDLFIALDVLCC +TSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLISIPPMLGWRTPED +RSDPDACTISKDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRKVEKKGAGT +SLGTSSAPPPKKSLNGQPGSGDWRRCAENRAVGTPCTNGAVRQGDDEATLEVIEVHRVGN +SKEHLPLPSESGSNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLP +FFIVALVLPFCESSCHMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC +RR +> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93] +MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYGLTGQFINGS +HSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAV +MLERSLHNVANYLILSLAVADLMVAVLVMPLSVVSEISKVWFLHSEVCDMWISVDVLCCT +ASILHLVAIAMDRYWAVTSIDYIRRRSARRILLMIMVVWIVALFISIPPLFGWRDPNNDP +DKTGTCIISQDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTE +ETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENA +NGVNSNSSSSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSNDTPYSRTREK +LELKRERKAARTLAIITGAFLICWLPFFIIALIGPFVDPEGIPPFARSFVLWLGYFNSLL +NPIIYTIFSPEFRSAFQKILFGKYRRGHR +> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail 
+MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYGLTGQFINGS +HSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAV +MLERSLHNVANYLILSLAVADLMVAVLVMPLSVVSEISKVWFLHSEVCDMWISVDVLCCT +ASILHLVAIAMDRYWAVTSIDYIRRRSARRILLMIMVVWIVALFISIPPLFGWRDPNNDP +DKTGTCIISQDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTE +ETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENA +NGVNSNSSSSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSNDTPYSRTREK +LELKRERKAARTLAIITGAFLICWLPFFIIALIGPFVDPEGIPPFARSFVLWLGYFNSLL +NPIIYTIFSPEFRSAFQKILFGKYRRGHR +> 36== X95604 1 Bombyx mori serotonin receptor <antennae>[InsectBiochem.Mol.Bi +MEGAEGQEELDWEALYLRLPLQNCSWNSTGWEPNWNVTVVPNTTWWQASAPFDTPAALVR +AAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLVMPLGAV +YEVVQRWTLGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTNIDYIHASTAKRVGMM +IACVWTVSFFVCIAQLLGWKDPDWNQRVSEDLRCVVSQDVGYQIFATASSFYVPVLIILI +LYWRIYQTARKRIRRRRGATARGGVGPPPVPAGGALVAGGGSGGIAAAVVAVIGRPLPTI +SETTTTGFTNVSSNNTSPEKQSCANGLEADPPTTGYGAVAAAYYPSLVRRKPKEAADSKR +ERKAAKTLAIITGAFVACWLPFFVLAILVPTCDCEVSPVLTSLSLWLGYFNSTLNPVIYT +VFSPEFRHAFQRLLCGRRVRRRRAPQ
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample_nuc.fa Wed Mar 20 07:34:52 2024 +0000 @@ -0,0 +1,41 @@ +>MZ681498.1 Rotylenchus bunae isolate GE29E-RO6 small subunit ribosomal RNA gene, partial sequence +ATGCATGTATAAGTATAACCTGCCAGACAGGGAAACTGCGGACGGCTCATTACAACAGCCTTAATTTACT +TGACCTTGACAACCTACTTGGATAACTGCGGTAATTCTGGAGCTAATACATGCACCAAAGCTCCGATCCC +TCGTGGAGAGGAGCGCATTTGTTCGCCACAAAACCAAACGCCCCACAAGGGCGTTCAGTGTTGACTCAGA +ATAACTAAGCTGATCGCACGGTCTTGCACCGGCGACGTGTCTTTCAAGTGTCTGCCTTATCAACTTTCGA +TGGTAGTGTATCTGCCTACCATGGTTGTGACGGGTAACGGAGGATAAGGGTTCGACTCCGGAGAAGGGGC +CTGAGAAATGGCCACTACGTCTAAGGATGGCAGCAGGCGCGCAAATTACCCACTCTCAACACGCTGAGGA +GGTAGTGAAGAGAAATAACGAGACCGTTCTCACATGAGGCCGGTCATCGGAATGGGTACAACTTAAACCC +TTTAACGAGTATCTATGAGAGGGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCTCAAAATGCAT +AGAATTATTGCTGCGGTTAAAAAGCTCGTAGTTGGATCTGTGCTGGCCGCCCGGTCTGCTCGCTGAGCAC +GCACTGGTGCGGTCGGCTTTCCTGCCCGGTACCTCCCCGGCGTTGGCCTTCACCGGTCGGCGTCGGTGGC +CGGGCGAGTTTACTTTGAACAAATCAGAGTGCTTCAAACAGGCGTTTCGCTTGAATGTTCGTGCATGGAA +TAATAGAAGAGGATTTCGGTCCGATTTTGTTGGTTTTGCTGACCGAGATAATGGTTAACAGAGACAAACG +GGGCCATTCGTATTGCTACGTGAGAGGTG + +>MZ681497.1 Rotylenchus bunae isolate GE29A-R1 small subunit ribosomal RNA gene, partial sequence +CTGCGGACGGCTCATTACAACAGCCTTAATTTACTTGACCTTGACAACCTACTTGGATAACTGCGGTAAT +TCTGGAGCTAATACATGCACCAAAGCTCCGATCCCTCGTGGAGAGGAGCGCATTTGTTCGCCACAAAACC +AAACGCCCCACAAGGGCGTTCAGTGTTGACTCAGAATAACTAAGCTGATCGCACGGTCTTGCACCGGCGA +CGTGTCTTTCAAGTGTCTGCCTTATCAACTTTCGATGGTAGTGTATCTGCCTACCATGGTTGTGACGGGT +AACGGAGGATAAGGGTTCGACTCCGGAGAAGGGGCCTGAGAAATGGCCACTACGTCTAAGGATGGCAGCA +GGCGCGCAAATTACCCACTCTCAACACGCTGAGGAGGTAGTGAAGAGAAATAACGAGACCGTTCTCACAT +GAGGCCGGTCATCGGAATGGGTACAACTTAAACCCTTTAACGAGTATCTATGAGAGGGCAAGTCTGGTGC +CAGCAGCCGCGGTAATTCCAGCTCTCAAAATGCATAGAATTATTGCTGCGGTTAAAAAGCTCGTAGTTGG +ATCTGTGCTGGCCGCCCGGTCTGCTCGCTGAGCACGCACTGGTGCGGTCGGCTTTCCTGCCCGGTACCTC +CCCGGCGTTGGCCTTCACCGGTCGGCGTCGGTGGCCGGGCGAGTTTACTTTGAACAAATCAGAGTGCTTC 
+AAACAGGCGTTTCGCTTGAATGTTCGTGCATGGAATAATAGAAGAGGATTTCGGTCCGATTTTGTTGGTT +TTGCTGACCGAGATAATGGTTAACAGAGACAAACGGGGCCATTCGTATTGCTACGTGAGAGGTGAAATTC +TTGGACCGTAGCAAGACGGACTACAGC + +>ON855043.1 Rotylenchus sp. JQ-2022 internal transcribed spacer 1 and 5.8S ribosomal RNA gene, partial sequence +GGCCGTCACTCCGGGGGGACAACACAAACGCCCTGAGGGCTTTACTCGTTGGGGTGCAAACTGGATCGAG +TGGCGCACACATCCCTTCCACGCAAAGACCTGCTGAAGAGGTCGGAGGCGATGAGTCCGAGCAACCCCAC +AAGCAACCAGGTTGGGGAAGCTGCACACGATACTGGGATGCACGCCCCCAGGGCACCTAACGGCTGCCGC +TGGCGTCTGTGCGTCGTTGAGCAGTTGTTGCGCACTTGCTTTTGTCGGAGCTGTACTCGGAGCATGCTGG +CATGGACCCACACAAAAGTGTGTGGCAGCGGCCACACACCCCTGTCCATGTCCTACGGACCGTAGCTAGG +GCGTGCTCTGGGTTTCTTCGGCTGGCAGTGTTGCTACGTCCGTGGCTGTGATGAGACGACGCGGTAGGGC +CTTGTGCGATGCGCCTGCACTTGGCTTAAGACTTGATGAGCTCACTGCGAAGAGCCGCCAGCAACCTTTT +TTTCATATACATTTTTTACAGGCACACTTGTGTGCTGATGAACAAAAATTCTAGCCTTATCGGTGGATCA +CTCGGCTCGTAGGTCGATG +