# HG changeset patch # User rnateam # Date 1434632396 14400 # Node ID a4e086689fae41e0a3853e3db4033e492411e0d7 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/mafft commit 1fc2105007248c6b9460c0f3a98e6589477d0b13 diff -r 000000000000 -r a4e086689fae mafft.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mafft.xml Thu Jun 18 08:59:56 2015 -0400 @@ -0,0 +1,152 @@ + + Multiple alignment program for amino acid or nucleotide sequences + + mafft + + + + + + + + + + + + #if $outputFormat.value == 'fasta' + $outputFasta + #elif $outputFormat.value == 'clustalw' + $outputClustalW + #end if + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + outputFormat == 'fasta' + + + outputFormat == 'clustalw' + + + + + + + + + + + + + + + + + + output] + +- G-INS-i (suitable for sequences of similar lengths; recommended for <200 sequences; iterative refinement method incorporating global pairwise alignment information): + + - mafft --globalpair --maxiterate 1000 input [> output] + +- E-INS-i (suitable for sequences containing large unalignable regions; recommended for <200 sequences): + + - mafft --ep 0 --genafpair --maxiterate 1000 input [> output]. For E-INS-i, the --ep 0 option is recommended to allow large gaps. + + +**Speed-oriented methods:** + +- FFT-NS-i (iterative refinement method; two cycles only): + + - mafft --retree 2 --maxiterate 2 input [> output] + +- FFT-NS-i (iterative refinement method; max. 1000 iterations): + + - mafft --retree 2 --maxiterate 1000 input [> output] + +- FFT-NS-2 (fast; progressive method): + + - mafft --retree 2 --maxiterate 0 input [> output] + +- FFT-NS-1 (very fast; recommended for >2000 sequences; progressive method with a rough guide tree): + + - mafft --retree 1 --maxiterate 0 input [> output] + +- NW-NS-i (iterative refinement method without FFT approximation; two cycles only): + + - mafft --retree 2 --maxiterate 2 --nofft input [> output] + +- NW-NS-2 (fast; progressive method without the FFT approximation): + + - mafft --retree 2 --maxiterate 0 --nofft input [> output] + +- NW-NS-PartTree-1 (recommended for ~10,000 to ~50,000 sequences; progressive method with the PartTree algorithm): + + - mafft --retree 1 --maxiterate 0 --nofft --parttree input [> output] + + ]]> + + + 10.1093/molbev/mst010 + + diff -r 000000000000 -r a4e086689fae test-data/mafft_fftns_result.aln --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mafft_fftns_result.aln Thu Jun 18 08:59:56 2015 -0400 @@ -0,0 +1,504 @@ +> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91] +-------------------MNGTE------------------------GDNF-------- +YVP----F-SNKTGLARSPY----------------EYPQY-------YLAEPWK----- +----YSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLFG-F +TVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGN-FR +FGNTHAIMGVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSCGPDYYTLNPNFNN +ESYVVYMFVVHFLVPFVIIFFCYGRLLCTV----KE------------------------ +---------------------------------------------------AAAAQQ--- +------------------------------------------------------------ +--------------ESASTQK------AEKEVTRMVVLMVIGFLVCWVPYASVAFYIFT- +HQGS--DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLCC---------GKN +PLGD-DE--SGASTSKTEVSSVS-TSPV-------------------------------- +--------------------------------------------SPA------------- +------ +> 2== U22180 1 rat opsin [J.Mol.Neurosci.5(3),207-209'94] +-------------------MNGTE------------------------GPNF-------- +YVP----F-SNITGVVRSPF----------------EQPQY-------YLAEPWQ----- +----FSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGG-F +TTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSN-FR +FGENHAIMGVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSCGIDYYTLKPEVNN +ESFVIYMFVVHFTIPMIVIFFCYGQLVFTV----KE------------------------ +---------------------------------------------------AAAQQQ--- +------------------------------------------------------------ +--------------ESATTQK------AEKEVTRMVIIMVIFFLICWLPYASVAMYIFT- +HQGS--NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLCC---------GKN +PLGD-DE--ASATASKTE------TSQV-------------------------------- +--------------------------------------------APA------------- +------ +> 3== M92038 1 chicken green sensitive cone opsin [PNAS89,5932-5936'9 +-------------------MNGTE------------------------GINF-------- +YVP----M-SNKTGVVRSPF----------------EYPQY-------YLAEPWK----- +----YRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACFG-F +TVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGN-FR +FSATHAMMGIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSCGPDYYTHNPDYHN +ESYVLYMFVIHFIIPVVVIFFSYGRLICKV----RE------------------------ +---------------------------------------------------AAAQQQ--- +------------------------------------------------------------ +--------------ESATTQK------AEKEVTRMVILMVLGFMLAWTPYAVVAFWIFT- +NKGA--DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTICC---------GKN +PFGD-EDVSSTVSQSKTEVSSVS-SSQV-------------------------------- +--------------------------------------------SPA------------- +------ +> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish +-------------------MNGTE------------------------GKNF-------- +YVP----M-SNRTGLVRSPF----------------EYPQY-------YLAEPWQ----- +----FKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCFG-F +TVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FK +FSSSHAFAGIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSCGPDYYTLNPDYNN +ESYVIYMFVCHFILPVAVIFFTYGRLVCTV----KA------------------------ +---------------------------------------------------AAAQQQ--- +------------------------------------------------------------ +--------------DSASTQK------AEREVTKMVILMVFGFLIAWTPYATVAAWIFF- +NKGA--DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIFC---------GKN +PLGD-DE-SSTVSTSKTEVSS------V-------------------------------- +--------------------------------------------SPA------------- +------ +> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish +-------------------MNGTE------------------------GNNF-------- +YVP----L-SNRTGLVRSPF----------------EYPQY-------YLAEPWQ----- +----FKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCFG-F +TVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FK +FSSTHASAGIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSCGPDYYTLNPEYNN +ESYVLYMFICHFILPVTIIFFTYGRLVCTV----KA------------------------ +---------------------------------------------------AAAQQQ--- +------------------------------------------------------------ +--------------DSASTQK------AEREVTKMVILMVLGFLVAWTPYATVAAWIFF- +NKGA--AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLFC---------GKN +PLGD-EE-SSTVSTSKTEVSS------V-------------------------------- +--------------------------------------------SPA------------- +------ +> 6== L11864 1 Carassius auratus blue cone opsin [Biochemistry32,208- +-------------------MKQVPEF----------------------HEDF-------- +YIPIPLDI-NNLS--AYSPF----------------LVPQD-------HLGNQGI----- +----FMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIANLFVAIFG-S +PLSFYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGN-FT +FKTPHAIAGCILPWISALAA-SLPPLFG-W-----SRYIPEGLQCSCGPDWYTTNNKYNN +ESYVMFLFCFCFAVPFGTIVFCYGQLLITL----KL------------------------ +---------------------------------------------------AAKAQA--- +------------------------------------------------------------ +--------------DSASTQK------AEREVTKMVVVMVLGFLVCWAPYASFSLWIVS- +HRGE--EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMM-KMVC---------GKN +-IEE-DE--ASTSSQVTQVSS------V-------------------------------- +--------------------------------------------APEK------------ +------ +> 7== M13299 1 human BCP <>[Science232(4747),193-202'86] +-------------------MRKMS------------------------EEEF-------- +YL-----F-KNIS--SVGPW----------------DGPQY-------HIAPVWA----- +----FYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIFS-V +FPVFVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGN-FR +FSSKHALTVVLATWTIGIGV-SIPPFFG-W-----SRFIPEGLQCSCGPDWYTVGTKYRS +ESYTWFLFIFCFIVPLSLICFSYTQLLRAL----KA------------------------ +---------------------------------------------------VAAQQQ--- +------------------------------------------------------------ +--------------ESATTQK------AEREVSRMVVVMVGSFCVCYVPYAAFAMYMVN- +NRNH--GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIM-KMVC---------GKA +-MTD-ES--DTCSSQKTEVSTVS-STQV-------------------------------- +--------------------------------------------GPN------------- +------ +> 8=opsin, greensensitive human (fragment) S07060 +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------------------------------DLAETVIA-S +TISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VR +FDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGV +QSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RA------------------------ +---------------------------------------------------VAKQQK--- +------------------------------------------------------------ +--------------ESESTQK------AEKEVTRMVVVMVLAFC---------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------ +> 9== K03494 1 human GCP <>[Science232(4747),193-202'86] +-------------------MAQQWSL----------QRLAGRHPQDSYEDST-------- +QSSI-FTY-TNSNS-TRGPF----------------EGPNY-------HIAPRWV----- +----YHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-S +TISVVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGN-VR +FDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGV +QSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RA------------------------ +---------------------------------------------------VAKQQK--- +------------------------------------------------------------ +--------------ESESTQK------AEKEVTRMVVVMVLAFCFCWGPYAFFACFAAA- +NPGY--PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCIL-QLF----------GKK +-VDD-GS--ELSSASKTEVSSV---SSV-------------------------------- +--------------------------------------------SPA------------- +------ +> 10== Z68193 1 human Red Opsin <>[] +-------------------MAQQWSL----------QRLAGRHPQDSYEDST-------- +QSSI-FTY-TNSNS-TRGPF----------------EGPNY-------HIAPRWV----- +----YHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-S +TISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VR +FDAKLAIVGIAFSWIWSAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGV +QSYMIVLMVTCCIIPLAIIMLCYLQVWLAI----RA------------------------ +---------------------------------------------------VAKQQK--- +------------------------------------------------------------ +--------------ESESTQK------AEKEVTRMVVVMIFAYCVCWGPYTFFACFAAA- +NPGY--AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCIL-QLF----------GKK +-VDD-GS--ELSSASKTEVSSV---SSV-------------------------------- +--------------------------------------------SPA------------- +------ +> 11== M92036 1 Gecko gecko P521 [PNAS89,6841-6845'92] +-------------------MTEAWNV----------AVFAARRSRDD-DDTT-------- +RGSV-FTY-TNTNN-TRGPF----------------EGPNY-------HIAPRWV----- +----YNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVA-S +TISVFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGN-IK +FDSKLAIIGIVFSWVWAWGW-SAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSVELGC +QSFMLTLMITCCFLPLFIIIVCYLQVWMAI----RA------------------------ +---------------------------------------------------VAAQQK--- +------------------------------------------------------------ +--------------ESESTQK------AEREVSRMVVVMIVAFCICWGPYASFVSFAAA- +NPGY--AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIM-QLF----------GKK +-VDD-GS--EASTTSRTEVSSVS-NSSV-------------------------------- +--------------------------------------------APA------------- +------ +> 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90] +-------------------MA-AWEA----------AFAARRRHEE--EDTT-------- +RDSV-FTY-TNSNN-TRGPF----------------EGPNY-------HIAPRWV----- +----YNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIA-S +TISVINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGN-IK +FDGKLAVAGILFSWLWSCAW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSDPGV +QSYMVVLMVTCCFFPLAIIILCYLQVWLAI----RA------------------------ +---------------------------------------------------VAAQQK--- +------------------------------------------------------------ +--------------ESESTQK------AEKEVSRMVVVMIVAYCFCWGPYTFFACFAAA- +NPGY--AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCIL-QLF----------GKK +-VDD-GS--EVST-SRTEVSSVS-NSSV-------------------------------- +--------------------------------------------SPA------------- +------ +> 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95] +-------------------MS---------------------------SNSS-------- +QAP--------PNG-TPGPF----------------DGPQW------PYQAPQST----- +----YVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLCG-S +SVSLSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGD-FQ +FQRRHAVSGCAFTWGWALLW-SAPPLLG-W-----SSYVPEGLRTSCGPNWYTGGSNN-- +NSYILSLFVTCFVLPLSLILFSYTNLLLTL----RA------------------------ +---------------------------------------------------AAAQQK--- +------------------------------------------------------------ +--------------EADTTQR------AEREVTRMVIVMVMAFLLCWLPYSTFALVVAT- +HKGI--IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLL-EMLCCGY-----QPQR +-TGK-AS--PGTPGPHADVTAAGLRNKV-------------------------------- +--------------------------------------------MPAHP---V------- +------ +> 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87] +----------MESGNVSSSLFGNVST----------ALRPEARL----SA---------- +-ETRLLGW--------NVPP----------------EELR--------HIPEHWLTYPEP +PESMNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVK--T +PIFIYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG--K +MTHGKAIAMIIFIYMYATPW-VVACYTETW-----GRFVPEGYLTSCTFDYLT--DNFDT +RLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKA------------------------ +---------------------------------------------------LRDQAKKM- +--------------------------------NVESL----------------------- +-----------RSNVDKNKET------AEIRIAKAAITICFLFFCSWTPYGVMSLIGAF- +GDKT--LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWLAL--------N +EKAP-ES-SAVASTSTTQEP-QQ-TTAA-------------------------------- +------------------------------------------------------------ +------ +> 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92 +----------MEYHNVSSVL-GNVSS----------VLRPDARL----SA---------- +-ESRLLGW--------NVPP----------------DELR--------HIPEHWLIYPEP +PESMNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIK--T +PIFIYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG--K +MTHGKAIAMIIFIYLYATPW-VVACYTESW-----GRFVPEGYLTSCTFDYLT--DNFDT +RLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKA------------------------ +---------------------------------------------------LRDQAKKM- +--------------------------------NVDSL----------------------- +-----------RSNVDKSKEA------AEIRIAKAAITICFLFFASWTPYGVMSLIGAF- +GDKT--LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWLAI--------S +EKAP-ES-RAAISTSTTQEQ-QQ-TTAA-------------------------------- +------------------------------------------------------------ +------ +> 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87] +----------ME------PLCNASEP----------PLRPEAR-----SSGN-------- +GDLQFLGW--------NVPP----------------DQIQ--------YIPEHWLTQLEP +PASMHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--A +PIF--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--N +MTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDT +RLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKA------------------------ +---------------------------------------------------LREQAKKM- +--------------------------------NVESL----------------------- +-----------RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIGAF- +GDKS--LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWLGV--------N +EKSG-EI-SSAQST-TTQEQ-QQ-TTAA-------------------------------- +------------------------------------------------------------ +------ +> 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92 +----------MD------ALCNASEP----------PLRPEARM----SSGS-------- +DELQFLGW--------NVPP----------------DQIQ--------YIPEHWLTQLEP +PASMHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--A +PIFIYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--N +MTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDT +RLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKA------------------------ +---------------------------------------------------LREQAKKM- +--------------------------------NVESL----------------------- +-----------RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIGAF- +GDKS--LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWLGV--------N +EKSG-EA-SSAQST-TTQEQTQQ-TSAA-------------------------------- +------------------------------------------------------------ +------ +> 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 [J.Exp.Biol.1 +-------------------MTNATGP----------QMAYYGAA----SMD--------- +-----FGYPEGVSIVDFVRP----------------EIKP--------YVHQHWYNYPPV +NPMWHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTN-V +PFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNG-PK +LTTGKAVVFALISWVIAIGC-ALPPFFG-W-----GNYILEGILDSCSYDYLT--QDFNT +FSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAA------------------------ +---------------------------------------------------MRAQAKKM- +--------------------------------NVSTL----------------------- +-----------RS-NEADAQR------AEIRIAKTALVNVSLWFICWTPYALISLKGVM- +GDTS--GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWFCV------HET +ETKS-ND-DSQSNSTVAQDKA--------------------------------------- +------------------------------------------------------------ +------ +> 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 [J.Exp.Biol.1 +-------------------MANVTGP----------QMAFYGSG----AAT--------- +-----FGYPEGMTVADFVPD----------------RVKH--------MVLDHWYNYPPV +NPMWHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTN-F +PPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNG-PK +LTQGKATFMCGLAWVISVGW-SLPPFFG-W-----GSYTLEGILDSCSYDYFT--RDMNT +ITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAA------------------------ +---------------------------------------------------MRAQAKKM- +--------------------------------NVTNL----------------------- +-----------RS-NEAETQR------AEIRIAKTALVNVSLWFICWTPYAAITIQGLL- +GNAE--GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWFCV------HEK +DPND-VE-ENQSSNTQTQEKS--------------------------------------- +------------------------------------------------------------ +------ +> 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85] +----------ME---SFAVAAAQLGP----------HFAPLS------------------ +----------NGSVVDKVTP----------------DMAH--------LISPYWNQFPAM +DPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-T +PMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RP +MTIPLALGKM---------------------------YVPEGNLTSCGIDYLE--RDWNP +RSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA------------------------ +---------------------------------------------------MREQAKKM- +--------------------------------NVKSL----------------------- +-----------RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMGLF- +KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GKV +DDGK-SS-DAQSQATASEAESKA------------------------------------- +------------------------------------------------------------ +------ +> 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85] +----------ME---SFAVAAAQLGP----------HFAPLS------------------ +----------NGSVVDKVTP----------------DMAH--------LISPYWNQFPAM +DPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-T +PMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RP +MTIPLALGKIAYIWFMSSIW-CLAPAFG-W-----SRYVPEGNLTSCGIDYLE--RDWNP +RSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA------------------------ +---------------------------------------------------MREQAKKM- +--------------------------------NVKSL----------------------- +-----------RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMGLF- +KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GKV +DDGK-SS-DAQSQATASEAESKA------------------------------------- +------------------------------------------------------------ +------ +> 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204' +----------MD---SFAAVATQLGP----------QFAAPS------------------ +----------NGSVVDKVTP----------------DMAH--------LISPYWDQFPAM +DPIWAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-T +PMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RP +MTIPLALGKIAYIWFMSTIWCCLAPVFG-W-----SRYVPEGNLTSCGIDYLE--RDWNP +RSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA------------------------ +---------------------------------------------------MREQAKKM- +--------------------------------NVKSL----------------------- +-----------RS-SEDADKS------AEGKLAKVALVTISLWFMAWTPYLVINCMGLF- +KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GKV +DDGK-SS-EAQSQATTSEAESKA------------------------------------- +------------------------------------------------------------ +------ +> 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86] +-----MERSHLP---ETPFDLAHSGP----------RFQAQSSG---------------- +----------NGSVLDNVLP----------------DMAH--------LVNPYWSRFAPM +DPMMSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-S +PVMIINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGING-TP +MTIKTSIMKILFIWMMAVFW-TVMPLIG-W-----SAYVPEGNLTACSIDYMT--RMWNP +RSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKA------------------------ +---------------------------------------------------MREQAKKM- +--------------------------------NVKSL----------------------- +-----------RS-SEDCDKS------AEGKLAKVALTTISLWFMAWTPYLVICYFGLF- +KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMCVF------GNT +DEPKPDA-PASDTETTSEADSKA------------------------------------- +------------------------------------------------------------ +------ +> 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92 +-----MERSLLP---EPPLAMALLGP----------RFEAQTGG---------------- +----------NRSVLDNVLP----------------DMAP--------LVNPHWSRFAPM +DPTMSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-S +PVMIINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGING-TP +MTIKTSIMKIAFIWMMAVFW-TIMPLIG-W-----SSYVPEGNLTACSIDYMT--RQWNP +RSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKA------------------------ +---------------------------------------------------MRDQAKKM- +--------------------------------NVKSL----------------------- +-----------RS-SEDCDKS------AENKLAKVALTTISLWFMAWTPYLIICYFGLF- +KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMCVC------GTT +DEPKPDA-PPSDTETTSEAESKD------------------------------------- +------------------------------------------------------------ +------ +> 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[] +-------------------MIAVSGP----------SYEAFSYG----GQA--------- +----RF---NNQTVVDKVPP----------------DMLH--------LIDANWYQYPPL +NPMWHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCM-S +PPMVINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSG-KP +LSINGALIRIIAIWLFSLGW-TIAPMFG-W-----NRYVPEGNMTACGTDYFN--RGLLS +ASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKN------------------------ +---------------------------------------------------MREQAKKM- +--------------------------------NVASL----------------------- +-----------RS-SENQNTS------AECKLAKVALMTISLWFMAWTPYLVINFSGIF- +NL-V--KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSLAC-------AA +EPSS-DA-VSTTSGTTTVTDNEK-SNA--------------------------------- +------------------------------------------------------------ +------ +> 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93] +---------------------MANQL----------SYSSLGWP----YQP--------- +----------NASVVDTMPK----------------EMLY--------MIHEHWYAFPPM +NPLWYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFM-M +PTMTSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAA-AP +LTHKKATLLLLFVWIWSGGW-TILPFFG-W-----SRYVPEGNLTSCTVDYLT--KDWSS +ASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQ------------------------ +---------------------------------------------------LREQAKKM- +--------------------------------NVASL----------------------- +-----------RANADQQKQS------AECRLAKVAMMTVGLWFMAWTPYLIISWAGVF- +SSGT--RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSLAC------GSG +ESGS-DV-KSEASATTTMEEKPK-IPEA-------------------------------- +------------------------------------------------------------ +------ +> 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88] +------------------------------------MVESTTLV----NQT--------- +-----WWY--NPTVD----------------------------------IHPHWAKFDPI +PDAVYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAINGF +PLKTISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAASKK +MSHRRAFLMIIFVWMWSIVW-SVGPVFN-W-----GAYVPEGILTSCSFDYLS--TDPST +RSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKE------------------------ +---------------------------------------------------MAAMAKRL- +--------------------------------NAKEL----------------------- +-----------R--KAQAGAS------AEMKLAKISMVIITQFMLSWSPYAIIALLAQF- +GPAE--WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQFDEKEC +EDAN-DA-EEEVVASER--GGES-RDAAQMKEMMAMMQKMQAQQAAYQPPPPPQGY--PP +QGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQG---VDNQAYQA +------ +> 28== X70498 1 Todarodes pacificus rhodopsin [FEBS317(1-2),5-11'93] +-------------------------------------MGRDLRD----NET--------- +-----WWY--NPSIV----------------------------------VHPHWREFDQV +PDAVYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLVNGF +PLMTISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAASKK +MSHRRAFIMIIFVWLWSVLW-AIGPIFG-W-----GAYTLEGVLCNCSFDYIS--RDSTT +RSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKE------------------------ +---------------------------------------------------MAAMAKRL- +--------------------------------NAKEL----------------------- +-----------R--KAQAGAN------AEMRLAKISIVIVSQFLLSWSPYAVVALLAQF- +GPLE--WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQFDDKET +EDDK-DA-ETEIPAGESSDAAPS-ADAAQMKEMMAMMQKMQQQQAAY----PPQGYAPPP +QGYPPQGY--PPQGYPPQGYPPQGYPP---PPQGAPPQ-GAPPAAPPQG---VDNQAYQA +------ +> 29== L21195 1 human serotonin 5-HT7 receptor protein 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93] +------------------------------------------------------------ +-MPHLLSGFLEVTASPAPTW----------------DAPPDNVSGCGEQIN--------Y +GRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-M +PFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVR +QNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF--------- +-GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF--------------------- +----------------------------------P--------GFPR----VQPES---- +---VISL-----------------NGVVKLQ--------KEVEECAN------------- +-----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFI +CGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQCQYR--------- +-----NINRKLSAAGMHEALKLA------------------------------------- +-------------------------------------------ERPERSEFVLQNSDHCG +KKGHDT +> 31=p A47425 serotonin receptor 5HT-7 - rat +------------------------------------------------------------ +-MPHLLSGFLEVTASPAPTW----------------DAPPDNVSGCGEQIN--------Y +GRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-M +PFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVR +QNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF--------- +-GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF--------------------- +----------------------------------P--------GFPR----VQPES---- +---VISL-----------------NGVVKLQ--------KEVEECAN------------- +-----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFI +CGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYR--------- +-----NINRKLSAAGMHEALKLA------------------------------------- +-------------------------------------------ERPERSEFVLQNSDHCG +KKGHDT +> 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92] +----------MD-------VLSPG------------QGNNTTSPPAPFETGG-------- +----------NTTGISDVTV---------------------------------------- +--SYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-L +PMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNK +RTPRRAAALISLTWLIGFLI-SIPPMLG-WRTPEDRSDPDA---CTISKDH--------- +-GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK------------------------ +---------------TVKKVEKTGADTRHGASPAPQPKKS-----------VNGESGSR- +--------NWRLGVESKAGGALCANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAG- +-PTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF- +CESSC-HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC--------- +-----RQ----------------------------------------------------- +------------------------------------------------------------ +------ +> 33=p A35181 serotonin receptor class 1A - rat +----------MD-------VFSFG------------QGNNTTASQEPFGTGG-------- +----------NVTSISDVTF---------------------------------------- +--SYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-L +PMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNK +RTPRRAAALISLTWLIGFLI-SIPPMLG-WRTPEDRSDPDA---CTISKDH--------- +-GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK------------------------ +---------------TVRKVEKKGAGTSLGTSSAPPPKKS-----------LNGQPGSG- +--------DWRRCAENRAVGTPCTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESG- +-SNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF- +CESSC-HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC--------- +-----RR----------------------------------------------------- +------------------------------------------------------------ +------ +> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93] +MANFTFGDLALD-------VARMG-----GLASTPSGLRSTGLTTPGLSPTG-------- +----------LVTSDFNDSYGLTGQFINGSHSSRSRDNASANDTSATNMTDDRYWSLTVY +SHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-M +PLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRR +RSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK--------- +-GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTE +YSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS-- +--------SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS- +------------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF- +VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR--------- +-----RGHR--------------------------------------------------- +------------------------------------------------------------ +------ +> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail +MANFTFGDLALD-------VARMG-----GLASTPSGLRSTGLTTPGLSPTG-------- +----------LVTSDFNDSYGLTGQFINGSHSSRSRDNASANDTSATNMTDDRYWSLTVY +SHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-M +PLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRR +RSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK--------- +-GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTE +YSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS-- +--------SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS- +------------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF- +VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR--------- +-----RGHR--------------------------------------------------- +------------------------------------------------------------ +------ +> 36== X95604 1 Bombyx mori serotonin receptor [InsectBiochem.Mol.Bi +-MEGAEGQEELD-------WEAL-------YLRLP--LQNCSWNSTGWEPNW-------- +----------NVTVVPNTTW---------WQASAPFDTPAALVRAAAK------------ +--------AVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLV-M +PLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDYIHA +STAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCVVSQDV--------- +-GYQIFATASSFYVPVLIILILYWRIYQTARKRIR------------------------- +--------------------RRRGATARGGVGPPP---------VPAGGALVAGGGSGGI +AAAVVAVIGRPLPTISETTTTGFTNVSS----NNTS---PEKQSCANGLEADPPTTGYGA +VAAAYYPSLVRRKPKEAADSK------RERKAAKTLAIITGAFVACWLPFFVLAILVPT- +CDCE---VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLLCGRRV--------- +-----RRRRA-------------------------------------------------- +---------------------------------------------PQ------------- +------ diff -r 000000000000 -r a4e086689fae test-data/mafft_nwns_result.aln --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mafft_nwns_result.aln Thu Jun 18 08:59:56 2015 -0400 @@ -0,0 +1,496 @@ +CLUSTAL format alignment by MAFFT NW-NS-2 (v7.221) + + +1== -------------------MNGTE------------------------GDNF-------- +2== -------------------MNGTE------------------------GPNF-------- +3== -------------------MNGTE------------------------GINF-------- +4=p -------------------MNGTE------------------------GKNF-------- +5=p -------------------MNGTE------------------------GNNF-------- +6== -------------------MKQVPEF----------------------HEDF-------- +7== -------------------MRKMS------------------------EEEF-------- +8=opsin, ------------------------------------------------------------ +9== -------------------MAQQWSL----------QRLAGRHPQDSYEDST-------- +10== -------------------MAQQWSL----------QRLAGRHPQDSYEDST-------- +11== -------------------MTEAWNV----------AVFAARRSRDD-DDTT-------- +12== -------------------MA-AWEA----------AFAARRRHEE--EDTT-------- +13== -------------------MS---------------------------SNSS-------- +14== ----------MESGNVSSSLFGNVST----------ALRPEARL----SA---------- +15== ----------MEYHNVSSVL-GNVSS----------VLRPDARL----SA---------- +16== ----------ME------PLCNASEP----------PLRPEAR-----SSGN-------- +17== ----------MD------ALCNASEP----------PLRPEARM----SSGS-------- +18== -------------------MTNATGP----------QMAYYGAA----SMD--------- +19== -------------------MANVTGP----------QMAFYGSG----AAT--------- +20== ----------ME---SFAVAAAQLGP----------HFAPLS------------------ +21== ----------ME---SFAVAAAQLGP----------HFAPLS------------------ +22== ----------MD---SFAAVATQLGP----------QFAAPS------------------ +23== -----MERSHLP---ETPFDLAHSGP----------RFQAQSSG---------------- +24== -----MERSLLP---EPPLAMALLGP----------RFEAQTGG---------------- +25== -------------------MIAVSGP----------SYEAFSYG----GQA--------- +26== ---------------------MANQL----------SYSSLGWP----YQP--------- +27== ------------------------------------MVESTTLV----NQT--------- +28== -------------------------------------MGRDLRD----NET--------- +29== ---------MMD-------VNSSGRPDLYGHLRSF-LLPEVGRGLPDLSPDGGADPVAGS +30== ------------------------------------------------------------ +31=p ------------------------------------------------------------ +32== ----------MD-------VLSPG------------QGNNTTSPPAPFETGG-------- +33=p ----------MD-------VFSFG------------QGNNTTASQEPFGTGG-------- +34== MANFTFGDLALD-------VARMG-----GLASTPSGLRSTGLTTPGLSPTG-------- +35=p MANFTFGDLALD-------VARMG-----GLASTPSGLRSTGLTTPGLSPTG-------- +36== -MEGAEGQEELD-------WEAL-------YLRLP--LQNCSWNSTGWEPNW-------- + + +1== YVP----F-SNKTGLARSPY----------------EYPQY-------YLAEPWK----- +2== YVP----F-SNITGVVRSPF----------------EQPQY-------YLAEPWQ----- +3== YVP----M-SNKTGVVRSPF----------------EYPQY-------YLAEPWK----- +4=p YVP----M-SNRTGLVRSPF----------------EYPQY-------YLAEPWQ----- +5=p YVP----L-SNRTGLVRSPF----------------EYPQY-------YLAEPWQ----- +6== YIPIPLDI-NNLS--AYSPF----------------LVPQD-------HLGNQGI----- +7== YL-----F-KNIS--SVGPW----------------DGPQY-------HIAPVWA----- +8=opsin, ------------------------------------------------------------ +9== QSSI-FTY-TNSNS-TRGPF----------------EGPNY-------HIAPRWV----- +10== QSSI-FTY-TNSNS-TRGPF----------------EGPNY-------HIAPRWV----- +11== RGSV-FTY-TNTNN-TRGPF----------------EGPNY-------HIAPRWV----- +12== RDSV-FTY-TNSNN-TRGPF----------------EGPNY-------HIAPRWV----- +13== QAP--------PNG-TPGPF----------------DGPQW------PYQAPQST----- +14== -ETRLLGW--------NVPP----------------EELR--------HIPEHWLTYPEP +15== -ESRLLGW--------NVPP----------------DELR--------HIPEHWLIYPEP +16== GDLQFLGW--------NVPP----------------DQIQ--------YIPEHWLTQLEP +17== DELQFLGW--------NVPP----------------DQIQ--------YIPEHWLTQLEP +18== -----FGYPEGVSIVDFVRP----------------EIKP--------YVHQHWYNYPPV +19== -----FGYPEGMTVADFVPD----------------RVKH--------MVLDHWYNYPPV +20== ----------NGSVVDKVTP----------------DMAH--------LISPYWNQFPAM +21== ----------NGSVVDKVTP----------------DMAH--------LISPYWNQFPAM +22== ----------NGSVVDKVTP----------------DMAH--------LISPYWDQFPAM +23== ----------NGSVLDNVLP----------------DMAH--------LVNPYWSRFAPM +24== ----------NRSVLDNVLP----------------DMAP--------LVNPHWSRFAPM +25== ----RF---NNQTVVDKVPP----------------DMLH--------LIDANWYQYPPL +26== ----------NASVVDTMPK----------------EMLY--------MIHEHWYAFPPM +27== -----WWY--NPTVD----------------------------------IHPHWAKFDPI +28== -----WWY--NPSIV----------------------------------VHPHWREFDQV +29== WAPHLLS---EVTASPAPTW----------------DAPPDNASGCGEQIN--------Y +30== -MPHLLSGFLEVTASPAPTW----------------DAPPDNVSGCGEQIN--------Y +31=p -MPHLLSGFLEVTASPAPTW----------------DAPPDNVSGCGEQIN--------Y +32== ----------NTTGISDVTV---------------------------------------- +33=p ----------NVTSISDVTF---------------------------------------- +34== ----------LVTSDFNDSYGLTGQFINGSHSSRSRDNASANDTSATNMTDDRYWSLTVY +35=p ----------LVTSDFNDSYGLTGQFINGSHSSRSRDNASANDTSATNMTDDRYWSLTVY +36== ----------NVTVVPNTTW---------WQASAPFDTPAALVRAAAK------------ + + +1== ----YSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLFG-F +2== ----FSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGG-F +3== ----YRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACFG-F +4=p ----FKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCFG-F +5=p ----FKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCFG-F +6== ----FMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIANLFVAIFG-S +7== ----FYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIFS-V +8=opsin, --------------------------------------------------DLAETVIA-S +9== ----YHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-S +10== ----YHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-S +11== ----YNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVA-S +12== ----YNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIA-S +13== ----YVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLCG-S +14== PESMNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVK--T +15== PESMNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIK--T +16== PASMHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--A +17== PASMHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--A +18== NPMWHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTN-V +19== NPMWHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTN-F +20== DPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-T +21== DPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-T +22== DPIWAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-T +23== DPMMSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-S +24== DPTMSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-S +25== NPMWHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCM-S +26== NPLWYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFM-M +27== PDAVYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAINGF +28== PDAVYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLVNGF +29== GRVEKVVIGSILTLITLLTIAGNCLVVISVCFVKKLRQPSNYLIVSLALADLSVAVAV-M +30== GRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-M +31=p GRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-M +32== --SYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-L +33=p --SYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-L +34== SHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-M +35=p SHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-M +36== --------AVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLV-M + . + +1== TVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGN-FR +2== TTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSN-FR +3== TVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGN-FR +4=p TVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FK +5=p TVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FK +6== PLSFYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGN-FT +7== FPVFVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGN-FR +8=opsin, TISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VR +9== TISVVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGN-VR +10== TISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VR +11== TISVFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGN-IK +12== TISVINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGN-IK +13== SVSLSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGD-FQ +14== PIFIYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG--K +15== PIFIYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG--K +16== PIF--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--N +17== PIFIYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--N +18== PFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNG-PK +19== PPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNG-PK +20== PMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RP +21== PMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RP +22== PMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RP +23== PVMIINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGING-TP +24== PVMIINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGING-TP +25== PPMVINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSG-KP +26== PTMTSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAA-AP +27== PLKTISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAASKK +28== PLMTISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAASKK +29== PFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVR +30== PFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVR +31=p PFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVR +32== PMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNK +33=p PMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNK +34== PLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRR +35=p PLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRR +36== PLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDYIHA + : : * : : :*: : : + +1== FGNTHAIMGVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSCGPDYYTLNPNFNN +2== FGENHAIMGVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSCGIDYYTLKPEVNN +3== FSATHAMMGIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSCGPDYYTHNPDYHN +4=p FSSSHAFAGIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSCGPDYYTLNPDYNN +5=p FSSTHASAGIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSCGPDYYTLNPEYNN +6== FKTPHAIAGCILPWISALAA-SLPPLFG-W-----SRYIPEGLQCSCGPDWYTTNNKYNN +7== FSSKHALTVVLATWTIGIGV-SIPPFFG-W-----SRFIPEGLQCSCGPDWYTVGTKYRS +8=opsin, FDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGV +9== FDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGV +10== FDAKLAIVGIAFSWIWSAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGV +11== FDSKLAIIGIVFSWVWAWGW-SAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSVELGC +12== FDGKLAVAGILFSWLWSCAW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSDPGV +13== FQRRHAVSGCAFTWGWALLW-SAPPLLG-W-----SSYVPEGLRTSCGPNWYTGGSNN-- +14== MTHGKAIAMIIFIYMYATPW-VVACYTETW-----GRFVPEGYLTSCTFDYLT--DNFDT +15== MTHGKAIAMIIFIYLYATPW-VVACYTESW-----GRFVPEGYLTSCTFDYLT--DNFDT +16== MTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDT +17== MTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDT +18== LTTGKAVVFALISWVIAIGC-ALPPFFG-W-----GNYILEGILDSCSYDYLT--QDFNT +19== LTQGKATFMCGLAWVISVGW-SLPPFFG-W-----GSYTLEGILDSCSYDYFT--RDMNT +20== MTIPLALGKM---------------------------YVPEGNLTSCGIDYLE--RDWNP +21== MTIPLALGKIAYIWFMSSIW-CLAPAFG-W-----SRYVPEGNLTSCGIDYLE--RDWNP +22== MTIPLALGKIAYIWFMSTIWCCLAPVFG-W-----SRYVPEGNLTSCGIDYLE--RDWNP +23== MTIKTSIMKILFIWMMAVFW-TVMPLIG-W-----SAYVPEGNLTACSIDYMT--RMWNP +24== MTIKTSIMKIAFIWMMAVFW-TIMPLIG-W-----SSYVPEGNLTACSIDYMT--RQWNP +25== LSINGALIRIIAIWLFSLGW-TIAPMFG-W-----NRYVPEGNMTACGTDYFN--RGLLS +26== LTHKKATLLLLFVWIWSGGW-TILPFFG-W-----SRYVPEGNLTSCTVDYLT--KDWSS +27== MSHRRAFLMIIFVWMWSIVW-SVGPVFN-W-----GAYVPEGILTSCSFDYLS--TDPST +28== MSHRRAFIMIIFVWLWSVLW-AIGPIFG-W-----GAYTLEGVLCNCSFDYIS--RDSTT +29== QNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF--------- +30== QNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF--------- +31=p QNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF--------- +32== RTPRRAAALISLTWLIGFLI-SIPPMLG-WRTPEDRSDPDA---CTISKDH--------- +33=p RTPRRAAALISLTWLIGFLI-SIPPMLG-WRTPEDRSDPDA---CTISKDH--------- +34== RSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK--------- +35=p RSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK--------- +36== STAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCVVSQDV--------- + : + +1== ESYVVYMFVVHFLVPFVIIFFCYGRLLCTV----KE------------------------ +2== ESFVIYMFVVHFTIPMIVIFFCYGQLVFTV----KE------------------------ +3== ESYVLYMFVIHFIIPVVVIFFSYGRLICKV----RE------------------------ +4=p ESYVIYMFVCHFILPVAVIFFTYGRLVCTV----KA------------------------ +5=p ESYVLYMFICHFILPVTIIFFTYGRLVCTV----KA------------------------ +6== ESYVMFLFCFCFAVPFGTIVFCYGQLLITL----KL------------------------ +7== ESYTWFLFIFCFIVPLSLICFSYTQLLRAL----KA------------------------ +8=opsin, QSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RA------------------------ +9== QSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RA------------------------ +10== QSYMIVLMVTCCIIPLAIIMLCYLQVWLAI----RA------------------------ +11== QSFMLTLMITCCFLPLFIIIVCYLQVWMAI----RA------------------------ +12== QSYMVVLMVTCCFFPLAIIILCYLQVWLAI----RA------------------------ +13== NSYILSLFVTCFVLPLSLILFSYTNLLLTL----RA------------------------ +14== RLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKA------------------------ +15== RLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKA------------------------ +16== RLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKA------------------------ +17== RLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKA------------------------ +18== FSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAA------------------------ +19== ITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAA------------------------ +20== RSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA------------------------ +21== RSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA------------------------ +22== RSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA------------------------ +23== RSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKA------------------------ +24== RSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKA------------------------ +25== ASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKN------------------------ +26== ASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQ------------------------ +27== RSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKE------------------------ +28== RSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKE------------------------ +29== -GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF--------------------- +30== -GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF--------------------- +31=p -GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF--------------------- +32== -GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK------------------------ +33=p -GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK------------------------ +34== -GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTE +35=p -GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTE +36== -GYQIFATASSFYVPVLIILILYWRIYQTARKRIR------------------------- + * : * : + +1== ---------------------------------------------------AAAAQQ--- +2== ---------------------------------------------------AAAQQQ--- +3== ---------------------------------------------------AAAQQQ--- +4=p ---------------------------------------------------AAAQQQ--- +5=p ---------------------------------------------------AAAQQQ--- +6== ---------------------------------------------------AAKAQA--- +7== ---------------------------------------------------VAAQQQ--- +8=opsin, ---------------------------------------------------VAKQQK--- +9== ---------------------------------------------------VAKQQK--- +10== ---------------------------------------------------VAKQQK--- +11== ---------------------------------------------------VAAQQK--- +12== ---------------------------------------------------VAAQQK--- +13== ---------------------------------------------------AAAQQK--- +14== ---------------------------------------------------LRDQAKKM- +15== ---------------------------------------------------LRDQAKKM- +16== ---------------------------------------------------LREQAKKM- +17== ---------------------------------------------------LREQAKKM- +18== ---------------------------------------------------MRAQAKKM- +19== ---------------------------------------------------MRAQAKKM- +20== ---------------------------------------------------MREQAKKM- +21== ---------------------------------------------------MREQAKKM- +22== ---------------------------------------------------MREQAKKM- +23== ---------------------------------------------------MREQAKKM- +24== ---------------------------------------------------MRDQAKKM- +25== ---------------------------------------------------MREQAKKM- +26== ---------------------------------------------------LREQAKKM- +27== ---------------------------------------------------MAAMAKRL- +28== ---------------------------------------------------MAAMAKRL- +29== ----------------------------------P--------GFPR----VEPDS---- +30== ----------------------------------P--------GFPR----VQPES---- +31=p ----------------------------------P--------GFPR----VQPES---- +32== ---------------TVKKVEKTGADTRHGASPAPQPKKS-----------VNGESGSR- +33=p ---------------TVRKVEKKGAGTSLGTSSAPPPKKS-----------LNGQPGSG- +34== YSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS-- +35=p YSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS-- +36== --------------------RRRGATARGGVGPPP---------VPAGGALVAGGGSGGI + + +1== ------------------------------------------------------------ +2== ------------------------------------------------------------ +3== ------------------------------------------------------------ +4=p ------------------------------------------------------------ +5=p ------------------------------------------------------------ +6== ------------------------------------------------------------ +7== ------------------------------------------------------------ +8=opsin, ------------------------------------------------------------ +9== ------------------------------------------------------------ +10== ------------------------------------------------------------ +11== ------------------------------------------------------------ +12== ------------------------------------------------------------ +13== ------------------------------------------------------------ +14== --------------------------------NVESL----------------------- +15== --------------------------------NVDSL----------------------- +16== --------------------------------NVESL----------------------- +17== --------------------------------NVESL----------------------- +18== --------------------------------NVSTL----------------------- +19== --------------------------------NVTNL----------------------- +20== --------------------------------NVKSL----------------------- +21== --------------------------------NVKSL----------------------- +22== --------------------------------NVKSL----------------------- +23== --------------------------------NVKSL----------------------- +24== --------------------------------NVKSL----------------------- +25== --------------------------------NVASL----------------------- +26== --------------------------------NVASL----------------------- +27== --------------------------------NAKEL----------------------- +28== --------------------------------NAKEL----------------------- +29== ---VIAL-----------------NGIVKLQ--------KEVEECAN------------- +30== ---VISL-----------------NGVVKLQ--------KEVEECAN------------- +31=p ---VISL-----------------NGVVKLQ--------KEVEECAN------------- +32== --------NWRLGVESKAGGALCANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAG- +33=p --------DWRRCAENRAVGTPCTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESG- +34== --------SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS- +35=p --------SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS- +36== AAAVVAVIGRPLPTISETTTTGFTNVSS----NNTS---PEKQSCANGLEADPPTTGYGA + + +1== --------------ESASTQK------AEKEVTRMVVLMVIGFLVCWVPYASVAFYIFT- +2== --------------ESATTQK------AEKEVTRMVIIMVIFFLICWLPYASVAMYIFT- +3== --------------ESATTQK------AEKEVTRMVILMVLGFMLAWTPYAVVAFWIFT- +4=p --------------DSASTQK------AEREVTKMVILMVFGFLIAWTPYATVAAWIFF- +5=p --------------DSASTQK------AEREVTKMVILMVLGFLVAWTPYATVAAWIFF- +6== --------------DSASTQK------AEREVTKMVVVMVLGFLVCWAPYASFSLWIVS- +7== --------------ESATTQK------AEREVSRMVVVMVGSFCVCYVPYAAFAMYMVN- +8=opsin, --------------ESESTQK------AEKEVTRMVVVMVLAFC---------------- +9== --------------ESESTQK------AEKEVTRMVVVMVLAFCFCWGPYAFFACFAAA- +10== --------------ESESTQK------AEKEVTRMVVVMIFAYCVCWGPYTFFACFAAA- +11== --------------ESESTQK------AEREVSRMVVVMIVAFCICWGPYASFVSFAAA- +12== --------------ESESTQK------AEKEVSRMVVVMIVAYCFCWGPYTFFACFAAA- +13== --------------EADTTQR------AEREVTRMVIVMVMAFLLCWLPYSTFALVVAT- +14== -----------RSNVDKNKET------AEIRIAKAAITICFLFFCSWTPYGVMSLIGAF- +15== -----------RSNVDKSKEA------AEIRIAKAAITICFLFFASWTPYGVMSLIGAF- +16== -----------RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIGAF- +17== -----------RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIGAF- +18== -----------RS-NEADAQR------AEIRIAKTALVNVSLWFICWTPYALISLKGVM- +19== -----------RS-NEAETQR------AEIRIAKTALVNVSLWFICWTPYAAITIQGLL- +20== -----------RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMGLF- +21== -----------RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMGLF- +22== -----------RS-SEDADKS------AEGKLAKVALVTISLWFMAWTPYLVINCMGLF- +23== -----------RS-SEDCDKS------AEGKLAKVALTTISLWFMAWTPYLVICYFGLF- +24== -----------RS-SEDCDKS------AENKLAKVALTTISLWFMAWTPYLIICYFGLF- +25== -----------RS-SENQNTS------AECKLAKVALMTISLWFMAWTPYLVINFSGIF- +26== -----------RANADQQKQS------AECRLAKVAMMTVGLWFMAWTPYLIISWAGVF- +27== -----------R--KAQAGAS------AEMKLAKISMVIITQFMLSWSPYAIIALLAQF- +28== -----------R--KAQAGAN------AEMRLAKISIVIVSQFLLSWSPYAVVALLAQF- +29== -----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFI +30== -----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFI +31=p -----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFI +32== -PTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF- +33=p -SNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF- +34== ------------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF- +35=p ------------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF- +36== VAAAYYPSLVRRKPKEAADSK------RERKAAKTLAIITGAFVACWLPFFVLAILVPT- + * . : + +1== HQGS--DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLCC---------GKN +2== HQGS--NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLCC---------GKN +3== NKGA--DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTICC---------GKN +4=p NKGA--DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIFC---------GKN +5=p NKGA--AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLFC---------GKN +6== HRGE--EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMM-KMVC---------GKN +7== NRNH--GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIM-KMVC---------GKA +8=opsin, ------------------------------------------------------------ +9== NPGY--PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCIL-QLF----------GKK +10== NPGY--AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCIL-QLF----------GKK +11== NPGY--AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIM-QLF----------GKK +12== NPGY--AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCIL-QLF----------GKK +13== HKGI--IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLL-EMLCCGY-----QPQR +14== GDKT--LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWLAL--------N +15== GDKT--LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWLAI--------S +16== GDKS--LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWLGV--------N +17== GDKS--LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWLGV--------N +18== GDTS--GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWFCV------HET +19== GNAE--GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWFCV------HEK +20== KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GKV +21== KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GKV +22== KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GKV +23== KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMCVF------GNT +24== KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMCVC------GTT +25== NL-V--KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSLAC-------AA +26== SSGT--RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSLAC------GSG +27== GPAE--WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQFDEKEC +28== GPLE--WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQFDDKET +29== CGTSCSCIPLWVERTFLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYR--------- +30== CGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQCQYR--------- +31=p CGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYR--------- +32== CESSC-HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC--------- +33=p CESSC-HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC--------- +34== VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR--------- +35=p VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR--------- +36== CDCE---VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLLCGRRV--------- + + +1== PLGD-DE--SGASTSKTEVSSVS-TSPV-------------------------------- +2== PLGD-DE--ASATASKTE------TSQV-------------------------------- +3== PFGD-EDVSSTVSQSKTEVSSVS-SSQV-------------------------------- +4=p PLGD-DE-SSTVSTSKTEVSS------V-------------------------------- +5=p PLGD-EE-SSTVSTSKTEVSS------V-------------------------------- +6== -IEE-DE--ASTSSQVTQVSS------V-------------------------------- +7== -MTD-ES--DTCSSQKTEVSTVS-STQV-------------------------------- +8=opsin, ------------------------------------------------------------ +9== -VDD-GS--ELSSASKTEVSSV---SSV-------------------------------- +10== -VDD-GS--ELSSASKTEVSSV---SSV-------------------------------- +11== -VDD-GS--EASTTSRTEVSSVS-NSSV-------------------------------- +12== -VDD-GS--EVST-SRTEVSSVS-NSSV-------------------------------- +13== -TGK-AS--PGTPGPHADVTAAGLRNKV-------------------------------- +14== EKAP-ES-SAVASTSTTQEP-QQ-TTAA-------------------------------- +15== EKAP-ES-RAAISTSTTQEQ-QQ-TTAA-------------------------------- +16== EKSG-EI-SSAQST-TTQEQ-QQ-TTAA-------------------------------- +17== EKSG-EA-SSAQST-TTQEQTQQ-TSAA-------------------------------- +18== ETKS-ND-DSQSNSTVAQDKA--------------------------------------- +19== DPND-VE-ENQSSNTQTQEKS--------------------------------------- +20== DDGK-SS-DAQSQATASEAESKA------------------------------------- +21== DDGK-SS-DAQSQATASEAESKA------------------------------------- +22== DDGK-SS-EAQSQATTSEAESKA------------------------------------- +23== DEPKPDA-PASDTETTSEADSKA------------------------------------- +24== DEPKPDA-PPSDTETTSEAESKD------------------------------------- +25== EPSS-DA-VSTTSGTTTVTDNEK-SNA--------------------------------- +26== ESGS-DV-KSEASATTTMEEKPK-IPEA-------------------------------- +27== EDAN-DA-EEEVVASER--GGES-RDAAQMKEMMAMMQKMQAQQAAYQPPPPPQGY--PP +28== EDDK-DA-ETEIPAGESSDAAPS-ADAAQMKEMMAMMQKMQQQQAAY----PPQGYAPPP +29== -----NINRKLSAAGMHEALKLA------------------------------------- +30== -----NINRKLSAAGMHEALKLA------------------------------------- +31=p -----NINRKLSAAGMHEALKLA------------------------------------- +32== -----RQ----------------------------------------------------- +33=p -----RR----------------------------------------------------- +34== -----RGHR--------------------------------------------------- +35=p -----RGHR--------------------------------------------------- +36== -----RRRRA-------------------------------------------------- + + +1== --------------------------------------------SPA------------- +2== --------------------------------------------APA------------- +3== --------------------------------------------SPA------------- +4=p --------------------------------------------SPA------------- +5=p --------------------------------------------SPA------------- +6== --------------------------------------------APEK------------ +7== --------------------------------------------GPN------------- +8=opsin, ------------------------------------------------------------ +9== --------------------------------------------SPA------------- +10== --------------------------------------------SPA------------- +11== --------------------------------------------APA------------- +12== --------------------------------------------SPA------------- +13== --------------------------------------------MPAHP---V------- +14== ------------------------------------------------------------ +15== ------------------------------------------------------------ +16== ------------------------------------------------------------ +17== ------------------------------------------------------------ +18== ------------------------------------------------------------ +19== ------------------------------------------------------------ +20== ------------------------------------------------------------ +21== ------------------------------------------------------------ +22== ------------------------------------------------------------ +23== ------------------------------------------------------------ +24== ------------------------------------------------------------ +25== ------------------------------------------------------------ +26== ------------------------------------------------------------ +27== QGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQG---VDNQAYQA +28== QGYPPQGY--PPQGYPPQGYPPQGYPP---PPQGAPPQ-GAPPAAPPQG---VDNQAYQA +29== -------------------------------------------ERPERPEFVLQNADYCR +30== -------------------------------------------ERPERSEFVLQNSDHCG +31=p -------------------------------------------ERPERSEFVLQNSDHCG +32== ------------------------------------------------------------ +33=p ------------------------------------------------------------ +34== ------------------------------------------------------------ +35=p ------------------------------------------------------------ +36== ---------------------------------------------PQ------------- + + +1== ------ +2== ------ +3== ------ +4=p ------ +5=p ------ +6== ------ +7== ------ +8=opsin, ------ +9== ------ +10== ------ +11== ------ +12== ------ +13== ------ +14== ------ +15== ------ +16== ------ +17== ------ +18== ------ +19== ------ +20== ------ +21== ------ +22== ------ +23== ------ +24== ------ +25== ------ +26== ------ +27== ------ +28== ------ +29== KKGHDS +30== KKGHDT +31=p KKGHDT +32== ------ +33=p ------ +34== ------ +35=p ------ +36== ------ + diff -r 000000000000 -r a4e086689fae test-data/sample.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample.fa Thu Jun 18 08:59:56 2015 -0400 @@ -0,0 +1,285 @@ +> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91] +MNGTEGDNFYVPFSNKTGLARSPYEYPQYYLAEPWKYSALAAYMFFLILVGFPVNFLTLF +VTVQHKKLRTPLNYILLNLAMANLFMVLFGFTVTMYTSMNGYFVFGPTMCSIEGFFATLG +GEVALWSLVVLAIERYIVICKPMGNFRFGNTHAIMGVAFTWIMALACAAPPLVGWSRYIP +EGMQCSCGPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQES +ASTQKAEKEVTRMVVLMVIGFLVCWVPYASVAFYIFTHQGSDFGATFMTLPAFFAKSSAL +YNPVIYILMNKQFRNCMITTLCCGKNPLGDDESGASTSKTEVSSVSTSPVSPA +> 2== U22180 1 rat opsin [J.Mol.Neurosci.5(3),207-209'94] +MNGTEGPNFYVPFSNITGVVRSPFEQPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY +VTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLG +GEIGLWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIP +EGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQES +ATTQKAEKEVTRMVIIMVIFFLICWLPYASVAMYIFTHQGSNFGPIFMTLPAFFAKTASI +YNPIIYIMMNKQFRNCMLTSLCCGKNPLGDDEASATASKTETSQVAPA +> 3== M92038 1 chicken green sensitive cone opsin [PNAS89,5932-5936'9 +MNGTEGINFYVPMSNKTGVVRSPFEYPQYYLAEPWKYRLVCCYIFFLISTGLPINLLTLL +VTFKHKKLRQPLNYILVNLAVADLFMACFGFTVTFYTAWNGYFVFGPVGCAVEGFFATLG +GQVALWSLVVLAIERYIVVCKPMGNFRFSATHAMMGIAFTWVMAFSCAAPPLFGWSRYMP +EGMQCSCGPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQES +ATTQKAEKEVTRMVILMVLGFMLAWTPYAVVAFWIFTNKGADFTATLMAVPAFFSKSSSL +YNPIIYVLMNKQFRNCMITTICCGKNPFGDEDVSSTVSQSKTEVSSVSSSQVSPA +> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish +MNGTEGKNFYVPMSNRTGLVRSPFEYPQYYLAEPWQFKILALYLFFLMSMGLPINGLTLV +VTAQHKKLRQPLNFILVNLAVAGTIMVCFGFTVTFYTAINGYFVLGPTGCAVEGFMATLG +GEVALWSLVVLAIERYIVVCKPMGSFKFSSSHAFAGIAFTWVMALACAAPPLFGWSRYIP +EGMQCSCGPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQDS +ASTQKAEREVTKMVILMVFGFLIAWTPYATVAAWIFFNKGADFSAKFMAIPAFFSKSSAL +YNPVIYVLLNKQFRNCMLTTIFCGKNPLGDDESSTVSTSKTEVSSVSPA +> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish +MNGTEGNNFYVPLSNRTGLVRSPFEYPQYYLAEPWQFKLLAVYMFFLICLGLPINGLTLI +CTAQHKKLRQPLNFILVNLAVAGAIMVCFGFTVTFYTAINGYFALGPTGCAVEGFMATLG +GEVALWSLVVLAIERYIVVCKPMGSFKFSSTHASAGIAFTWVMAMACAAPPLVGWSRYIP +EGIQCSCGPDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQDS +ASTQKAEREVTKMVILMVLGFLVAWTPYATVAAWIFFNKGAAFSAQFMAIPAFFSKTSAL +YNPVIYVLLNKQFRSCMLTTLFCGKNPLGDEESSTVSTSKTEVSSVSPA +> 6== L11864 1 Carassius auratus blue cone opsin [Biochemistry32,208- +MKQVPEFHEDFYIPIPLDINNLSAYSPFLVPQDHLGNQGIFMAMSVFMFFIFIGGASINI +LTILCTIQFKKLRSHLNYILVNLSIANLFVAIFGSPLSFYSFFNRYFIFGATACKIEGFL +ATLGGMVGLWSLAVVAFERWLVICKPLGNFTFKTPHAIAGCILPWISALAASLPPLFGWS +RYIPEGLQCSCGPDWYTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITLKLAAKA +QADSASTQKAEREVTKMVVVMVLGFLVCWAPYASFSLWIVSHRGEEFDLRMATIPSCLSK +ASTVYNPVIYVLMNKQFRSCMMKMVCGKNIEEDEASTSSQVTQVSSVAPEK +> 7== M13299 1 human BCP <>[Science232(4747),193-202'86] +MRKMSEEEFYLFKNISSVGPWDGPQYHIAPVWAFYLQAAFMGTVFLIGFPLNAMVLVATL +RYKKLRQPLNYILVNVSFGGFLLCIFSVFPVFVASCNGYFVFGRHVCALEGFLGTVAGLV +TGWSLAFLAFERYIVICKPFGNFRFSSKHALTVVLATWTIGIGVSIPPFFGWSRFIPEGL +QCSCGPDWYTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRALKAVAAQQQESATT +QKAEREVSRMVVVMVGSFCVCYVPYAAFAMYMVNNRNHGLDLRLVTIPSFFSKSACIYNP +IIYCFMNKQFQACIMKMVCGKAMTDESDTCSSQKTEVSTVSSTQVGPN +> 8=opsin, greensensitive human (fragment) S07060 +DLAETVIASTISIVNQVSGYFVLGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKP +FGNVRFDAKLAIVGIAFSWIWAAVWTAPPIFGWSRYWPHGLKTSCGPDVFSGSSYPGVQS +YMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESESTQKAEKEVTRMVVVMVLAFC +> 9== K03494 1 human GCP <>[Science232(4747),193-202'86] +MAQQWSLQRLAGRHPQDSYEDSTQSSIFTYTNSNSTRGPFEGPNYHIAPRWVYHLTSVWM +IFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIASTISVVNQVYGYFV +LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGNVRFDAKLAIVGIAFSWIWA +AVWTAPPIFGWSRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYL +QVWLAIRAVAKQQKESESTQKAEKEVTRMVVVMVLAFCFCWGPYAFFACFAAANPGYPFH +PLMAALPAFFAKSATIYNPVIYVFMNRQFRNCILQLFGKKVDDGSELSSASKTEVSSVSS +VSPA +> 10== Z68193 1 human Red Opsin <>[] +MAQQWSLQRLAGRHPQDSYEDSTQSSIFTYTNSNSTRGPFEGPNYHIAPRWVYHLTSVWM +IFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIASTISIVNQVSGYFV +LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGNVRFDAKLAIVGIAFSWIWS +AVWTAPPIFGWSRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYL +QVWLAIRAVAKQQKESESTQKAEKEVTRMVVVMIFAYCVCWGPYTFFACFAAANPGYAFH +PLMAALPAYFAKSATIYNPVIYVFMNRQFRNCILQLFGKKVDDGSELSSASKTEVSSVSS +VSPA +> 11== M92036 1 Gecko gecko P521 [PNAS89,6841-6845'92] +MTEAWNVAVFAARRSRDDDDTTRGSVFTYTNTNNTRGPFEGPNYHIAPRWVYNLVSFFMI +IVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVASTISVFNQIFGYFIL +GHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGNIKFDSKLAIIGIVFSWVWAW +GWSAPPIFGWSRYWPHGLKTSCGPDVFSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQ +VWMAIRAVAAQQKESESTQKAEREVSRMVVVMIVAFCICWGPYASFVSFAAANPGYAFHP +LAAALPAYFAKSATIYNPVIYVFMNRQFRNCIMQLFGKKVDDGSEASTTSRTEVSSVSNS +SVAPA +> 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90] +MAAWEAAFAARRRHEEEDTTRDSVFTYTNSNNTRGPFEGPNYHIAPRWVYNLTSVWMIFV +VAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIASTISVINQISGYFILGH +PMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGNIKFDGKLAVAGILFSWLWSCAW +TAPPIFGWSRYWPHGLKTSCGPDVFSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVW +LAIRAVAAQQKESESTQKAEKEVSRMVVVMIVAYCFCWGPYTFFACFAAANPGYAFHPLA +AALPAYFAKSATIYNPIIYVFMNRQFRNCILQLFGKKVDDGSEVSTSRTEVSSVSNSSVS +PA +> 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95] +MSSNSSQAPPNGTPGPFDGPQWPYQAPQSTYVGVAVLMGTVVACASVVNGLVIVVSICYK +KLRSPLNYILVNLAVADLLVTLCGSSVSLSNNINGFFVFGRRMCELEGFMVSLTGIVGLW +SLAILALERYVVVCKPLGDFQFQRRHAVSGCAFTWGWALLWSAPPLLGWSSYVPEGLRTS +CGPNWYTGGSNNNSYILSLFVTCFVLPLSLILFSYTNLLLTLRAAAAQQKEADTTQRAER +EVTRMVIVMVMAFLLCWLPYSTFALVVATHKGIIIQPVLASLPSYFSKTATVYNPIIYVF +MNKQFQSCLLEMLCCGYQPQRTGKASPGTPGPHADVTAAGLRNKVMPAHPV +> 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87] +MESGNVSSSLFGNVSTALRPEARLSAETRLLGWNVPPEELRHIPEHWLTYPEPPESMNYL +LGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVKTPIFIYNSFH +QGYALGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEGKMTHGKAIAMIIFIY +MYATPWVVACYTETWGRFVPEGYLTSCTFDYLTDNFDTRLFVACIFFFSFVCPTTMITYY +YSQIVGHVFSHEKALRDQAKKMNVESLRSNVDKNKETAEIRIAKAAITICFLFFCSWTPY +GVMSLIGAFGDKTLLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWLALNE +KAPESSAVASTSTTQEPQQTTAA +> 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92 +MEYHNVSSVLGNVSSVLRPDARLSAESRLLGWNVPPDELRHIPEHWLIYPEPPESMNYLL +GTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIKTPIFIYNSFHQ +GYALGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEGKMTHGKAIAMIIFIYL +YATPWVVACYTESWGRFVPEGYLTSCTFDYLTDNFDTRLFVACIFFFSFVCPTTMITYYY +SQIVGHVFSHEKALRDQAKKMNVDSLRSNVDKSKEAAEIRIAKAAITICFLFFASWTPYG +VMSLIGAFGDKTLLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWLAISEK +APESRAAISTSTTQEQQQTTAA +> 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87] +MEPLCNASEPPLRPEARSSGNGDLQFLGWNVPPDQIQYIPEHWLTQLEPPASMHYMLGVF +YIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLKAPIFNSFHRGFAIY +LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNRNMTFTKAVIMNIIIWLYCT +PWVVLPLTQFWDRFVPEGYLTSCSFDYLSDNFDTRLFVGTIFFFSFVCPTLMILYYYSQI +VGHVFSHEKALREQAKKMNVESLRSNVDKSKETAEIRIAKAAITICFLFFVSWTPYGVMS +LIGAFGDKSLLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWLGVNEKSGE +ISSAQSTTTQEQQQTTAA +> 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92 +MDALCNASEPPLRPEARMSSGSDELQFLGWNVPPDQIQYIPEHWLTQLEPPASMHYMLGV +FYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLKAPIFIYNSFHRGF +ALGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNRNMTFTKAVIMNIIIWLYC +TPWVVLPLTQFWDRFVPEGYLTSCSFDYLSDNFDTRLFVGTIFLFSFVVPTLMILYYYSQ +IVGHVFNHEKALREQAKKMNVESLRSNVDKSKETAEIRIAKAAITICFLFFVSWTPYGVM +SLIGAFGDKSLLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWLGVNEKSG +EASSAQSTTTQEQTQQTSAA +> 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 [J.Exp.Biol.1 +MTNATGPQMAYYGAASMDFGYPEGVSIVDFVRPEIKPYVHQHWYNYPPVNPMWHYLLGVI +YLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTNVPFFTYNCFSGGV +WMFSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNGPKLTTGKAVVFALISWV +IAIGCALPPFFGWGNYILEGILDSCSYDYLTQDFNTFSYNIFIFVFDYFLPAAIIVFSYV +FIVKAIFAHEAAMRAQAKKMNVSTLRSNEADAQRAEIRIAKTALVNVSLWFICWTPYALI +SLKGVMGDTSGITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWFCVHETET +KSNDDSQSNSTVAQDKA +> 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 [J.Exp.Biol.1 +MANVTGPQMAFYGSGAATFGYPEGMTVADFVPDRVKHMVLDHWYNYPPVNPMWHYLLGVV +YLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTNFPPFCYNCFSGGR +WMFSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNGPKLTQGKATFMCGLAWV +ISVGWSLPPFFGWGSYTLEGILDSCSYDYFTRDMNTITYNICIFIFDFFLPASVIVFSYV +FIVKAIFAHEAAMRAQAKKMNVTNLRSNEAETQRAEIRIAKTALVNVSLWFICWTPYAAI +TIQGLLGNAEGITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWFCVHEKDP +NDVEENQSSNTQTQEKS +> 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85] +MESFAVAAAQLGPHFAPLSNGSVVDKVTPDMAHLISPYWNQFPAMDPIWAKILTAYMIMI +GMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITNTPMMGINLYFETWVLGP +MMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGRPMTIPLALGKMYVPEGNLTSC +GIDYLERDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKSL +RSSEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGLFKFEGLTPLNTIWGACFAKS +AACYNPIVYGISHPKYRLALKEKCPCCVFGKVDDGKSSDAQSQATASEAESKA +> 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85] +MESFAVAAAQLGPHFAPLSNGSVVDKVTPDMAHLISPYWNQFPAMDPIWAKILTAYMIMI +GMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITNTPMMGINLYFETWVLGP +MMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGRPMTIPLALGKIAYIWFMSSIW +CLAPAFGWSRYVPEGNLTSCGIDYLERDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAA +VSAHEKAMREQAKKMNVKSLRSSEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGL +FKFEGLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVFGKVDDGKSSDA +QSQATASEAESKA +> 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204' +MDSFAAVATQLGPQFAAPSNGSVVDKVTPDMAHLISPYWDQFPAMDPIWAKILTAYMIII +GMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITNTPMMGINLYFETWVLGP +MMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGRPMTIPLALGKIAYIWFMSTIW +CCLAPVFGWSRYVPEGNLTSCGIDYLERDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIA +AVSAHEKAMREQAKKMNVKSLRSSEDADKSAEGKLAKVALVTISLWFMAWTPYLVINCMG +LFKFEGLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVFGKVDDGKSSE +AQSQATTSEAESKA +> 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86] +MERSHLPETPFDLAHSGPRFQAQSSGNGSVLDNVLPDMAHLVNPYWSRFAPMDPMMSKIL +GLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQSPVMIINFYY +ETWVLGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGINGTPMTIKTSIMKILFI +WMMAVFWTVMPLIGWSAYVPEGNLTACSIDYMTRMWNPRSYLITYSLFVYYTPLFLICYS +YWFIIAAVAAHEKAMREQAKKMNVKSLRSSEDCDKSAEGKLAKVALTTISLWFMAWTPYL +VICYFGLFKIDGLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMCVFGNTD +EPKPDAPASDTETTSEADSKA +> 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92 +MERSLLPEPPLAMALLGPRFEAQTGGNRSVLDNVLPDMAPLVNPHWSRFAPMDPTMSKIL +GLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQSPVMIINFYY +ETWVLGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGINGTPMTIKTSIMKIAFI +WMMAVFWTIMPLIGWSSYVPEGNLTACSIDYMTRQWNPRSYLITYSLFVYYTPLFMICYS +YWFIIATVAAHEKAMRDQAKKMNVKSLRSSEDCDKSAENKLAKVALTTISLWFMAWTPYL +IICYFGLFKIDGLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMCVCGTTD +EPKPDAPPSDTETTSEAESKD +> 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[] +MIAVSGPSYEAFSYGGQARFNNQTVVDKVPPDMLHLIDANWYQYPPLNPMWHGILGFVIG +MLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCMSPPMVINCYYETWVL +GPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSGKPLSINGALIRIIAIWLFSL +GWTIAPMFGWNRYVPEGNMTACGTDYFNRGLLSASYLVCYGIWVYFVPLFLIIYSYWFII +QAVAAHEKNMREQAKKMNVASLRSSENQNTSAECKLAKVALMTISLWFMAWTPYLVINFS +GIFNLVKISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSLACAAEPSSDAV +STTSGTTTVTDNEKSNA +> 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93] +MANQLSYSSLGWPYQPNASVVDTMPKEMLYMIHEHWYAFPPMNPLWYSILGVAMIILGII +CVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFMMPTMTSNCFAETWILGPFMC +EVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAAAPLTHKKATLLLLFVWIWSGGWTIL +PFFGWSRYVPEGNLTSCTVDYLTKDWSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAE +HEKQLREQAKKMNVASLRANADQQKQSAECRLAKVAMMTVGLWFMAWTPYLIISWAGVFS +SGTRLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSLACGSGESGSDVKSE +ASATTTMEEKPKIPEA +> 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88] +MVESTTLVNQTWWYNPTVDIHPHWAKFDPIPDAVYYSVGIFIGVVGIIGILGNGVVIYLF +SKTKSLQTPANMFIINLAMSDLSFSAINGFPLKTISAFMKKWIFGKVACQLYGLLGGIFG +FMSINTMAMISIDRYNVIGRPMAASKKMSHRRAFLMIIFVWMWSIVWSVGPVFNWGAYVP +EGILTSCSFDYLSTDPSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAK +RLNAKELRKAQAGASAEMKLAKISMVIITQFMLSWSPYAIIALLAQFGPAEWVTPYAAEL +PVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQFDEKECEDANDAEEEVVASER +GGESRDAAQMKEMMAMMQKMQAQQAAYQPPPPPQGYPPQGYPPQGAYPPPQGYPPQGYPP +QGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQAYQA +> 28== X70498 1 Todarodes pacificus rhodopsin [FEBS317(1-2),5-11'93] +MGRDLRDNETWWYNPSIVVHPHWREFDQVPDAVYYSLGIFIGICGIIGCGGNGIVIYLFT +KTKSLQTPANMFIINLAFSDFTFSLVNGFPLMTISCFLKKWIFGFAACKVYGFIGGIFGF +MSIMTMAMISIDRYNVIGRPMAASKKMSHRRAFIMIIFVWLWSVLWAIGPIFGWGAYTLE +GVLCNCSFDYISRDSTTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKR +LNAKELRKAQAGANAEMRLAKISIVIVSQFLLSWSPYAVVALLAQFGPLEWVTPYAAQLP +VMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQFDDKETEDDKDAETEIPAGESS +DAAPSADAAQMKEMMAMMQKMQQQQAAYPPQGYAPPPQGYPPQGYPPQGYPPQGYPPQGY +PPPPQGAPPQGAPPAAPPQGVDNQAYQA +> 29== L21195 1 human serotonin 5-HT7 receptor protein 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93] +MPHLLSGFLEVTASPAPTWDAPPDNVSGCGEQINYGRVEKVVIGSILTLITLLTIAGNCL +VVISVSFVKKLRQPSNYLIVSLALADLSVAVAVMPFVSVTDLIGGKWIFGHFFCNVFIAM +DVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASITLPPLFGW +AQNVNDDKVCLISQDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPGFPRV +QPESVISLNGVVKLQKEVEECANLSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWL +PFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQC +QYRNINRKLSAAGMHEALKLAERPERSEFVLQNSDHCGKKGHDT +> 31=p A47425 serotonin receptor 5HT-7 - rat +MPHLLSGFLEVTASPAPTWDAPPDNVSGCGEQINYGRVEKVVIGSILTLITLLTIAGNCL +VVISVSFVKKLRQPSNYLIVSLALADLSVAVAVMPFVSVTDLIGGKWIFGHFFCNVFIAM +DVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASITLPPLFGW +AQNVNDDKVCLISQDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPGFPRV +QPESVISLNGVVKLQKEVEECANLSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWL +PFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQC +QYRNINRKLSAAGMHEALKLAERPERSEFVLQNSDHCGKKGHDT +> 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92] +MDVLSPGQGNNTTSPPAPFETGGNTTGISDVTVSYQVITSLLLGTLIFCAVLGNACVVAA +IALERSLQNVANYLIGSLAVTDLMVSVLVLPMAALYQVLNKWTLGQVTCDLFIALDVLCC +TSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLISIPPMLGWRTPED +RSDPDACTISKDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKKVEKTGADT +RHGASPAPQPKKSVNGESGSRNWRLGVESKAGGALCANGAVRQGDDGAALEVIEVHRVGN +SKEHLPLPSEAGPTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLP +FFIVALVLPFCESSCHMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC +RQ +> 33=p A35181 serotonin receptor class 1A - rat +MDVFSFGQGNNTTASQEPFGTGGNVTSISDVTFSYQVITSLLLGTLIFCAVLGNACVVAA +IALERSLQNVANYLIGSLAVTDLMVSVLVLPMAALYQVLNKWTLGQVTCDLFIALDVLCC +TSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLISIPPMLGWRTPED +RSDPDACTISKDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRKVEKKGAGT +SLGTSSAPPPKKSLNGQPGSGDWRRCAENRAVGTPCTNGAVRQGDDEATLEVIEVHRVGN +SKEHLPLPSESGSNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLP +FFIVALVLPFCESSCHMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC +RR +> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93] +MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYGLTGQFINGS +HSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAV +MLERSLHNVANYLILSLAVADLMVAVLVMPLSVVSEISKVWFLHSEVCDMWISVDVLCCT +ASILHLVAIAMDRYWAVTSIDYIRRRSARRILLMIMVVWIVALFISIPPLFGWRDPNNDP +DKTGTCIISQDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTE +ETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENA +NGVNSNSSSSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSNDTPYSRTREK +LELKRERKAARTLAIITGAFLICWLPFFIIALIGPFVDPEGIPPFARSFVLWLGYFNSLL +NPIIYTIFSPEFRSAFQKILFGKYRRGHR +> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail +MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYGLTGQFINGS +HSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAV +MLERSLHNVANYLILSLAVADLMVAVLVMPLSVVSEISKVWFLHSEVCDMWISVDVLCCT +ASILHLVAIAMDRYWAVTSIDYIRRRSARRILLMIMVVWIVALFISIPPLFGWRDPNNDP +DKTGTCIISQDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTE +ETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENA +NGVNSNSSSSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSNDTPYSRTREK +LELKRERKAARTLAIITGAFLICWLPFFIIALIGPFVDPEGIPPFARSFVLWLGYFNSLL +NPIIYTIFSPEFRSAFQKILFGKYRRGHR +> 36== X95604 1 Bombyx mori serotonin receptor [InsectBiochem.Mol.Bi +MEGAEGQEELDWEALYLRLPLQNCSWNSTGWEPNWNVTVVPNTTWWQASAPFDTPAALVR +AAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLVMPLGAV +YEVVQRWTLGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTNIDYIHASTAKRVGMM +IACVWTVSFFVCIAQLLGWKDPDWNQRVSEDLRCVVSQDVGYQIFATASSFYVPVLIILI +LYWRIYQTARKRIRRRRGATARGGVGPPPVPAGGALVAGGGSGGIAAAVVAVIGRPLPTI +SETTTTGFTNVSSNNTSPEKQSCANGLEADPPTTGYGAVAAAYYPSLVRRKPKEAADSKR +ERKAAKTLAIITGAFVACWLPFFVLAILVPTCDCEVSPVLTSLSLWLGYFNSTLNPVIYT +VFSPEFRHAFQRLLCGRRVRRRRAPQ diff -r 000000000000 -r a4e086689fae tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Thu Jun 18 08:59:56 2015 -0400 @@ -0,0 +1,22 @@ + + + + + + https://raw.githubusercontent.com/bgruening/download_store/master/MAFFT/mafft-7.221-with-extensions-src.tgz + ./core + sed -i "s|\(PREFIX = \).*$|\1$INSTALL_DIR|" Makefile + make clean; make; make install + ../extensions + sed -i "s|\(PREFIX = \).*$|\1$INSTALL_DIR|" Makefile + make clean; make; make install + + $INSTALL_DIR + $INSTALL_DIR/bin + + + + + + +