# HG changeset patch # User peterjc # Date 1436107047 14400 # Node ID c16c30e9ad5b0a2373d75fc83617f1589c1eaa3e # Parent 2fe07f50a41ef0fe26d89812a4ab9276e45494e3 Uploaded v0.1.03 (internal changes); v0.1.02 (BLAST+ 2.2.30 etc) diff -r 2fe07f50a41e -r c16c30e9ad5b test-data/blastdb.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastdb.loc Sun Jul 05 10:37:27 2015 -0400 @@ -0,0 +1,7 @@ +# This is a test file distributed with the Galaxy BLAST+ wrapper for +# defining a list of nucleotide BLAST databases used in functional +# tests for blastn etc. +# +# See the file tool-data/blastdb.loc.sample for more information. +# +three_human_mRNA Three Human mRNAs ${__HERE__}/three_human_mRNA.fasta diff -r 2fe07f50a41e -r c16c30e9ad5b test-data/blastdb_d.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastdb_d.loc Sun Jul 05 10:37:27 2015 -0400 @@ -0,0 +1,7 @@ +# This is a test file distributed with the Galaxy BLAST+ wrapper for +# defining a list of protein domain BLAST databases used in functional +# tests of rpsblast etc. +# +# See the file tool-data/blastdb_d.loc.sample for more information. +# +cd00003_and_cd00008 Domains CD00003 (PNPsynthase) and CD00008 (PIN_53EXO-like) ${__HERE__}/cd00003_and_cd00008 diff -r 2fe07f50a41e -r c16c30e9ad5b test-data/blastdb_p.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastdb_p.loc Sun Jul 05 10:37:27 2015 -0400 @@ -0,0 +1,8 @@ +# This is a test file distributed with the Galaxy BLAST+ wrapper for +# defining a list of protein BLAST databases used in functional tests +# for blastp etc. +# +# See the file tool-data/blastdb_p.loc.sample for more information. 
+# +four_human_proteins Four Human Proteins (no taxid) ${__HERE__}/four_human_proteins.fasta +four_human_proteins_taxid Four Human Proteins (with taxid) ${__HERE__}/four_human_proteins_taxid.fasta diff -r 2fe07f50a41e -r c16c30e9ad5b test-data/blastn_chimera_vs_three_human_max1.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastn_chimera_vs_three_human_max1.tabular Sun Jul 05 10:37:27 2015 -0400 @@ -0,0 +1,1 @@ +chimera ENA|AB011145|AB011145.1 100.00 4560 0 0 1 4560 121 4680 0.0 8421 diff -r 2fe07f50a41e -r c16c30e9ad5b test-data/blastn_chimera_vs_three_human_max1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastn_chimera_vs_three_human_max1.txt Sun Jul 05 10:37:27 2015 -0400 @@ -0,0 +1,356 @@ +BLASTN 2.2.30+ + + +Reference: Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb +Miller (2000), "A greedy algorithm for aligning DNA sequences", J +Comput Biol 2000; 7(1-2):203-14. + + + +Database: Just 3 human mRNA sequences + 3 sequences; 10,732 total letters + + + +Query= chimera chunks of AB011145 plus M10051 plus BC112106 + +Length=9973 + Score E +Sequences producing significant alignments: (Bits) Value + + ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein,... 8421 0.0 + + +> ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, +partial cds. 
+Length=4796 + + Score = 8421 bits (4560), Expect = 0.0 + Identities = 4560/4560 (100%), Gaps = 0/4560 (0%) + Strand=Plus/Plus + +Query 1 GGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTC 60 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 121 GGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTC 180 + +Query 61 CCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGA 120 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 181 CCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGA 240 + +Query 121 TACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGC 180 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 241 TACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGC 300 + +Query 181 TGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCAT 240 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 301 TGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCAT 360 + +Query 241 TAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCA 300 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 361 TAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCA 420 + +Query 301 CTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAA 360 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 421 CTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAA 480 + +Query 361 TGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTA 420 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 481 TGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTA 540 + +Query 421 CATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCAC 480 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 541 CATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCAC 600 + +Query 481 TCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTA 540 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 
+Sbjct 601 TCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTA 660 + +Query 541 TAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATT 600 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 661 TAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATT 720 + +Query 601 TGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGG 660 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 721 TGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGG 780 + +Query 661 GCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAA 720 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 781 GCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAA 840 + +Query 721 TTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGA 780 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 841 TTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGA 900 + +Query 781 ATTGACAGAAGAAGGACTGCCTTTTCTCATACTCTTTCACATGAAAGAAGATACAGAAAG 840 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 901 ATTGACAGAAGAAGGACTGCCTTTTCTCATACTCTTTCACATGAAAGAAGATACAGAAAG 960 + +Query 841 TTTAGAAATATTCCAGAATGAAGTAGCTCGGCAATTAATAAGTGAAAAAGGTACAATAAA 900 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 961 TTTAGAAATATTCCAGAATGAAGTAGCTCGGCAATTAATAAGTGAAAAAGGTACAATAAA 1020 + +Query 901 CTTTTTACATGCCGATTGTGACAAATTTAGACATCCTCTTCTGCACATACAGAAAACTCC 960 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 1021 CTTTTTACATGCCGATTGTGACAAATTTAGACATCCTCTTCTGCACATACAGAAAACTCC 1080 + +Query 961 AGCAGATTGTCCTGTAATCGCTATTGACAGCTTTAGGCATATGTATGTGTTTGGAGACTT 1020 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 1081 AGCAGATTGTCCTGTAATCGCTATTGACAGCTTTAGGCATATGTATGTGTTTGGAGACTT 1140 + +Query 1021 CAAAGATGTATTAATTCCTGGAAAACTCAAGCAATTCGTATTTGACTTACATTCTGGAAA 1080 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 1141 
CAAAGATGTATTAATTCCTGGAAAACTCAAGCAATTCGTATTTGACTTACATTCTGGAAA 1200 + +Query 1081 ACTGCACAGAGAATTCCATCATGGACCTGACCCAACTGATACAGCCCCAGGAGAGCAAGC 1140 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 1201 ACTGCACAGAGAATTCCATCATGGACCTGACCCAACTGATACAGCCCCAGGAGAGCAAGC 1260 + +Query 1141 CCAAGATGTAGCAAGCAGTCCACCTGAGAGCTCCTTCCAGAAACTAGCACCCAGTGAATA 1200 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 1261 CCAAGATGTAGCAAGCAGTCCACCTGAGAGCTCCTTCCAGAAACTAGCACCCAGTGAATA 1320 + +Query 1201 TAGGTATACTCTATTGAGGGATCGAGATGAGCTTTAAAAACTTGAAAAACAGTTTGTAAG 1260 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 1321 TAGGTATACTCTATTGAGGGATCGAGATGAGCTTTAAAAACTTGAAAAACAGTTTGTAAG 1380 + +Query 1261 CCTTTCAACAGCAGCATCAACCTACGTGGTGGAAATAGTAAACCTATATTTTCATAATTC 1320 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 1381 CCTTTCAACAGCAGCATCAACCTACGTGGTGGAAATAGTAAACCTATATTTTCATAATTC 1440 + +Query 1321 TATGTGTatttttattttgaataaacagaaagaaattttgggtttttaatttttttCTCC 1380 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 1441 TATGTGTATTTTTATTTTGAATAAACAGAAAGAAATTTTGGGTTTTTAATTTTTTTCTCC 1500 + +Query 1381 CCGACTCAAAATGCATTGTCATTTAATATAGTAGCCTCTTaaaaaaaaaaaaaCCTGCTA 1440 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 1501 CCGACTCAAAATGCATTGTCATTTAATATAGTAGCCTCTTAAAAAAAAAAAAACCTGCTA 1560 + +Query 1441 GGATTTAAAAATAAAAATCAGAGGCCTATCTCCACTTTAAATCTGTCCTGTAAAAGTTTT 1500 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 1561 GGATTTAAAAATAAAAATCAGAGGCCTATCTCCACTTTAAATCTGTCCTGTAAAAGTTTT 1620 + +Query 1501 ATAAATCAAATGAAAGGTGACATTGCCAGAAACTTACCATTAACTTGCACTACTAGGGTA 1560 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 1621 ATAAATCAAATGAAAGGTGACATTGCCAGAAACTTACCATTAACTTGCACTACTAGGGTA 1680 + +Query 1561 GGGAGGACTTAGGATGTTTCCTGTGTCGTATGTGCTTTTCTTTCTTTCATATGATCAATT 1620 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 1681 
GGGAGGACTTAGGATGTTTCCTGTGTCGTATGTGCTTTTCTTTCTTTCATATGATCAATT 1740 + +Query 1621 CTGTTGGTATTTTCAGTATCTCATTTCTCAAAGCTAAAGAGATATACATTCTGGATACTT 1680 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 1741 CTGTTGGTATTTTCAGTATCTCATTTCTCAAAGCTAAAGAGATATACATTCTGGATACTT 1800 + +Query 1681 GGGAGGGGAATAAATTAAAGTTTTCACACTGTGTACTGTGTTTTACTGATTGGTTGGATA 1740 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 1801 GGGAGGGGAATAAATTAAAGTTTTCACACTGTGTACTGTGTTTTACTGATTGGTTGGATA 1860 + +Query 1741 TTGCTTATGAAAATTCCATAGTGGTAtttttttGGATTCTTAATGTGTAACTTAAACATA 1800 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 1861 TTGCTTATGAAAATTCCATAGTGGTATTTTTTTGGATTCTTAATGTGTAACTTAAACATA 1920 + +Query 1801 CTTTGAAGTGGAGGAGAGTCATAAGACAGAACATTTGGCAGGAATTGTCCTTATGAAACA 1860 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 1921 CTTTGAAGTGGAGGAGAGTCATAAGACAGAACATTTGGCAGGAATTGTCCTTATGAAACA 1980 + +Query 1861 AGAAAAAGAAAATGAAAAGTATTATTAAGCTTCTGTGTTTGTCTAAAAATGTGGCATATG 1920 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 1981 AGAAAAAGAAAATGAAAAGTATTATTAAGCTTCTGTGTTTGTCTAAAAATGTGGCATATG 2040 + +Query 1921 GATGGCATTTAAAACTTTGAATGAATTATACCTAAATCTGGGACAGGGAGGTGACAGTGG 1980 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 2041 GATGGCATTTAAAACTTTGAATGAATTATACCTAAATCTGGGACAGGGAGGTGACAGTGG 2100 + +Query 1981 AACAGGCTACCAATCAGAACTAGATGACTTTTAAGGCTCCTCCTATTATGAGACTTCAAT 2040 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 2101 AACAGGCTACCAATCAGAACTAGATGACTTTTAAGGCTCCTCCTATTATGAGACTTCAAT 2160 + +Query 2041 TTCCAAAGAGAAGAACTAGCAGAGAAATTGTATTTCAGTAATTTTAAGCTCCTTCTGTCT 2100 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 2161 TTCCAAAGAGAAGAACTAGCAGAGAAATTGTATTTCAGTAATTTTAAGCTCCTTCTGTCT 2220 + +Query 2101 TGTAGAGTCTTGTTATAGTTGTATAAATCAAAAACACAGAATAAGGAACATATTTAACtt 2160 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 2221 
TGTAGAGTCTTGTTATAGTTGTATAAATCAAAAACACAGAATAAGGAACATATTTAACTT 2280 + +Query 2161 tttttCATTATAAAATGGTTAGAGGACCCTACCCCCTCTAGATTCCCTGATTTCCCCAGG 2220 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 2281 TTTTTCATTATAAAATGGTTAGAGGACCCTACCCCCTCTAGATTCCCTGATTTCCCCAGG 2340 + +Query 2221 CCTGCAGCATACAGTAAGATGGGTCCCTGTGCCAGGCCTCAATACTGCCAGGGAATAAAA 2280 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 2341 CCTGCAGCATACAGTAAGATGGGTCCCTGTGCCAGGCCTCAATACTGCCAGGGAATAAAA 2400 + +Query 2281 CCAGAGGGAGAGGACCCTCAGTGTCATATCAGGAAGCCCAGTGCCAGAGGACAGACAGGT 2340 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 2401 CCAGAGGGAGAGGACCCTCAGTGTCATATCAGGAAGCCCAGTGCCAGAGGACAGACAGGT 2460 + +Query 2341 TCAAAACTGGCTTTTCCTCTGGGCCTGGGTTGGTGCTATAGGCCAAGGGTCATTTTATAC 2400 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 2461 TCAAAACTGGCTTTTCCTCTGGGCCTGGGTTGGTGCTATAGGCCAAGGGTCATTTTATAC 2520 + +Query 2401 TTGGGTATAAATCAATCCCAGTTTGGGAAAAGATTATTTTTAAGCTTAAAAGGCTGACAT 2460 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 2521 TTGGGTATAAATCAATCCCAGTTTGGGAAAAGATTATTTTTAAGCTTAAAAGGCTGACAT 2580 + +Query 2461 GTGCCATTATATGTAGTATGTAATATATGTAACATCTTCCAATTCTTTTAAAATAAAATT 2520 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 2581 GTGCCATTATATGTAGTATGTAATATATGTAACATCTTCCAATTCTTTTAAAATAAAATT 2640 + +Query 2521 AATATTTATAATGGATATTTAATGATTGTTATTTTTAAAAACCAGCTTATAATTCCTCGT 2580 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 2641 AATATTTATAATGGATATTTAATGATTGTTATTTTTAAAAACCAGCTTATAATTCCTCGT 2700 + +Query 2581 TATGCATGATTTATCCAAAGTTTCCATAGTTTTATTCAAAATAATAAATGTTAATAAGGT 2640 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 2701 TATGCATGATTTATCCAAAGTTTCCATAGTTTTATTCAAAATAATAAATGTTAATAAGGT 2760 + +Query 2641 GATAAGGGGTATATTTAATGTATTGTATCAAATTGTGAATAAGAAAGTAGGATGGAGCTT 2700 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 2761 
GATAAGGGGTATATTTAATGTATTGTATCAAATTGTGAATAAGAAAGTAGGATGGAGCTT 2820 + +Query 2701 TCTAGAGGTTGGGCCTTAGTTCTGTTATCCTCATTGCTTTTAACCAATAAGTTAAATGAA 2760 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 2821 TCTAGAGGTTGGGCCTTAGTTCTGTTATCCTCATTGCTTTTAACCAATAAGTTAAATGAA 2880 + +Query 2761 GTTAGAGTTATGGTCTTCAGGTTAGATTATGGACCAGATCTGTGAGGGTCAGCATGGAAA 2820 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 2881 GTTAGAGTTATGGTCTTCAGGTTAGATTATGGACCAGATCTGTGAGGGTCAGCATGGAAA 2940 + +Query 2821 TTCACATTCAACAAGGTAGCACACAGGACCAAGAGCAGCACATGCAATCAACTGGAATAA 2880 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 2941 TTCACATTCAACAAGGTAGCACACAGGACCAAGAGCAGCACATGCAATCAACTGGAATAA 3000 + +Query 2881 TATAGTAATCCTGTAACTGGGTTTGAAAAAATAATCAACAAAAGATACAATTCAAGGGTT 2940 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 3001 TATAGTAATCCTGTAACTGGGTTTGAAAAAATAATCAACAAAAGATACAATTCAAGGGTT 3060 + +Query 2941 AGGTTGCAGAGAGCTGGCTTGAGAGTAGTTATTATGAAAAAGGCCTCAAGGAGTACGTGT 3000 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 3061 AGGTTGCAGAGAGCTGGCTTGAGAGTAGTTATTATGAAAAAGGCCTCAAGGAGTACGTGT 3120 + +Query 3001 TCAGTATGCTCTAAGATGATAAAGTGGCTGTTAAAAAGGGAGTTGATTTGAGGAAGTATT 3060 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 3121 TCAGTATGCTCTAAGATGATAAAGTGGCTGTTAAAAAGGGAGTTGATTTGAGGAAGTATT 3180 + +Query 3061 ACTTAGCATTCATGCATATTGGGCTTAGGCTCTAGCCCTGCCACTATCATTGTCTTCTCT 3120 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 3181 ACTTAGCATTCATGCATATTGGGCTTAGGCTCTAGCCCTGCCACTATCATTGTCTTCTCT 3240 + +Query 3121 GGACTGTGAAGTCACTGAGGACAAGGAAACTAAATTTAATGTCTGTATCACTAGTGCCTA 3180 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 3241 GGACTGTGAAGTCACTGAGGACAAGGAAACTAAATTTAATGTCTGTATCACTAGTGCCTA 3300 + +Query 3181 GAATTTCTGGACACTTAGTAGTCACCATCAGGCGTTTATTTAATGAATGAGAAGCAAAGT 3240 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 3301 
GAATTTCTGGACACTTAGTAGTCACCATCAGGCGTTTATTTAATGAATGAGAAGCAAAGT 3360 + +Query 3241 GACCTTGGTTACTTTTTTACCCTGAGGGGCTCAGCACTCATTAGGACTTGGTGCCTAATT 3300 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 3361 GACCTTGGTTACTTTTTTACCCTGAGGGGCTCAGCACTCATTAGGACTTGGTGCCTAATT 3420 + +Query 3301 TTATAAAAAGTCACTAAGCTCAAGTGCTTGGATGAAAGGACAGCGTGGATAAAAAGGTTT 3360 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 3421 TTATAAAAAGTCACTAAGCTCAAGTGCTTGGATGAAAGGACAGCGTGGATAAAAAGGTTT 3480 + +Query 3361 TTAAAACATGGATGTTAAGGCTGTTTTGCTTGGAGAAGACTTGGGACTGGGACAGTCTTT 3420 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 3481 TTAAAACATGGATGTTAAGGCTGTTTTGCTTGGAGAAGACTTGGGACTGGGACAGTCTTT 3540 + +Query 3421 AGATATTATTTGAAATGCTGGCACTGTCTATCTGGATCCCAGGGCTTGAACTAGGATTTG 3480 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 3541 AGATATTATTTGAAATGCTGGCACTGTCTATCTGGATCCCAGGGCTTGAACTAGGATTTG 3600 + +Query 3481 AGGAAGTCACAGGGAAGCAGATTTCAGTCTGACATTTATTCAGTGCAAGTTTTTTGGTGC 3540 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 3601 AGGAAGTCACAGGGAAGCAGATTTCAGTCTGACATTTATTCAGTGCAAGTTTTTTGGTGC 3660 + +Query 3541 TGTAGTATATGATGAAAGATGTAAAGCTGAATAAAGCATTATTTCTGCCCTAGAGTTGTT 3600 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 3661 TGTAGTATATGATGAAAGATGTAAAGCTGAATAAAGCATTATTTCTGCCCTAGAGTTGTT 3720 + +Query 3601 CACAGCCTAGTCAGGCATATGGATATGTAAACAATGACTGTAACGTGTTATAGATGTAAA 3660 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 3721 CACAGCCTAGTCAGGCATATGGATATGTAAACAATGACTGTAACGTGTTATAGATGTAAA 3780 + +Query 3661 GACAAAATAAAGGTTAAAGAGGGCATAAAGGAGCACTCAATTGCAGAGATTTGAGGACAT 3720 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 3781 GACAAAATAAAGGTTAAAGAGGGCATAAAGGAGCACTCAATTGCAGAGATTTGAGGACAT 3840 + +Query 3721 TATTTTTATTTTGAGCTTTAAAAAGATGAATAGGTGTTCTCAGGAGGTAGGGATCTGGCT 3780 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 3841 
TATTTTTATTTTGAGCTTTAAAAAGATGAATAGGTGTTCTCAGGAGGTAGGGATCTGGCT 3900 + +Query 3781 GAGAGGGAATAATCTGAGCAAAGGTATGAAACAGCCTAATGCATTAGAGAAAAAAGTTCT 3840 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 3901 GAGAGGGAATAATCTGAGCAAAGGTATGAAACAGCCTAATGCATTAGAGAAAAAAGTTCT 3960 + +Query 3841 TTTAGTAAGGCATTTGGGGTTGGGGAAGCTAGAAAAAGAAATGGGAGCTGGTCACACAGG 3900 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 3961 TTTAGTAAGGCATTTGGGGTTGGGGAAGCTAGAAAAAGAAATGGGAGCTGGTCACACAGG 4020 + +Query 3901 GCCTTGTGTGCCAGACTAAGGGGTTTGTAGTATATATTGTAGGCAGAAGAGATCCATCAA 3960 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 4021 GCCTTGTGTGCCAGACTAAGGGGTTTGTAGTATATATTGTAGGCAGAAGAGATCCATCAA 4080 + +Query 3961 CAGATTGCAAGCAAGGAAGTATGTTCACTTTAAAGTTTGAGAAAGAATAGTGTGGAAGCA 4020 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 4081 CAGATTGCAAGCAAGGAAGTATGTTCACTTTAAAGTTTGAGAAAGAATAGTGTGGAAGCA 4140 + +Query 4021 CGTCTCAAATTTAGACTTACTTGTTCCCCCTCTGAACCGTGAATCAGACCATTTCAGGTA 4080 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 4141 CGTCTCAAATTTAGACTTACTTGTTCCCCCTCTGAACCGTGAATCAGACCATTTCAGGTA 4200 + +Query 4081 GAAGTCTTCCCCGGTTTATCTGATCTACTCGGGGCCTCAGGCTTCTCAGCTGGGAAGAGA 4140 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 4201 GAAGTCTTCCCCGGTTTATCTGATCTACTCGGGGCCTCAGGCTTCTCAGCTGGGAAGAGA 4260 + +Query 4141 GGATGCAAGACCAGACTGAAGAACACGGTTGAGTCCCCAGAACCAAAAGGGGGCCTTTCT 4200 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 4261 GGATGCAAGACCAGACTGAAGAACACGGTTGAGTCCCCAGAACCAAAAGGGGGCCTTTCT 4320 + +Query 4201 GCTTCTTAGCCAGCTACCTCTTCGAGTTTTTCAAATTGTGAGGGGGACCATAAAAGGATG 4260 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 4321 GCTTCTTAGCCAGCTACCTCTTCGAGTTTTTCAAATTGTGAGGGGGACCATAAAAGGATG 4380 + +Query 4261 GAAACTTTTAGATGACATTCTACAAATTAtttttttCTTTAAATTAAAAGAACCTAGCCA 4320 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 4381 
GAAACTTTTAGATGACATTCTACAAATTATTTTTTTCTTTAAATTAAAAGAACCTAGCCA 4440 + +Query 4321 ATAAGATAGAGAATGGGCATCTAAGGCATCTCAGAGCTCTCTGATGAAGCCAGGTTGTCA 4380 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 4441 ATAAGATAGAGAATGGGCATCTAAGGCATCTCAGAGCTCTCTGATGAAGCCAGGTTGTCA 4500 + +Query 4381 AAGATCATTTGCAAAAGAAGGGAAAACTGGCATGACAAAAGCTACAGAGAGGAGAGTGAA 4440 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 4501 AAGATCATTTGCAAAAGAAGGGAAAACTGGCATGACAAAAGCTACAGAGAGGAGAGTGAA 4560 + +Query 4441 ATATAGAAGTGTTTGAAATGTTCAAGCTCACAATAAGCTTAAATTTATAGAAAATGCTAA 4500 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 4561 ATATAGAAGTGTTTGAAATGTTCAAGCTCACAATAAGCTTAAATTTATAGAAAATGCTAA 4620 + +Query 4501 GGTTGTCAAGAAGGCttttttttttttcttttttAAACCTGAGGGCAAAAAGGAATGGAT 4560 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +Sbjct 4621 GGTTGTCAAGAAGGCTTTTTTTTTTTTCTTTTTTAAACCTGAGGGCAAAAAGGAATGGAT 4680 + + + +Lambda K H + 1.33 0.621 1.12 + +Gapped +Lambda K H + 1.28 0.460 0.850 + +Effective search space used: 106299490 + + + Database: Just 3 human mRNA sequences + Posted date: Dec 26, 2014 5:54 AM + Number of letters in database: 10,732 + Number of sequences in database: 3 + + + +Matrix: blastn matrix 1 -2 +Gap Penalties: Existence: 0, Extension: 2.5 diff -r 2fe07f50a41e -r c16c30e9ad5b test-data/blastn_rhodopsin_vs_three_human.xml --- a/test-data/blastn_rhodopsin_vs_three_human.xml Mon Dec 01 05:59:16 2014 -0500 +++ b/test-data/blastn_rhodopsin_vs_three_human.xml Sun Jul 05 10:37:27 2015 -0400 @@ -2,7 +2,7 @@ blastn - BLASTN 2.2.29+ + BLASTN 2.2.30+ Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb Miller (2000), "A greedy algorithm for aligning DNA sequences", J Comput Biol 2000; 7(1-2):203-14. 
Query_1 diff -r 2fe07f50a41e -r c16c30e9ad5b test-data/blastp_four_human_vs_rhodopsin.xml --- a/test-data/blastp_four_human_vs_rhodopsin.xml Mon Dec 01 05:59:16 2014 -0500 +++ b/test-data/blastp_four_human_vs_rhodopsin.xml Sun Jul 05 10:37:27 2015 -0400 @@ -2,7 +2,7 @@ blastp - BLASTP 2.2.29+ + BLASTP 2.2.30+ Stephen F. Altschul, Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402. sp|Q9BS26|ERP44_HUMAN diff -r 2fe07f50a41e -r c16c30e9ad5b test-data/blastx_rhodopsin_vs_four_human.xml --- a/test-data/blastx_rhodopsin_vs_four_human.xml Mon Dec 01 05:59:16 2014 -0500 +++ b/test-data/blastx_rhodopsin_vs_four_human.xml Sun Jul 05 10:37:27 2015 -0400 @@ -2,7 +2,7 @@ blastx - BLASTX 2.2.29+ + BLASTX 2.2.30+ Stephen F. Altschul, Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402. 
Query_1 diff -r 2fe07f50a41e -r c16c30e9ad5b test-data/blastx_rhodopsin_vs_four_human_all.tabular --- a/test-data/blastx_rhodopsin_vs_four_human_all.tabular Mon Dec 01 05:59:16 2014 -0500 +++ b/test-data/blastx_rhodopsin_vs_four_human_all.tabular Sun Jul 05 10:37:27 2015 -0400 @@ -1,10 +1,10 @@ -gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 sp|P08100|OPSD_HUMAN 1647 336 343 0 98.56 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A 0 gi|57163782|ref|NM_001009242.1| gi|57163782|ref|NM_001009242.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 15KA71FL4TS79VA9ML34VI71LI7SASA21LI13TA1GV11 99 99 N/A N/A N/A N/A N/A -gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 sp|P08100|OPSD_HUMAN 1419 283 315 0 94.88 3 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD 
MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348 N/A 0 gi|2734705|gb|U59921.1|BBU59921 gi|2734705|gb|U59921.1|BBU59921 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 3/0 10IV1MF2KA20YF1IM1CA7LV7ML5IV17FV1NDHL4CG3VS1ML1ST1MLNH3IVLF1AP3YNVL36SG4VI7IV3SA2VA3LA9ML5VI26LM8RQ2CF30VIFA13FY2STNH3EN7VI7SASA9LM8IL2LI6FL1ED1DE 63 63 N/A N/A N/A N/A N/A -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 3e-67 220 sp|P08100|OPSD_HUMAN 561 107 109 0 98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 5KA9HY61FL4TS28 22 8 N/A N/A N/A N/A N/A -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 sp|P08100|OPSD_HUMAN 319 60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 3/0 18LV3GS19LI7SASA13 22 5 N/A N/A N/A N/A N/A -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 sp|P08100|OPSD_HUMAN 304 54 57 0 96.61 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 
0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 2/0 6ML34VI14VARASA 22 4 N/A N/A N/A N/A N/A -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1 sp|P08100|OPSD_HUMAN 240 55 56 0 94.92 3 0 LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 3/0 1AG36LV1LF13VA4 22 4 N/A N/A N/A N/A N/A -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0 sp|P08100|OPSD_HUMAN 136 23 24 0 92.31 1 0 QFRNCMLTTLCCGKNPLGDDEASTTA QFRNCMLTTICCGKNPLGDDEASATV 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 9LI13TA1AV 22 2 N/A N/A N/A N/A N/A -gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589 sp|P08100|OPSD_HUMAN 1518 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348 N/A 0 gi|283855822|gb|GQ290312.1| gi|283855822|gb|GQ290312.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 5KA66ND4FL4TS5ML7AP49LV25ML34VI40VI6LV19VI11SASA21LI13TA1 99 99 N/A N/A N/A N/A N/A -gi|18148870|dbj|AB062417.1| 
sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 619 sp|P08100|OPSD_HUMAN 1596 325 337 0 96.84 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A 0 gi|18148870|dbj|AB062417.1| gi|18148870|dbj|AB062417.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 15KA9AY22MV38FL4TS79VA9ML10PLHKEP1TV14IT2LM1VI47LV3GS11DN14TSSA1VI17VL2LI13TA13 99 99 N/A N/A N/A N/A N/A -gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0 532 sp|P08100|OPSD_HUMAN 1371 272 307 0 92.19 2 0 MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA 1344 348 N/A 0 gi|12583664|dbj|AB043817.1| gi|12583664|dbj|AB043817.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A 
N/A 2/0 10IV1MF22AQ2AM1SA4FL2IVAL12IVEQ23FL4TS1ML3ML12IL14CS8WYMV5VMTS6SN6MAVF2TV6LA3FA20RLAK1GEIV10TVCV2ST2LMAIVI1SF4RQ2CF12EA3RK2RK6VI4SA2VI9WF8TN9SA5SA1LI3MV3CM6HN2IL2LI6FLEGEDEDDEGA 74 74 N/A N/A N/A N/A N/A +gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 sp|P08100|OPSD_HUMAN 1647 336 343 0 98.56 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A 0 gi|57163782|ref|NM_001009242.1| gi|57163782|ref|NM_001009242.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 15KA71FL4TS79VA9ML34VI71LI7SASA21LI13TA1GV11 99 33 N/A N/A N/A N/A N/A +gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 sp|P08100|OPSD_HUMAN 1419 283 315 0 94.88 3 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD 
MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348 N/A 0 gi|2734705|gb|U59921.1|BBU59921 gi|2734705|gb|U59921.1|BBU59921 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 3/0 10IV1MF2KA20YF1IM1CA7LV7ML5IV17FV1NDHL4CG3VS1ML1ST1MLNH3IVLF1AP3YNVL36SG4VI7IV3SA2VA3LA9ML5VI26LM8RQ2CF30VIFA13FY2STNH3EN7VI7SASA9LM8IL2LI6FL1ED1DE 63 21 N/A N/A N/A N/A N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 3e-67 220 sp|P08100|OPSD_HUMAN 561 107 109 0 98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 5KA9HY61FL4TS28 22 3 N/A N/A N/A N/A N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 sp|P08100|OPSD_HUMAN 319 60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 3/0 18LV3GS19LI7SASA13 22 2 N/A N/A N/A N/A N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 sp|P08100|OPSD_HUMAN 304 54 57 0 96.61 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 
0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 2/0 6ML34VI14VARASA 22 1 N/A N/A N/A N/A N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1 sp|P08100|OPSD_HUMAN 240 55 56 0 94.92 3 0 LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 3/0 1AG36LV1LF13VA4 22 1 N/A N/A N/A N/A N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0 sp|P08100|OPSD_HUMAN 136 23 24 0 92.31 1 0 QFRNCMLTTLCCGKNPLGDDEASTTA QFRNCMLTTICCGKNPLGDDEASATV 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 9LI13TA1AV 22 1 N/A N/A N/A N/A N/A +gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589 sp|P08100|OPSD_HUMAN 1518 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348 N/A 0 gi|283855822|gb|GQ290312.1| gi|283855822|gb|GQ290312.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 5KA66ND4FL4TS5ML7AP49LV25ML34VI40VI6LV19VI11SASA21LI13TA1 99 33 N/A N/A N/A N/A N/A +gi|18148870|dbj|AB062417.1| 
sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 619 sp|P08100|OPSD_HUMAN 1596 325 337 0 96.84 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A 0 gi|18148870|dbj|AB062417.1| gi|18148870|dbj|AB062417.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 15KA9AY22MV38FL4TS79VA9ML10PLHKEP1TV14IT2LM1VI47LV3GS11DN14TSSA1VI17VL2LI13TA13 99 33 N/A N/A N/A N/A N/A +gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0 532 sp|P08100|OPSD_HUMAN 1371 272 307 0 92.19 2 0 MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA 1344 348 N/A 0 gi|12583664|dbj|AB043817.1| gi|12583664|dbj|AB043817.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A 
N/A 2/0 10IV1MF22AQ2AM1SA4FL2IVAL12IVEQ23FL4TS1ML3ML12IL14CS8WYMV5VMTS6SN6MAVF2TV6LA3FA20RLAK1GEIV10TVCV2ST2LMAIVI1SF4RQ2CF12EA3RK2RK6VI4SA2VI9WF8TN9SA5SA1LI3MV3CM6HN2IL2LI6FLEGEDEDDEGA 74 25 N/A N/A N/A N/A N/A diff -r 2fe07f50a41e -r c16c30e9ad5b test-data/chimera.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/chimera.fasta Sun Jul 05 10:37:27 2015 -0400 @@ -0,0 +1,168 @@ +>chimera chunks of AB011145 plus M10051 plus BC112106 +GGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTC +CCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGA +TACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGC +TGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCAT +TAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCA +CTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAA +TGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTA +CATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCAC +TCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTA +TAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATT +TGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGG +GCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAA +TTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGA +ATTGACAGAAGAAGGACTGCCTTTTCTCATACTCTTTCACATGAAAGAAGATACAGAAAG +TTTAGAAATATTCCAGAATGAAGTAGCTCGGCAATTAATAAGTGAAAAAGGTACAATAAA +CTTTTTACATGCCGATTGTGACAAATTTAGACATCCTCTTCTGCACATACAGAAAACTCC +AGCAGATTGTCCTGTAATCGCTATTGACAGCTTTAGGCATATGTATGTGTTTGGAGACTT +CAAAGATGTATTAATTCCTGGAAAACTCAAGCAATTCGTATTTGACTTACATTCTGGAAA +ACTGCACAGAGAATTCCATCATGGACCTGACCCAACTGATACAGCCCCAGGAGAGCAAGC +CCAAGATGTAGCAAGCAGTCCACCTGAGAGCTCCTTCCAGAAACTAGCACCCAGTGAATA +TAGGTATACTCTATTGAGGGATCGAGATGAGCTTTAAAAACTTGAAAAACAGTTTGTAAG +CCTTTCAACAGCAGCATCAACCTACGTGGTGGAAATAGTAAACCTATATTTTCATAATTC +TATGTGTATTTTTATTTTGAATAAACAGAAAGAAATTTTGGGTTTTTAATTTTTTTCTCC +CCGACTCAAAATGCATTGTCATTTAATATAGTAGCCTCTTAAAAAAAAAAAAACCTGCTA 
+GGATTTAAAAATAAAAATCAGAGGCCTATCTCCACTTTAAATCTGTCCTGTAAAAGTTTT +ATAAATCAAATGAAAGGTGACATTGCCAGAAACTTACCATTAACTTGCACTACTAGGGTA +GGGAGGACTTAGGATGTTTCCTGTGTCGTATGTGCTTTTCTTTCTTTCATATGATCAATT +CTGTTGGTATTTTCAGTATCTCATTTCTCAAAGCTAAAGAGATATACATTCTGGATACTT +GGGAGGGGAATAAATTAAAGTTTTCACACTGTGTACTGTGTTTTACTGATTGGTTGGATA +TTGCTTATGAAAATTCCATAGTGGTATTTTTTTGGATTCTTAATGTGTAACTTAAACATA +CTTTGAAGTGGAGGAGAGTCATAAGACAGAACATTTGGCAGGAATTGTCCTTATGAAACA +AGAAAAAGAAAATGAAAAGTATTATTAAGCTTCTGTGTTTGTCTAAAAATGTGGCATATG +GATGGCATTTAAAACTTTGAATGAATTATACCTAAATCTGGGACAGGGAGGTGACAGTGG +AACAGGCTACCAATCAGAACTAGATGACTTTTAAGGCTCCTCCTATTATGAGACTTCAAT +TTCCAAAGAGAAGAACTAGCAGAGAAATTGTATTTCAGTAATTTTAAGCTCCTTCTGTCT +TGTAGAGTCTTGTTATAGTTGTATAAATCAAAAACACAGAATAAGGAACATATTTAACTT +TTTTTCATTATAAAATGGTTAGAGGACCCTACCCCCTCTAGATTCCCTGATTTCCCCAGG +CCTGCAGCATACAGTAAGATGGGTCCCTGTGCCAGGCCTCAATACTGCCAGGGAATAAAA +CCAGAGGGAGAGGACCCTCAGTGTCATATCAGGAAGCCCAGTGCCAGAGGACAGACAGGT +TCAAAACTGGCTTTTCCTCTGGGCCTGGGTTGGTGCTATAGGCCAAGGGTCATTTTATAC +TTGGGTATAAATCAATCCCAGTTTGGGAAAAGATTATTTTTAAGCTTAAAAGGCTGACAT +GTGCCATTATATGTAGTATGTAATATATGTAACATCTTCCAATTCTTTTAAAATAAAATT +AATATTTATAATGGATATTTAATGATTGTTATTTTTAAAAACCAGCTTATAATTCCTCGT +TATGCATGATTTATCCAAAGTTTCCATAGTTTTATTCAAAATAATAAATGTTAATAAGGT +GATAAGGGGTATATTTAATGTATTGTATCAAATTGTGAATAAGAAAGTAGGATGGAGCTT +TCTAGAGGTTGGGCCTTAGTTCTGTTATCCTCATTGCTTTTAACCAATAAGTTAAATGAA +GTTAGAGTTATGGTCTTCAGGTTAGATTATGGACCAGATCTGTGAGGGTCAGCATGGAAA +TTCACATTCAACAAGGTAGCACACAGGACCAAGAGCAGCACATGCAATCAACTGGAATAA +TATAGTAATCCTGTAACTGGGTTTGAAAAAATAATCAACAAAAGATACAATTCAAGGGTT +AGGTTGCAGAGAGCTGGCTTGAGAGTAGTTATTATGAAAAAGGCCTCAAGGAGTACGTGT +TCAGTATGCTCTAAGATGATAAAGTGGCTGTTAAAAAGGGAGTTGATTTGAGGAAGTATT +ACTTAGCATTCATGCATATTGGGCTTAGGCTCTAGCCCTGCCACTATCATTGTCTTCTCT +GGACTGTGAAGTCACTGAGGACAAGGAAACTAAATTTAATGTCTGTATCACTAGTGCCTA +GAATTTCTGGACACTTAGTAGTCACCATCAGGCGTTTATTTAATGAATGAGAAGCAAAGT +GACCTTGGTTACTTTTTTACCCTGAGGGGCTCAGCACTCATTAGGACTTGGTGCCTAATT +TTATAAAAAGTCACTAAGCTCAAGTGCTTGGATGAAAGGACAGCGTGGATAAAAAGGTTT 
+TTAAAACATGGATGTTAAGGCTGTTTTGCTTGGAGAAGACTTGGGACTGGGACAGTCTTT +AGATATTATTTGAAATGCTGGCACTGTCTATCTGGATCCCAGGGCTTGAACTAGGATTTG +AGGAAGTCACAGGGAAGCAGATTTCAGTCTGACATTTATTCAGTGCAAGTTTTTTGGTGC +TGTAGTATATGATGAAAGATGTAAAGCTGAATAAAGCATTATTTCTGCCCTAGAGTTGTT +CACAGCCTAGTCAGGCATATGGATATGTAAACAATGACTGTAACGTGTTATAGATGTAAA +GACAAAATAAAGGTTAAAGAGGGCATAAAGGAGCACTCAATTGCAGAGATTTGAGGACAT +TATTTTTATTTTGAGCTTTAAAAAGATGAATAGGTGTTCTCAGGAGGTAGGGATCTGGCT +GAGAGGGAATAATCTGAGCAAAGGTATGAAACAGCCTAATGCATTAGAGAAAAAAGTTCT +TTTAGTAAGGCATTTGGGGTTGGGGAAGCTAGAAAAAGAAATGGGAGCTGGTCACACAGG +GCCTTGTGTGCCAGACTAAGGGGTTTGTAGTATATATTGTAGGCAGAAGAGATCCATCAA +CAGATTGCAAGCAAGGAAGTATGTTCACTTTAAAGTTTGAGAAAGAATAGTGTGGAAGCA +CGTCTCAAATTTAGACTTACTTGTTCCCCCTCTGAACCGTGAATCAGACCATTTCAGGTA +GAAGTCTTCCCCGGTTTATCTGATCTACTCGGGGCCTCAGGCTTCTCAGCTGGGAAGAGA +GGATGCAAGACCAGACTGAAGAACACGGTTGAGTCCCCAGAACCAAAAGGGGGCCTTTCT +GCTTCTTAGCCAGCTACCTCTTCGAGTTTTTCAAATTGTGAGGGGGACCATAAAAGGATG +GAAACTTTTAGATGACATTCTACAAATTATTTTTTTCTTTAAATTAAAAGAACCTAGCCA +ATAAGATAGAGAATGGGCATCTAAGGCATCTCAGAGCTCTCTGATGAAGCCAGGTTGTCA +AAGATCATTTGCAAAAGAAGGGAAAACTGGCATGACAAAAGCTACAGAGAGGAGAGTGAA +ATATAGAAGTGTTTGAAATGTTCAAGCTCACAATAAGCTTAAATTTATAGAAAATGCTAA +GGTTGTCAAGAAGGCTTTTTTTTTTTTCTTTTTTAAACCTGAGGGCAAAAAGGAATGGAT +TGGGGGCCGCCTCGGAGCATGACCCCCGCGGGCCAGCGCCGCGCGCCTGATCCGAGGAGA +CCCCGCGCTCCCGCAGCCATGGGCACCGGGGGCCGGCGGGGGGCGGCGGCCGCGCCGCTG +CTGGTGGCGGTGGCCGCGCTGCTACTGGGCGCCGCGGGCCACCTGTACCCCGGAGAGGTG +TGTCCCGGCATGGATATCCGGAACAACCTCACTAGGTTGCATGAGCTGGAGAATTGCTCT +GTCATCGAAGGACACTTGCAGATACTCTTGATGTTCAAAACGAGGCCCGAAGATTTCCGA +GACCTCAGTTTCCCCAAACTCATCATGATCACTGATTACTTGCTGCTCTTCCGGGTCTAT +GGGCTCGAGAGCCTGAAGGACCTGTTCCCCAACCTCACGGTCATCCGGGGATCACGACTG +TTCTTTAACTACGCGCTGGTCATCTTCGAGATGGTTCACCTCAAGGAACTCGGCCTCTAC +AACCTGATGAACATCACCCGGGGTTCTGTCCGCATCGAGAAGAACAATGAGCTCTGTTAC +TTGGCCACTATCGACTGGTCCCGTATCCTGGATTCCGTGGAGGATAATCACATCGTGTTG +AACAAAGATGACAACGAGGAGTGTGGAGACATCTGTCCGGGTACCGCGAAGGGCAAGACC +AACTGCCCCGCCACCGTCATCAACGGGCAGTTTGTCGAACGATGTTGGACTCATAGTCAC 
+TGCCAGAAAGTTTGCCCGACCATCTGTAAGTCACACGGCTGCACCGCCGAAGGCCTCTGT +TGCCACAGCGAGTGCCTGGGCAACTGTTCTCAGCCCGACGACCCCACCAAGTGCGTGGCC +TGCCGCAACTTCTACCTGGACGGCAGGTGTGTGGAGACCTGCCCGCCCCCGTACTACCAC +TTCCAGGACTGGCGCTGTGTGAACTTCAGCTTCTGCCAGGACCTGCACCACAAATGCAAG +AACTCGCGGAGGCAGGGCTGCCACCAATACGTCATTCACAACAACAAGTGCATCCCTGAG +TGTCCCTCCGGGTACACGATGAATTCCAGCAACTTGCTGTGCACCCCATGCCTGGGTCCC +TGTCCCAAGGTGTGCCACCTCCTAGAAGGCGAGAAGACCATCGACTCGGTGACGTCTGCC +CAGGAGCTCCGAGGATGCACCGTCATCAACGGGAGTCTGATCATCAACATTCGAGGAGGC +AACAATCTGGCAGCTGAGCTAGAAGCCAACCTCGGCCTCATTGAAGAAATTTCAGGGTAT +CTAAAAATCCGCCGATCCTACGCTCTGGTGTCACTTTCCTTCTTCCGGAAGTTACGTCTG +ATTCGAGGAGAGACCTTGGAAATTGGGAACTACTCCTTCTATGCCTTGGACAACCAGAAC +CTAAGGCAGCTCTGGGACTGGAGCAAACACAACCTCACCACCACTCAGGGGAAACTCTTC +TTCCACTATAACCCCAAACTCTGCTTGTCAGAAATCCACAAGATGGAAGAAGTTTCAGGA +ACCAAGGGGCGCCAGGAGAGAAACGACATTGCCCTGAAGACCAATGGGGACAAGGCATCC +TGTGAAAATGAGTTACTTAAATTTTCTTACATTCGGACATCTTTTGACAAGATCTTGCTG +AGATGGGAGCCGTACTGGCCCCCCGACTTCCGAGACCTCTTGGGGTTCATGCTGTTCTAC +AAAGAGGCCCCTTATCAGAATGTGACGGAGTTCGATGGGCAGGATGCGTGTGGTTCCAAC +AGTTGGACGGTGGTAGACATTGACCCACCCCTGAGGTCCAACGACCCCAAATCACAGAAC +CACCCAGGGTGGCTGATGCGGGGTCTCAAGCCCTGGACCCAGTATGCCATCTTTGTGAAG +ACCCTGGTCACCTTTTCGGATGAACGCCGGACCTATGGGGCCAAGAGTGACATCATTTAT +GTCCAGACAGATGCCACCAACCCCTCTGTGCCCCTGGATCCAATCTCAGTGTCTAACTCA +TCATCCCAGATTATTCTGAAGTGGAAACCACCCTCCGACCCCAATGGCAACATCACCCAC +TACCTGGTTTTCTGGGAGAGGCAGGCGGAAGACAGTGAGCTGTTCGAGCTGGATTATTGC +CTCAAAGGGCTGAAGCTGCCCTCGAGGACCTGGTCTCCACCATTCGAGTCTGAAGATTCT +CAGAAGCACAACCAGAGTGAGTATGAGGATTCGGCCGGCGAATGCTGCTCCTGTCCAAAG +ACAGACTCTCAGATCCTGAAGGAGCTGGAGGAGTCCTCGTTTAGGAAGACGTTTGAGGAT +TACCTGCACAACGTGGTTTTCGTCCCCAGAAAAACCTCTTCAGGCACTGGTGCCGAGGAC +CCTAGGCCATCTCGGAAACGCAGGTCCCTTGGCGATGTTGGGAATGTGACGGTGGCCGTG +CCCACGGTGGCAGCTTTCCCCAACACTTCCTCGACCAGCGTGCCCACGAGTCCGGAGGAG +CACAGGCCTTTTGAGAAGGTGGTGAACAAGGAGTCGCTGGTCATCTCCGGCTTGCGACAC +TTCACGGGCTATCGCATCGAGCTGCAGGCTTGCAACCAGGACACCCCTGAGGAACGGTGC +AGTGTGGCAGCCTACGTCAGTGCGAGGACCATGCCTGAAGCCAAGGCTGATGACATTGTT 
+GGCCCTGTGACGCATGAAATCTTTGAGAACAACGTCGTCCACTTGATGTGGCAGGAGCCG +AAGGAGCCCAATGGTCTGATCGTGCTGTATGAAGTGAGTTATCGGCGATATGGTGATGAG +GAGCTGCATCTCTGCGTCTCCCGCAAGCACTTCGCTCTGGAACGGGGCTGCAGGCTGCGT +GGGCTGTCACCGGGGAACTACAGCGTGCGAATCCGGGCCACCTCCCTTGCGGGCAACGGC +TCTTGGACGGAACCCACCTATTTCTACGTGACAGACTATTTAGACGTCCCGTCAAATATT +GCAAAAATTATCATCGGCCCCCTCATCTTTGTCTTTCTCTTCAGTGTTGTGATTGGAAGT +ATTTATCTATTCCTGAGAAAGAGGCAGCCAGATGGGCCGCTGGGACCGCTTTACGCTTCT +TCAAACCCTGAGTATCTCAGTGCCAGTGATGTGTTTCCATGCTCTGTGTACGTGCCGGAC +GAGTGGGAGGTGTCTCGAGAGAAGATCACCCTCCTTCGAGAGCTGGGGCAGGGCTCCTTC +GGCATGGTGTATGAGGGCAATGCCAGGGACATCATCAAGGGTGAGGCAGAGACCCGCGTG +GCGGTGAAGACGGTCAACGAGTCAGCCAGTCTCCGAGAGCGGATTGAGTTCCTCAATGAG +GCCTCGGTCATGAAGGGCTTCACCTGCCATCACGTGGTGCGCCTCCTGGGAGTGGTGTCC +AAGGGCCAGCCCACGCTGGTGGTGATGGAGCTGATGGCTCACGGAGACCTGAAGAGCTAC +CTCCGTTCTCTGCGGCCAGAGGCTGAGAATAATCCTGGCCGCCCTCCCCCTACCCTTCAA +GAGATGATTCAGATGGCGGCAGAGATTGCTGACGGGATGGCCTACCTGAACGCCAAGAAG +TTTGTGCATCGGGACCTGGCAGCGAGAAACTGCATGGTCGCCCATGATTTTACTGTCAAA +ATTGGAGACTTTGGAATGACCAGAGACATCTATGAAACGGATTACTACCGGAAAGGGGGC +AAGGGTCTGCTCCCTGTACGGTGGATGGCACCGGAGTCCCTGAAGGATGGGGTCTTCACC +ACTTCTTCTGACATGTGGTCCTTTGGCGTGGTCCTTTGGGAAATCACCAGCTTGGCAGAA +CAGCCTTACCAAGGCCTGTCTAATGAACAGGTGTTGAAATTTGTCATGGATGGAGGGTAT +CTGGATCAACCCGACAACTGTCCAGAGAGAGTCACTGACCTCATGCGCATGTGCTGGCAA +TTCAACCCCAAGATGAGGCCAACCTTCCTGGAGATTGTCAACCTGCTCAAGGACGACCTG +CACCCCAGCTTTCCAGAGGTGTCGTTCTTCCACAGCGAGGAGAACAAGGCTCCCGAGAGT +GAGGAGCTGGAGATGGAGTTTGAGGACATGGAGAATGTGCCCCTGGACCGTTCCTCGCAC +TGTCAGAGGGAGGAGGCGGGGGGCCGGGATGGAGGGTCCTCGCTGGGTTTCAAGCGGAGC +TACGAGGAACACATCCCTTACACACACATGAACGGAGGCAAGAAAAACGGGCGGATTCTG +ACCTTGCCTCGGTCCAATCCTTCCTAACAGTGCCTACCGTGGCGGGGGCGGGCAGGGGTT +CCCATTTTCGCTTTCCTCTGGTTTGAAAGCCTCTGGAAAACTCAGGATTCTCACGACTCT +CCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTG +GCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGC +TTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCT +CTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTC 
+ACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAAT +TTGGAGGGCTTCTTTGCCACCCTGGGCGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTG +GCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAAC +CATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCA +CTCGCCGGCTGGTCCAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTAC +TACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCAC +TTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAG +GAGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACC +CGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTG +GCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCA +GCGTTCTTTGCCAAGAGCGCCGCCATCTACAACCCTGTCATCTATATCATGATGAACAAG +CAGTTCCGGAACTGCATGCTCACCACCATCTGCTGCGGCAAGAACCCACTGGGTGACGAT +GAGGCCTCTGCTACCGTGTCCAAGACGGAGACGAGCCAGGTGGCCCCGGCCTAAGACCTG +CCTAGGACTCTGTGGCCGACTATAGGCGTCTCCCATCCCCTACACCTTCCCCCAGCCACA +GCCATCCCACCAG diff -r 2fe07f50a41e -r c16c30e9ad5b test-data/four_human_proteins.dbinfo.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins.dbinfo.txt Sun Jul 05 10:37:27 2015 -0400 @@ -0,0 +1,7 @@ +Database: Just 4 human proteins + 4 sequences; 3,297 total residues + +Date: Feb 10, 2014 6:40 PM Longest sequence: 1,382 residues + +Volumes: + /mnt/galaxy/galaxy_blast/test-data/four_human_proteins_taxid.fasta diff -r 2fe07f50a41e -r c16c30e9ad5b test-data/four_human_proteins.fasta --- a/test-data/four_human_proteins.fasta Mon Dec 01 05:59:16 2014 -0500 +++ b/test-data/four_human_proteins.fasta Sun Jul 05 10:37:27 2015 -0400 @@ -1,61 +1,48 @@ >sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1 -MHPAVFLSLPDLRCSLLLLVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRF -SQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMK -REYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFER -VANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK -CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHAD 
-CDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREF -HHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL +MHPAVFLSLPDLRCSLLLLVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRFSQMLHPIFEEASDVIKEEFP +NENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMKREYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSK +RNIIGYFEQKDSDNYRVFERVANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK +CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHADCDKFRHPLLHIQKTPADCPV +IAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREFHHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLL +RDRDEL >sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2 -MKKFSRMPKSEGGSGGGAAGGGAGGAGAGAGCGSGGSSVGVRVFAVGRHQVTLEESLAEG -GFSTVFLVRTHGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSIS -DNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRD -LKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG -KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEP -DPEHRPDIFQVSYFAFKFAKKDCPVSNINNSSIPSALPEPMTASEAAARKSQIKARITDT -IGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLAPGEFGNHRPKGALRPGNGPE -ILLGQGPPQQPPQQHRVLQQLQQGDWRLQQLHLQHRHPHQQQQQQQQQQQQQQQQQQQQQ -QQQQQQHHHHHHHHLLQDAYMQQYQHATQQQQMLQQQFLMHSVYQPQPSASQYPTMMPQY -QQAFFQQQMLAQHQPSQQQASPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSV -ADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTEEELLDREFDLLRSNRLEERASSD -KNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD -QRTGKKTSVQGQVQKGNDESESDFESDPPSPKSSEEEEQDDEEVLQGEQGDFNDDDTEPE -NLGHRPLLMDSEDEEEEEKHSSDSDYEQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSA -QLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNLPQHRFPAAGLEQEEFDVFTK -APFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD -EITGSQQQKVKQRSLQKLSSRQRRTKQDMSKSNGKRHHGTPTSTKKTLKPTYRTPERARR -HKKVGRRDSQSSNEFLTISDSKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLS -WHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKMDDFGAVPFTELVVQSITPHQ -SQQSQPVELDPFGAAPFPSKQ +MKKFSRMPKSEGGSGGGAAGGGAGGAGAGAGCGSGGSSVGVRVFAVGRHQVTLEESLAEGGFSTVFLVRTHGGIRCALKR +MYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSISDNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFC 
+DTCEAVARLHQCKTPIIHRDLKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG +KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEPDPEHRPDIFQVSYFAFKFAK +KDCPVSNINNSSIPSALPEPMTASEAAARKSQIKARITDTIGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLA +PGEFGNHRPKGALRPGNGPEILLGQGPPQQPPQQHRVLQQLQQGDWRLQQLHLQHRHPHQQQQQQQQQQQQQQQQQQQQQ +QQQQQQHHHHHHHHLLQDAYMQQYQHATQQQQMLQQQFLMHSVYQPQPSASQYPTMMPQYQQAFFQQQMLAQHQPSQQQA +SPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSVADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTEEEL +LDREFDLLRSNRLEERASSDKNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD +QRTGKKTSVQGQVQKGNDESESDFESDPPSPKSSEEEEQDDEEVLQGEQGDFNDDDTEPENLGHRPLLMDSEDEEEEEKH +SSDSDYEQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSAQLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNL +PQHRFPAAGLEQEEFDVFTKAPFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD +EITGSQQQKVKQRSLQKLSSRQRRTKQDMSKSNGKRHHGTPTSTKKTLKPTYRTPERARRHKKVGRRDSQSSNEFLTISD +SKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLSWHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKM +DDFGAVPFTELVVQSITPHQSQQSQPVELDPFGAAPFPSKQ >sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4 -MATGGRRGAAAAPLLVAVAALLLGAAGHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHL -QILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYAL -VIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNE -ECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL -GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG -CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGC -TVINGSLIINIRGGNNLAAELEANLGLIEEISGYLKIRRSYALVSLSFFRKLRLIRGETL -EIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE -RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQ -NVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFS -DERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWE -RQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL -KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDVGNVTVAVPTVAAF -PNTSSTSVPTSPEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYV 
-SARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCV -SRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG -PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSR -EKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKG -FTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRPEAENNPGRPPPTLQEMIQMA -AEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV -RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDN -CPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPESEELEME -FEDMENVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIPYTHMNGGKKNGRILTLPRSN -PS +MATGGRRGAAAAPLLVAVAALLLGAAGHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHLQILLMFKTRPEDFRDLSFPK +LIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYALVIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDW +SRILDSVEDNYIVLNKDDNEECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL +GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQGCHQYVIHNNKCIPECPSGYT +MNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGCTVINGSLIINIRGGNNLAAELEANLGLIEEISGYLKIRRS +YALVSLSFFRKLRLIRGETLEIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE +RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQNVTEFDGQDACGSNSWTVVD +IDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFSDERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIIL +KWKPPSDPNGNITHYLVFWERQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL +KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDVGNVTVAVPTVAAFPNTSSTSVPTSPEEHRPFEK +VVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYVSARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGL +IVLYEVSYRRYGDEELHLCVSRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG +PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSREKITLLRELGQGSFGMVYEG +NARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKGFTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRP +EAENNPGRPPPTLQEMIQMAAEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV +RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDNCPERVTDLMRMCWQFNPKMR +PTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPESEELEMEFEDMENVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIP +YTHMNGGKKNGRILTLPRSNPS 
>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 -MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY -VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG -GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP -EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQES -ATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI -YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA +MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLA +VADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFT +WVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQES +ATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTT +ICCGKNPLGDDEASATVSKTETSQVAPA diff -r 2fe07f50a41e -r c16c30e9ad5b test-data/tblastn_four_human_vs_rhodopsin.html --- a/test-data/tblastn_four_human_vs_rhodopsin.html Mon Dec 01 05:59:16 2014 -0500 +++ b/test-data/tblastn_four_human_vs_rhodopsin.html Sun Jul 05 10:37:27 2015 -0400 @@ -3,7 +3,7 @@
 
-TBLASTN 2.2.29+
+TBLASTN 2.2.30+
 
 
 Query= sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44
diff -r 2fe07f50a41e -r c16c30e9ad5b test-data/tblastn_four_human_vs_rhodopsin.xml
--- a/test-data/tblastn_four_human_vs_rhodopsin.xml	Mon Dec 01 05:59:16 2014 -0500
+++ b/test-data/tblastn_four_human_vs_rhodopsin.xml	Sun Jul 05 10:37:27 2015 -0400
@@ -2,7 +2,7 @@
 
 
   tblastn
-  TBLASTN 2.2.29+
+  TBLASTN 2.2.30+
   Stephen F. Altschul, Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.
   
   Query_1
diff -r 2fe07f50a41e -r c16c30e9ad5b test-data/three_human_mRNA.dbinfo.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/three_human_mRNA.dbinfo.txt	Sun Jul 05 10:37:27 2015 -0400
@@ -0,0 +1,7 @@
+Database: Just 3 human mRNA sequences
+	3 sequences; 10,732 total bases
+
+Date: Dec 26, 2014  5:54 AM	Longest sequence: 4,796 bases
+
+Volumes:
+	/mnt/galaxy/galaxy_blast/test-data/three_human_mRNA.fasta
diff -r 2fe07f50a41e -r c16c30e9ad5b test-data/three_human_mRNA.fasta.log.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/three_human_mRNA.fasta.log.txt	Sun Jul 05 10:37:27 2015 -0400
@@ -0,0 +1,5 @@
+New DB title:  Just 3 human mRNA sequences
+Sequence type: Nucleotide
+Keep Linkouts: T
+Keep MBits: T
+Maximum file size: 1000000000B
diff -r 2fe07f50a41e -r c16c30e9ad5b test-data/three_human_mRNA.fasta.nhd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/three_human_mRNA.fasta.nhd	Sun Jul 05 10:37:27 2015 -0400
@@ -0,0 +1,3 @@
+12956943350
+13082197871
+19180330422
diff -r 2fe07f50a41e -r c16c30e9ad5b test-data/three_human_mRNA.fasta.nhi
Binary file test-data/three_human_mRNA.fasta.nhi has changed
diff -r 2fe07f50a41e -r c16c30e9ad5b test-data/three_human_mRNA.fasta.nhr
Binary file test-data/three_human_mRNA.fasta.nhr has changed
diff -r 2fe07f50a41e -r c16c30e9ad5b test-data/three_human_mRNA.fasta.nin
Binary file test-data/three_human_mRNA.fasta.nin has changed
diff -r 2fe07f50a41e -r c16c30e9ad5b test-data/three_human_mRNA.fasta.nog
Binary file test-data/three_human_mRNA.fasta.nog has changed
diff -r 2fe07f50a41e -r c16c30e9ad5b test-data/three_human_mRNA.fasta.nsd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/three_human_mRNA.fasta.nsd	Sun Jul 05 10:37:27 2015 -0400
@@ -0,0 +1,3 @@
+gnl|bl_ord_id|00
+gnl|bl_ord_id|11
+gnl|bl_ord_id|22
diff -r 2fe07f50a41e -r c16c30e9ad5b test-data/three_human_mRNA.fasta.nsi
Binary file test-data/three_human_mRNA.fasta.nsi has changed
diff -r 2fe07f50a41e -r c16c30e9ad5b test-data/three_human_mRNA.fasta.nsq
Binary file test-data/three_human_mRNA.fasta.nsq has changed
diff -r 2fe07f50a41e -r c16c30e9ad5b test-data/tool_data_table_conf.xml.test
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tool_data_table_conf.xml.test	Sun Jul 05 10:37:27 2015 -0400
@@ -0,0 +1,15 @@
+
+    
+    
+        value, name, path
+        
+    
+ + value, name, path + +
+ + value, name, path + +
+
diff -r 2fe07f50a41e -r c16c30e9ad5b tool-data/blastdb.loc.sample --- a/tool-data/blastdb.loc.sample Mon Dec 01 05:59:16 2014 -0500 +++ b/tool-data/blastdb.loc.sample Sun Jul 05 10:37:27 2015 -0400 @@ -1,39 +1,44 @@ -#This is a sample file distributed with Galaxy that is used to define a -#list of nucleotide BLAST databases, using three columns tab separated -#(longer whitespace are TAB characters): +# This is a sample file distributed with Galaxy that is used to define a +# list of nucleotide BLAST databases, using three columns tab separated: # -# +# <unique_id>{tab}<database_caption>{tab}<base_name_path> +# +# The captions typically contain spaces and might end with the build date. +# It is important that the actual database name does not have a space in +# it, and that there are only two tabs on each line. # -#The captions typically contain spaces and might end with the build date. -#It is important that the actual database name does not have a space in -#it, and that there are only two tabs on each line. +# You can download the NCBI provided nucleotide databases like NT from here: +# ftp://ftp.ncbi.nlm.nih.gov/blast/db/ # -#So, for example, if your database is nt and the path to your base name -#is /depot/data2/galaxy/blastdb/nt/nt.chunk, then the blastdb.loc entry -#would look like this: -# -#nt_02_Dec_2009 nt 02 Dec 2009 /depot/data2/galaxy/blastdb/nt/nt.chunk -# -#and your /depot/data2/galaxy/blastdb/nt directory would contain all of -#your "base names" (e.g.): +# For simplicity, many Galaxy servers are configured to offer just a live +# version of each NCBI BLAST database (updated with the NCBI provided +# Perl scripts or similar). In this case, we recommend using the case +# sensitive base-name of the NCBI BLAST databases as the unique id. +# Consistent naming is important for sharing workflows between Galaxy +# servers.
# -#-rw-r--r-- 1 wychung galaxy 23437408 2008-04-09 11:26 nt.chunk.00.nhr -#-rw-r--r-- 1 wychung galaxy 3689920 2008-04-09 11:26 nt.chunk.00.nin -#-rw-r--r-- 1 wychung galaxy 251215198 2008-04-09 11:26 nt.chunk.00.nsq -#...etc... +# For example, consider the NCBI partially non-redundant nucleotide +# nt BLAST database, where you have downloaded and decompressed the +# files under /data/blastdb/ meaning at the command line BLAST+ would +# look at the files /data/blastdb/nt.n* when run with: # -#Your blastdb.loc file should include an entry per line for each "base name" -#you have stored. For example: +# $ blastn -db /data/blastdb/nt -query ... +# +# In this case use nt (lower case to match the NCBI file naming) as the +# unique id in the first column of blastdb.loc, giving an entry like +# this: +# +# nt{tab}NCBI partially non-redundant (nt){tab}/data/blastdb/nt # -#nt_02_Dec_2009 nt 02 Dec 2009 /depot/data2/galaxy/blastdb/nt/nt.chunk -#wgs_30_Nov_2009 wgs 30 Nov 2009 /depot/data2/galaxy/blastdb/wgs/wgs.chunk -#test_20_Sep_2008 test 20 Sep 2008 /depot/data2/galaxy/blastdb/test/test -#...etc... +# Alternatively, rather than a "live" mirror of the NCBI databases which +# are updated automatically, for full reproducibility the Galaxy Team +# recommend saving date-stamped copies of the databases. In this case +# your blastdb.loc file should include an entry per line for each +# version you have stored. For example: # -#You can download the NCBI provided protein databases like NT from here: -#ftp://ftp.ncbi.nlm.nih.gov/blast/db/ +# nt_05Jun2010{tab}NCBI nt (partially non-redundant) 05 Jun 2010{tab}/data/blastdb/05Jun2010/nt +# nt_15Aug2010{tab}NCBI nt (partially non-redundant) 15 Aug 2010{tab}/data/blastdb/15Aug2010/nt +# ...etc... # -#See also blastdb_p.loc which is for any protein BLAST database, and -#blastdb_d.loc which is for any protein domains databases (like CDD).
- - +# See also blastdb_p.loc which is for any protein BLAST database, and +# blastdb_d.loc which is for any protein domains databases (like CDD). diff -r 2fe07f50a41e -r c16c30e9ad5b tool-data/blastdb_d.loc.sample --- a/tool-data/blastdb_d.loc.sample Mon Dec 01 05:59:16 2014 -0500 +++ b/tool-data/blastdb_d.loc.sample Sun Jul 05 10:37:27 2015 -0400 @@ -1,35 +1,57 @@ -#This is a sample file distributed with Galaxy that is used to define a -#list of protein domain databases, using three columns tab separated -#(longer whitespace are TAB characters): +# This is a sample file distributed with Galaxy that is used to define a +# list of protein domain databases, using three columns tab separated +# (each TAB character is written as {tab} below): +# +# <unique_id>{tab}<database_caption>{tab}<base_name_path> # -# +# The captions typically contain spaces and might end with the build date. +# It is important that the actual database name does not have a space in +# it, and that there are only two tabs on each line. +# +# You can download the NCBI provided databases as tar-balls from here: +# ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/little_endian/ # -#The captions typically contain spaces and might end with the build date. -#It is important that the actual database name does not have a space in it, -#and that there are only two tabs on each line. +# For simplicity, many Galaxy servers are configured to offer just a live +# version of each NCBI BLAST database (updated with the NCBI provided +# Perl scripts or similar). In this case, we recommend using the case +# sensitive base-name of the NCBI BLAST databases as the unique id. +# Consistent naming is important for sharing workflows between Galaxy +# servers.
# -#You can download the NCBI provided databases as tar-balls from here: -#ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/little_endian/ +# For example, consider the NCBI Conserved Domains Database (CDD), where +# you have downloaded and decompressed the files under the directory +# /data/blastdb/domains/ meaning at the command line BLAST+ would be +# run as follows and would look at the files /data/blastdb/domains/Cdd.*: # -#So, for example, if your database is CDD and the path to your base name -#is /data/blastdb/Cdd, then the blastdb_d.loc entry would look like this: -# -#Cdd{tab}NCBI Conserved Domains Database (CDD){tab}/data/blastdb/Cdd +# $ rpsblast -db /data/blastdb/domains/Cdd -query ... # -#and your /data/blastdb directory would contain all of the files associated -#with the database, /data/blastdb/Cdd.*. +# In this case use Cdd (title case to match the NCBI file naming) as the +# unique id in the first column of blastdb_d.loc, giving an entry like +# this: # -#Your blastdb_d.loc file should include an entry per line for each "base name" -#you have stored. For example: +# Cdd{tab}NCBI Conserved Domains Database (CDD){tab}/data/blastdb/domains/Cdd +# +# Your blastdb_d.loc file should include an entry per line for each "base name" +# you have stored. For example: # -#Cdd NCBI CDD /data/blastdb/domains/Cdd -#Kog KOG (eukaryotes) /data/blastdb/domains/Kog -#Cog COG (prokaryotes) /data/blastdb/domains/Cog -#Pfam Pfam-A /data/blastdb/domains/Pfam -#Smart SMART /data/blastdb/domains/Smart -#Tigr TIGR /data/blastdb/domains/Tigr -#Prk Protein Clusters database /data/blastdb/domains/Prk -#...etc...
+# Cdd{tab}NCBI CDD{tab}/data/blastdb/domains/Cdd +# Kog{tab}KOG (eukaryotes){tab}/data/blastdb/domains/Kog +# Cog{tab}COG (prokaryotes){tab}/data/blastdb/domains/Cog +# Pfam{tab}Pfam-A{tab}/data/blastdb/domains/Pfam +# Smart{tab}SMART{tab}/data/blastdb/domains/Smart +# Tigr{tab}TIGR{tab}/data/blastdb/domains/Tigr +# Prk{tab}Protein Clusters database{tab}/data/blastdb/domains/Prk +# ...etc... # -#See also blastdb.loc which is for any nucleotide BLAST database, and -#blastdb_p.loc which is for any protein BLAST databases. +# Alternatively, rather than a "live" mirror of the NCBI databases which +# are updated automatically, for full reproducibility the Galaxy Team +# recommend saving date-stamped copies of the databases. In this case +# your blastdb_d.loc file should include an entry per line for each +# version you have stored. For example: +# +# Cdd_05Jun2010{tab}NCBI CDD 05 Jun 2010{tab}/data/blastdb/domains/05Jun2010/Cdd +# Cdd_15Aug2010{tab}NCBI CDD 15 Aug 2010{tab}/data/blastdb/domains/15Aug2010/Cdd +# ...etc... +# +# See also blastdb.loc which is for any nucleotide BLAST database, and +# blastdb_p.loc which is for any protein BLAST databases. diff -r 2fe07f50a41e -r c16c30e9ad5b tool-data/blastdb_p.loc.sample --- a/tool-data/blastdb_p.loc.sample Mon Dec 01 05:59:16 2014 -0500 +++ b/tool-data/blastdb_p.loc.sample Sun Jul 05 10:37:27 2015 -0400 @@ -1,30 +1,44 @@ -#This is a sample file distributed with Galaxy that is used to define a -#list of protein BLAST databases, using three columns tab separated -#(longer whitespace are TAB characters): +# This is a sample file distributed with Galaxy that is used to define a +# list of protein BLAST databases, using three columns tab separated: # -# +# <unique_id>{tab}<database_caption>{tab}<base_name_path> +# +# The captions typically contain spaces and might end with the build date. +# It is important that the actual database name does not have a space in +# it, and that there are only two tabs on each line. 
# -#The captions typically contain spaces and might end with the build date. -#It is important that the actual database name does not have a space in -#it, and that there are only two tabs on each line. +# You can download the NCBI provided protein databases like NR from here: +# ftp://ftp.ncbi.nlm.nih.gov/blast/db/ # -#So, for example, if your database is NR and the path to your base name -#is /data/blastdb/nr, then the blastdb_p.loc entry would look like this: -# -#nr{tab}NCBI NR (non redundant){tab}/data/blastdb/nr +# For simplicity, many Galaxy servers are configured to offer just a live +# version of each NCBI BLAST database (updated with the NCBI provided +# Perl scripts or similar). In this case, we recommend using the case +# sensitive base-name of the NCBI BLAST databases as the unique id. +# Consistent naming is important for sharing workflows between Galaxy +# servers. # -#and your /data/blastdb directory would contain all of the files associated -#with the database, /data/blastdb/nr.*. +# For example, consider the NCBI "non-redundant" protein BLAST database +# where you have downloaded and decompressed the files under /data/blastdb/ +# meaning at the command line BLAST+ would be run with something like the +# following, which would look at the files /data/blastdb/nr.p*: # -#Your blastdb_p.loc file should include an entry per line for each "base name" -#you have stored. For example: +# $ blastp -db /data/blastdb/nr -query ... +# +# In this case use nr (lower case to match the NCBI file naming) as the +# unique id in the first column of blastdb_p.loc, giving an entry like +# this: +# +# nr{tab}NCBI non-redundant (nr){tab}/data/blastdb/nr # -#nr_05Jun2010 NCBI NR (non redundant) 05 Jun 2010 /data/blastdb/05Jun2010/nr -#nr_15Aug2010 NCBI NR (non redundant) 15 Aug 2010 /data/blastdb/15Aug2010/nr -#...etc... 
+# Alternatively, rather than a "live" mirror of the NCBI databases which +# are updated automatically, for full reproducibility the Galaxy Team +# recommend saving date-stamped copies of the databases. In this case +# your blastdb_p.loc file should include an entry per line for each +# version you have stored. For example: # -#You can download the NCBI provided protein databases like NR from here: -#ftp://ftp.ncbi.nlm.nih.gov/blast/db/ +# nr_05Jun2010{tab}NCBI NR (non redundant) 05 Jun 2010{tab}/data/blastdb/05Jun2010/nr +# nr_15Aug2010{tab}NCBI NR (non redundant) 15 Aug 2010{tab}/data/blastdb/15Aug2010/nr +# ...etc... # -#See also blastdb.loc which is for any nucleotide BLAST database, and -#blastdb_d.loc which is for any protein domains databases (like CDD). +# See also blastdb.loc which is for any nucleotide BLAST database, and +# blastdb_d.loc which is for any protein domains databases (like CDD). diff -r 2fe07f50a41e -r c16c30e9ad5b tool-data/tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/tool_data_table_conf.xml.sample Sun Jul 05 10:37:27 2015 -0400 @@ -0,0 +1,14 @@ + + + value, name, path + +
+ + value, name, path + +
+ + value, name, path + +
+
diff -r 2fe07f50a41e -r c16c30e9ad5b tools/ncbi_blast_plus/README.rst --- a/tools/ncbi_blast_plus/README.rst Mon Dec 01 05:59:16 2014 -0500 +++ b/tools/ncbi_blast_plus/README.rst Sun Jul 05 10:37:27 2015 -0400 @@ -7,7 +7,7 @@ See the licence text below. -Currently tested with NCBI BLAST 2.2.29+ (i.e. version 2.2.29 of BLAST+), +Currently tested with NCBI BLAST 2.2.30+ (i.e. version 2.2.30 of BLAST+), and does not work with the NCBI 'legacy' BLAST suite (e.g. ``blastall``). Note that these wrappers (and the associated datatypes) were originally @@ -19,6 +19,25 @@ These wrappers are available from the Galaxy Tool Shed at: http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus +In-development test releases are available from the Test Tool Shed at: +http://testtoolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus/ + + +Citation +======== + +Please cite the following paper (currently available as a preprint): + +NCBI BLAST+ integrated into Galaxy. +P.J.A. Cock, J.M. Chilton, B. Gruening, J.E. Johnson, N. Soranzo +bioRxiv DOI: http://dx.doi.org/10.1101/014043 (preprint) + +You should also cite the NCBI BLAST+ tools: + +BLAST+: architecture and applications. +C. Camacho et al. BMC Bioinformatics 2009, 10:421. +DOI: http://dx.doi.org/10.1186/1471-2105-10-421 + Automated Installation ====================== @@ -35,8 +54,7 @@ For those not using Galaxy's automated installation from the Tool Shed, put the XML and Python files in the ``tools/ncbi_blast_plus/`` folder and add the -XML files to your ``tool_conf.xml`` as normal (and do the same in -``tool_conf.xml.sample`` in order to run the unit tests). For example, use:: +XML files to your ``tool_conf.xml`` as normal. For example, use::
@@ -61,15 +79,16 @@ As described above for an automated installation, you must also tell Galaxy about any system level BLAST databases using the ``tool-data/blastdb*.loc`` -files. +files. Also merge the ``tool-data/tool_data_table_conf.xml.sample`` contents +into your ``tool_data_table_conf.xml`` file. You must install the NCBI BLAST+ standalone tools somewhere on the system -path. Currently the unit tests are written using BLAST 2.2.29+. +path. Currently the unit tests are written using BLAST+ 2.2.30. Run the functional tests (adjusting the section identifier to match your ``tool_conf.xml.sample`` file):: - ./run_functional_tests.sh -sid NCBI_BLAST+-ncbi_blast_plus_tools + ./run_tests.sh -sid NCBI_BLAST+-ncbi_blast_plus_tools Configuration ============= @@ -157,7 +176,7 @@ - Set number of threads via ``$GALAXY_SLOTS`` environment variable. - More descriptive default output names. - Tests require updated BLAST DB definitions (``blast_datatypes`` v0.0.18). - - Pre-check for duplicate identifiers in makeblastdb wrapper. + - Pre-check for duplicate identifiers in ``makeblastdb`` wrapper. - Tests updated for BLAST+ 2.2.28 instead of BLAST+ 2.2.27. - Now depends on ``package_blast_plus_2_2_28`` in ToolShed. - Extended tabular output includes 'salltitles' as column 25. @@ -170,8 +189,8 @@ - More detailed descriptions for BLASTN and BLASTP task option. - Wrappers for segmasker, dustmasker and convert2blastmask (contribution from Bjoern Gruening). - - Supports using maskinfo with makeblastdb wrapper. - - Supports setting a taxonomy ID in makeblastdb wrapper. + - Supports using maskinfo with ``makeblastdb`` wrapper. + - Supports setting a taxonomy ID in ``makeblastdb`` wrapper. - Subtle changes like new conditional settings will require some old workflows be updated to cope. v0.1.01 - Requires ``blastdbd`` datatype (``blast_datatypes`` v0.0.19). @@ -181,6 +200,21 @@ domain database from the user's history. - Tool definitions now embed citation information (by John Chilton). 
- BLAST tools support GI and SeqID filters (added by Bjoern Gruening). +v0.1.02 - Now depends on ``package_blast_plus_2_2_30`` in ToolShed. + - Tests updated for BLAST+ 2.2.30 instead of BLAST+ 2.2.29. + - New tasks ``blastp-fast``, ``blastx-fast`` and ``tblastn-fast``. + - New minimum query HSP coverage option, ``-qcov_hsp_perc``. + - Removed ``-word_size`` from RPS-BLAST and RPS-TBLASTN wrappers, this + is set during database construction and should not have been offered + as a command line option in releases prior to BLAST+ 2.2.30. + - BLAST database ``blastdb*.loc`` files now accessed via the XML + table definitions in Galaxy's ``tool_data_table_conf.xml`` file, + setup via ``tool-data/tool_data_table_conf.xml.sample`` + - Replace ``.extra_files_path`` with ``.files_path`` (internal change, + thanks to Bjoern Gruening and John Chilton). + - Added "NCBI BLAST+ integrated into Galaxy" preprint citation. +v0.1.03 - Reorder XML elements (internal change only). + - Planemo for Tool Shed upload (``.shed.yml``, internal change only). ======= ====================================================================== @@ -201,10 +235,28 @@ As of July 2013, development is continuing on a dedicated GitHub repository: https://github.com/peterjc/galaxy_blast -For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball I use -the following command from the GitHub repository root folder:: +For pushing a release to the test or main "Galaxy Tool Shed", use the following +Planemo commands (which requires you have set your Tool Shed access details in +``~/.planemo.yml`` and that you have access rights on the Tool Shed):: + + $ planemo shed_update --shed_target testtoolshed --check_diff ~/repositories/galaxy_blast/tools/ncbi_blast_plus/ + ... + +or:: - $ tools/ncbi_blast_plus/make_ncbi_blast_plus.sh + $ planemo shed_update --shed_target toolshed --check_diff ~/repositories/galaxy_blast/tools/ncbi_blast_plus/ + ... 
+ +To just build and check the tar ball, use:: + + $ planemo shed_upload --tar_only ~/repositories/galaxy_blast/tools/ncbi_blast_plus/ + ... + $ tar -tzf shed_upload.tar.gz + test-data/blastdb.loc + ... + tools/ncbi_blast_plus/tool_dependencies.xml + $ tar -tzf shed_upload.tar.gz | wc -l + 117 This simplifies ensuring a consistent set of files is bundled each time, including all the relevant test files. diff -r 2fe07f50a41e -r c16c30e9ad5b tools/ncbi_blast_plus/blastxml_to_tabular.py --- a/tools/ncbi_blast_plus/blastxml_to_tabular.py Mon Dec 01 05:59:16 2014 -0500 +++ b/tools/ncbi_blast_plus/blastxml_to_tabular.py Sun Jul 05 10:37:27 2015 -0400 @@ -1,9 +1,9 @@ #!/usr/bin/env python """Convert a BLAST XML file to tabular output. -Takes three command line options, input BLAST XML filename, output tabular -BLAST filename, output format (std for standard 12 columns, or ext for the -extended 24 columns offered in the BLAST+ wrappers). +Designed to convert BLAST XML files into tabular BLAST output (either +std for standard 12 columns, or ext for the extended 25 columns offered +in the Galaxy BLAST+ wrappers). 
The 12 columns output are 'qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore' or 'std' at the BLAST+ command line, which @@ -66,7 +66,7 @@ from optparse import OptionParser if "-v" in sys.argv or "--version" in sys.argv: - print "v0.1.01" + print "v0.1.04" sys.exit(0) if sys.version_info[:2] >= ( 2, 5 ): @@ -162,7 +162,7 @@ blast_program = None # get an iterable try: - context = ElementTree.iterparse(in_file, events=("start", "end")) + context = ElementTree.iterparse(blastxml_filename, events=("start", "end")) except: stop_err("Invalid data format.") # turn it into an iterator @@ -321,7 +321,7 @@ #Only a subset of the columns are needed values = [values[colnames.index(c)] for c in cols] #print "\t".join(values) - outfile.write("\t".join(values) + "\n") + output_handle.write("\t".join(values) + "\n") # prevents ElementTree from growing large datastructure root.clear() elem.clear() diff -r 2fe07f50a41e -r c16c30e9ad5b tools/ncbi_blast_plus/blastxml_to_tabular.xml --- a/tools/ncbi_blast_plus/blastxml_to_tabular.xml Mon Dec 01 05:59:16 2014 -0500 +++ b/tools/ncbi_blast_plus/blastxml_to_tabular.xml Sun Jul 05 10:37:27 2015 -0400 @@ -1,5 +1,10 @@ - + Convert BLAST XML output to tabular + + + + + blastxml_to_tabular.py --version blastxml_to_tabular.py -o "$tabular_file" @@ -9,13 +14,8 @@ #else -c "$output.out_format" #end if -#for i in $blastxml_file#${i} #end for# +#for i in $blastxml_file#"${i}" #end for# - - - - - @@ -62,8 +62,6 @@ - - diff -r 2fe07f50a41e -r c16c30e9ad5b tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml --- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml Mon Dec 01 05:59:16 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml Sun Jul 05 10:37:27 2015 -0400 @@ -1,20 +1,34 @@ - + Show BLAST database information from blastdbcmd blastdbcmd ncbi_macros.xml - + blastdbcmd -dbtype $db_opts.db_type -db "${db_opts.database.fields.path}" -info -out "$info" - + + + **What it does** diff -r 2fe07f50a41e -r c16c30e9ad5b 
tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Mon Dec 01 05:59:16 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Sun Jul 05 10:37:27 2015 -0400 @@ -1,10 +1,10 @@ - + Extract sequence(s) from BLAST database blastdbcmd ncbi_macros.xml - + ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces @@ -47,7 +47,6 @@ | sed 's/>\(lcl|\|gnl|BL_ORD_ID|[0-9]* \)/>/1' > "$seq" #end if - @@ -70,6 +69,19 @@ + + + **What it does** diff -r 2fe07f50a41e -r c16c30e9ad5b tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Mon Dec 01 05:59:16 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Sun Jul 05 10:37:27 2015 -0400 @@ -1,4 +1,4 @@ - + Search nucleotide database with nucleotide query sequence(s) @@ -6,7 +6,7 @@ blastn ncbi_macros.xml - + ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces @@ -19,23 +19,21 @@ @THREADS@ #if $adv_opts.adv_opts_selector=="advanced": $adv_opts.strand -@ADVANCED_OPTIONS@ +@ADV_FILTER_QUERY@ +@ADV_MAX_HITS@ +@ADV_WORD_SIZE@ #if (str($adv_opts.identity_cutoff) and float(str($adv_opts.identity_cutoff)) > 0 ): -perc_identity $adv_opts.identity_cutoff #end if $adv_opts.ungapped @ADV_ID_LIST_FILTER@ +@ADV_QCOV_HSP_PERC@ ## End of advanced options: #end if - - - - - @@ -64,6 +62,7 @@ + @@ -104,6 +103,27 @@ + diff -r 2fe07f50a41e -r c16c30e9ad5b tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Mon Dec 01 05:59:16 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Sun Jul 05 10:37:27 2015 -0400 @@ -1,4 +1,4 @@ - + Search protein database with protein query sequence(s) @@ -6,7 +6,7 @@ blastp ncbi_macros.xml - + ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces @@ -19,16 +19,16 @@ @THREADS@ #if $adv_opts.adv_opts_selector=="advanced": -matrix $adv_opts.matrix -@ADVANCED_OPTIONS@ +@ADV_FILTER_QUERY@ +@ADV_MAX_HITS@ +@ADV_WORD_SIZE@ ##Ungapped disabled for now - see comments below ##$adv_opts.ungapped @ADV_ID_LIST_FILTER@ +@ADV_QCOV_HSP_PERC@ ## End of advanced options: #end if - - - @@ -36,6 +36,7 @@ + @@ -54,6 +55,7 @@ --> + @@ -76,6 +78,7 @@ + @@ -92,6 +95,7 @@ + @@ -108,6 +112,7 @@ + diff -r 2fe07f50a41e -r c16c30e9ad5b tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Mon Dec 01 05:59:16 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Sun Jul 05 10:37:27 2015 -0400 @@ -1,4 +1,4 @@ - + Search protein database with translated nucleotide query sequence(s) @@ -6,7 +6,7 @@ blastx ncbi_macros.xml - + ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces @@ -14,26 +14,31 @@ -query "$query" @BLAST_DB_SUBJECT@ -query_gencode $query_gencode +-task $blast_type -evalue $evalue_cutoff @BLAST_OUTPUT@ @THREADS@ #if $adv_opts.adv_opts_selector=="advanced": $adv_opts.strand -matrix $adv_opts.matrix -@ADVANCED_OPTIONS@ +@ADV_FILTER_QUERY@ +@ADV_MAX_HITS@ +@ADV_WORD_SIZE@ $adv_opts.ungapped @ADV_ID_LIST_FILTER@ +@ADV_QCOV_HSP_PERC@ ## End of advanced options: #end if - - - + + + + @@ -47,6 +52,7 @@ + diff -r 2fe07f50a41e -r c16c30e9ad5b tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml Mon Dec 01 05:59:16 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml Sun Jul 05 10:37:27 2015 -0400 @@ -1,10 +1,10 @@ - + Convert masking information in lower-case masked FASTA input to file formats suitable for makeblastdb convert2blastmask ncbi_macros.xml - + ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces @@ -16,7 +16,6 @@ -out "$outfile" -outfmt $outformat - @@ -80,7 +79,7 @@ **References** If you use this Galaxy tool in work leading to a scientific publication please -cite the following papers (a more specific paper covering this wrapper is planned): +cite the following papers: @REFERENCES@ diff -r 2fe07f50a41e -r c16c30e9ad5b tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Mon Dec 01 05:59:16 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Sun Jul 05 10:37:27 2015 -0400 @@ -1,11 +1,11 @@ - + masks low complexity regions dustmasker ncbi_macros.xml - + ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces @@ -13,14 +13,13 @@ #if $db_opts.db_opts_selector == "db": -in "${db_opts.database.fields.path}" -infmt blastdb #elif $db_opts.db_opts_selector == "histdb": - -in "${os.path.join($db_opts.histdb.extra_files_path, 'blastdb')}" -infmt blastdb + -in "${os.path.join($db_opts.histdb.files_path, 'blastdb')}" -infmt blastdb #else: -in "$subject" -infmt fasta #end if -out "$outfile" -window $window -level $level -linker $linker -outfmt $outformat - @@ -92,7 +91,7 @@ **References** If you use this Galaxy tool in work leading to a scientific publication please -cite the following papers (a more specific paper covering this wrapper is planned): +cite the following papers: @REFERENCES@ diff -r 2fe07f50a41e -r c16c30e9ad5b tools/ncbi_blast_plus/ncbi_macros.xml --- a/tools/ncbi_blast_plus/ncbi_macros.xml Mon Dec 01 05:59:16 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_macros.xml Sun Jul 05 10:37:27 2015 -0400 @@ -1,4 +1,20 @@ + + + @BINARY@ + blast+ + + + + + + + + + + + @BINARY@ -version + @@ -59,7 +75,7 @@ - + @@ -109,17 +125,6 @@ - - - - - - - - - - - @@ -175,11 +180,7 @@ - - - - - + @@ -205,11 +206,7 @@ - - - - - + @@ -234,11 +231,7 @@ - - - - - + @@ -258,20 +251,12 @@ - - - - - + - - - - - + @@ -305,12 +290,8 @@ - - - @BINARY@ - blast+ - - @BINARY@ -version + + @@ -330,9 +311,9 @@ label="Restrict search of database to a given set of ID's" help="This feature provides a means to exclude ID's from a BLAST database search. The expectation values in the BLAST results are based upon the sequences actually searched, and not on the underlying database. 
Note this cannot be used when comparing against a FASTA file."> - - - + + + @@ -349,6 +330,11 @@ + +#if float(str($adv_opts.qcov_hsp_perc)) > 0: + -qcov_hsp_perc $adv_opts.qcov_hsp_perc +#end if + #if $adv_opts.adv_optional_id_files_opts.adv_optional_id_files_opts_selector == 'negative_gilist': -negative_gilist $adv_opts.adv_optional_id_files_opts.negative_gilist @@ -363,7 +349,7 @@ #if $db_opts.db_opts_selector == "db": -db "${db_opts.database.fields.path}" #elif $db_opts.db_opts_selector == "histdb": - -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}" + -db "${os.path.join($db_opts.histdb.files_path,'blastdb')}" #else: -subject "$db_opts.subject" #end if @@ -381,12 +367,22 @@ -outfmt $output.out_format #end if - $adv_opts.filter_query + $adv_opts.filter_query + ## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string -## Note -max_target_seqs overrides -num_descriptions and -num_alignments +## Note -max_target_seqs used to simply override -num_descriptions and -num_alignments +## but this was changed in BLAST+ 2.2.27 onwards to force their use (raised with NCBI) #if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0): +#if str($output.out_format) in ["6", "ext", "cols", "5"]: +## Most output formats use this, including tabular and XML: -max_target_seqs $adv_opts.max_hits +#else +## Text and HTML output formats 0-4 currently need this instead: +-num_descriptions $adv_opts.max_hits -num_alignments $adv_opts.max_hits #end if +#end if + + #if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0): -word_size $adv_opts.word_size #end if @@ -401,14 +397,18 @@ ${db_opts.subject.name} #end if +Peter J. A. Cock, John M. Chilton, Björn Grüning, James E. Johnson, Nicola Soranzo (2015). +NCBI BLAST+ integrated into Galaxy. *BioRxiv* preprint. +http://dx.doi.org/10.1101/014043 + Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). 
Galaxy tools and workflows for sequence analysis with applications -in molecular plant pathology. PeerJ 1:e167 +in molecular plant pathology. *PeerJ* 1:e167 http://dx.doi.org/10.7717/peerj.167 Christiam Camacho et al. (2009). BLAST+: architecture and applications. -BMC Bioinformatics. 15;10:421. +*BMC Bioinformatics*. 15;10:421. http://dx.doi.org/10.1186/1471-2105-10-421 This wrapper is available to install into other Galaxy Instances via the Galaxy @@ -418,7 +418,9 @@ 10.1186/1471-2105-10-421 10.7717/peerj.167 - + + 10.1101/014043 + **Output format** diff -r 2fe07f50a41e -r c16c30e9ad5b tools/ncbi_blast_plus/ncbi_makeblastdb.xml --- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Mon Dec 01 05:59:16 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Sun Jul 05 10:37:27 2015 -0400 @@ -1,16 +1,16 @@ - + Make BLAST database makeblastdb ncbi_macros.xml - + check_no_duplicates.py ##First check for duplicates (since BLAST+ 2.2.28 fails to do so) ##and abort (via the ampersand ampersand trick) if any are found. #for i in $input_file#"${i}" #end for# && -makeblastdb -out "${os.path.join($outfile.extra_files_path,'blastdb')}" +makeblastdb -out "${os.path.join($outfile.files_path,'blastdb')}" $parse_seqids $hash_index ## Single call to -in with multiple filenames space separated with outer quotes @@ -47,7 +47,6 @@ ## Capture the stdout log information to the primary file (plain text): > "$outfile" - @@ -97,6 +96,7 @@ @@ -6,7 +6,7 @@ deltablast ncbi_macros.xml - + ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces @@ -15,19 +15,18 @@ #if $db_opts.db_opts_selector == "db": -db "${db_opts.database.fields.path}" #elif $db_opts.db_opts_selector == "histdb": - -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}" + -db "${os.path.join($db_opts.histdb.files_path,'blastdb')}" #end if -evalue $evalue_cutoff @BLAST_OUTPUT@ @THREADS@ #if $adv_opts.adv_opts_selector=="advanced": -@ADVANCED_OPTIONS@ +@ADV_FILTER_QUERY@ +@ADV_MAX_HITS@ +@ADV_QCOV_HSP_PERC@ ## End of advanced options: #end if - - - @@ -41,8 +40,8 @@ - + @@ -52,6 +51,19 @@ + + + @SEARCH_TIME_WARNING@ diff -r 2fe07f50a41e -r c16c30e9ad5b tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml Mon Dec 01 05:59:16 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml Sun Jul 05 10:37:27 2015 -0400 @@ -1,4 +1,4 @@ - + Search protein domain database (PSSMs) with translated nucleotide query sequence(s) @@ -6,7 +6,7 @@ rpstblastn ncbi_macros.xml - + ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces @@ -15,18 +15,19 @@ #if $db_opts.db_opts_selector == "db": -db "${db_opts.database.fields.path}" #elif $db_opts.db_opts_selector == "histdb": - -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}" + -db "${os.path.join($db_opts.histdb.files_path,'blastdb')}" #end if -evalue $evalue_cutoff @BLAST_OUTPUT@ ## rpstblastn does not support multiple threads up to release 2.2.27+. Added in BLAST 2.2.28+. 
##-num_threads 8 #if $adv_opts.adv_opts_selector=="advanced": -@ADVANCED_OPTIONS@ +@ADV_FILTER_QUERY@ +@ADV_MAX_HITS@ +@ADV_QCOV_HSP_PERC@ ## End of advanced options: #end if - @@ -41,8 +42,8 @@ - + @@ -50,6 +51,19 @@ + + + @SEARCH_TIME_WARNING@ diff -r 2fe07f50a41e -r c16c30e9ad5b tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml Mon Dec 01 05:59:16 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml Sun Jul 05 10:37:27 2015 -0400 @@ -1,10 +1,10 @@ - + low-complexity regions in protein sequences segmasker ncbi_macros.xml - + ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces @@ -12,7 +12,7 @@ #if $db_opts.db_opts_selector == "db": -in "${db_opts.database.fields.path}" -infmt blastdb #elif $db_opts.db_opts_selector == "histdb": - -in "${os.path.join($db_opts.histdb.extra_files_path, 'blastdb')}" -infmt blastdb + -in "${os.path.join($db_opts.histdb.files_path, 'blastdb')}" -infmt blastdb #else: -in "$subject" -infmt fasta #end if @@ -22,7 +22,6 @@ -hicut $hicut -outfmt $outformat - @@ -94,7 +93,7 @@ **References** If you use this Galaxy tool in work leading to a scientific publication please -cite the following papers (a more specific paper covering this wrapper is planned): +cite the following papers: @REFERENCES@ diff -r 2fe07f50a41e -r c16c30e9ad5b tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Mon Dec 01 05:59:16 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Sun Jul 05 10:37:27 2015 -0400 @@ -1,4 +1,4 @@ - + Search translated nucleotide database with protein query sequence(s) @@ -6,35 +6,40 @@ tblastn ncbi_macros.xml - + ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces tblastn -query "$query" @BLAST_DB_SUBJECT@ +-task $blast_type -evalue $evalue_cutoff @BLAST_OUTPUT@ @THREADS@ #if $adv_opts.adv_opts_selector=="advanced": -db_gencode $adv_opts.db_gencode -matrix $adv_opts.matrix -@ADVANCED_OPTIONS@ +@ADV_FILTER_QUERY@ +@ADV_MAX_HITS@ +@ADV_WORD_SIZE@ ##Ungapped disabled for now - see comments below ##$adv_opts.ungapped @ADV_ID_LIST_FILTER@ +@ADV_QCOV_HSP_PERC@ ## End of advanced options: #end if - - - + + + + + - @@ -51,6 +56,7 @@ --> + diff -r 2fe07f50a41e -r c16c30e9ad5b tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Mon Dec 01 05:59:16 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Sun Jul 05 10:37:27 2015 -0400 @@ -1,4 +1,4 @@ - + Search translated nucleotide database with translated nucleotide query sequence(s) @@ -6,7 +6,7 @@ tblastx ncbi_macros.xml - + ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces @@ -21,16 +21,14 @@ -db_gencode $adv_opts.db_gencode $adv_opts.strand -matrix $adv_opts.matrix -## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string -## Note -max_target_seqs overrides -num_descriptions and -num_alignments -@ADVANCED_OPTIONS@ +@ADV_FILTER_QUERY@ +@ADV_MAX_HITS@ +@ADV_WORD_SIZE@ @ADV_ID_LIST_FILTER@ +@ADV_QCOV_HSP_PERC@ ## End of advanced options: #end if - - - @@ -51,6 +49,7 @@ + diff -r 2fe07f50a41e -r c16c30e9ad5b tools/ncbi_blast_plus/repository_dependencies.xml --- a/tools/ncbi_blast_plus/repository_dependencies.xml Mon Dec 01 05:59:16 2014 -0500 +++ b/tools/ncbi_blast_plus/repository_dependencies.xml Sun Jul 05 10:37:27 2015 -0400 @@ -1,4 +1,4 @@ - + diff -r 2fe07f50a41e -r c16c30e9ad5b tools/ncbi_blast_plus/tool_dependencies.xml --- a/tools/ncbi_blast_plus/tool_dependencies.xml Mon Dec 01 05:59:16 2014 -0500 +++ b/tools/ncbi_blast_plus/tool_dependencies.xml Sun Jul 05 10:37:27 2015 -0400 @@ -1,6 +1,6 @@ - - + +