# HG changeset patch
# User peterjc
# Date 1379931253 14400
# Node ID 70e7dcbf6573e5f4d9f5096b0de7b5b4e5998935
# Parent 9dabbfd73c8ac5b9b08a02ffa9a1927e11494a4e
Uploaded v0.0.20, handles dependencies via package_blast_plus_2_2_26, development moved to GitHub, RST README, MIT licence, citation information, more tests, percentage identity option to BLASTN, cElementTree to ElementTree fallback.
diff -r 9dabbfd73c8a -r 70e7dcbf6573 test-data/blastn_rhodopsin_vs_three_human.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastn_rhodopsin_vs_three_human.tabular Mon Sep 23 06:14:13 2013 -0400
@@ -0,0 +1,7 @@
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.10 1050 77 6 1 1047 88 1134 0.0 1474
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.59 333 28 0 1 333 118 450 9e-133 460
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.36 243 19 2 3127 3368 782 1023 7e-94 331
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.22 173 10 0 1410 1582 448 620 8e-74 265
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.98 171 10 2 2854 3023 615 784 8e-69 248
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.58 962 75 6 1 959 118 1076 0.0 1323
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.55 1052 121 10 1 1047 88 1134 0.0 1208
diff -r 9dabbfd73c8a -r 70e7dcbf6573 test-data/blastp_four_human_vs_rhodopsin_converted.tabular
--- a/test-data/blastp_four_human_vs_rhodopsin_converted.tabular Thu Apr 25 09:38:37 2013 -0400
+++ b/test-data/blastp_four_human_vs_rhodopsin_converted.tabular Mon Sep 23 06:14:13 2013 -0400
@@ -3,4 +3,4 @@
sp|P08100|OPSD_HUMAN gi|283855846|gb|ADB45242.1| 94.82 328 17 0 11 338 1 328 0.0 653
sp|P08100|OPSD_HUMAN gi|283855823|gb|ADB45229.1| 94.82 328 17 0 11 338 1 328 0.0 631
sp|P08100|OPSD_HUMAN gi|223523|prf||0811197A 93.10 348 23 1 1 348 1 347 0.0 673
-sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| 82.16 342 60 1 1 341 1 342 3e-176 599
+sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| 82.16 342 60 1 1 341 1 342 0.0 599
diff -r 9dabbfd73c8a -r 70e7dcbf6573 test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular
--- a/test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular Thu Apr 25 09:38:37 2013 -0400
+++ b/test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular Mon Sep 23 06:14:13 2013 -0400
@@ -3,4 +3,4 @@
sp|P08100|OPSD_HUMAN gi|283855846|gb|ADB45242.1| 94.82 328 17 0 11 338 1 328 0.0 653 gi|283855846|gb|ADB45242.1| 1684 311 321 0 97.87 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328
sp|P08100|OPSD_HUMAN gi|283855823|gb|ADB45229.1| 94.82 328 17 0 11 338 1 328 0.0 631 gi|283855823|gb|ADB45229.1| 1627 311 323 0 98.48 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328
sp|P08100|OPSD_HUMAN gi|223523|prf||0811197A 93.10 348 23 1 1 348 1 347 0.0 673 gi|223523|prf||0811197A 1736 324 336 1 96.55 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 347
-sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| 82.16 342 60 1 1 341 1 342 3e-176 599 gi|12583665|dbj|BAB21486.1| 1544 281 314 1 91.81 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 348 354
+sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| 82.16 342 60 1 1 341 1 342 0.0 599 gi|12583665|dbj|BAB21486.1| 1544 281 314 1 91.81 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 348 354
diff -r 9dabbfd73c8a -r 70e7dcbf6573 test-data/blastx_rhodopsin_vs_four_human_converted.tabular
--- a/test-data/blastx_rhodopsin_vs_four_human_converted.tabular Thu Apr 25 09:38:37 2013 -0400
+++ b/test-data/blastx_rhodopsin_vs_four_human_converted.tabular Mon Sep 23 06:14:13 2013 -0400
@@ -1,10 +1,10 @@
gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 662
-gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 1e-168 575
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 2e-62 224
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 5e-34 129
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.43 56 2 0 2855 3022 177 232 2e-31 120
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-30 118
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.00 25 2 0 4222 4296 312 336 6e-12 56.2
-gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 2e-180 613
+gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 575
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 2e-68 224
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 5e-36 129
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.43 56 2 0 2855 3022 177 232 3e-33 120
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 2e-32 118
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.00 25 2 0 4222 4296 312 336 2e-12 56.2
+gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 613
gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 641
-gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.93 332 60 0 23 1018 1 332 6e-164 559
+gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.93 332 60 0 23 1018 1 332 0.0 559
diff -r 9dabbfd73c8a -r 70e7dcbf6573 test-data/blastx_rhodopsin_vs_four_human_converted_ext.tabular
--- a/test-data/blastx_rhodopsin_vs_four_human_converted_ext.tabular Thu Apr 25 09:38:37 2013 -0400
+++ b/test-data/blastx_rhodopsin_vs_four_human_converted_ext.tabular Mon Sep 23 06:14:13 2013 -0400
@@ -1,10 +1,10 @@
gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 662 sp|P08100|OPSD_HUMAN 1707 336 343 0 98.56 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348
-gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 1e-168 575 sp|P08100|OPSD_HUMAN 1481 283 315 0 94.88 3 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 2e-62 224 sp|P08100|OPSD_HUMAN 570 107 109 0 98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 5e-34 129 sp|P08100|OPSD_HUMAN 324 60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.43 56 2 0 2855 3022 177 232 2e-31 120 sp|P08100|OPSD_HUMAN 302 54 56 0 100.00 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKE RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKE 4301 348
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-30 118 sp|P08100|OPSD_HUMAN 295 55 56 0 94.92 3 0 LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR 4301 348
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.00 25 2 0 4222 4296 312 336 6e-12 56.2 sp|P08100|OPSD_HUMAN 134 23 24 0 96.00 1 0 QFRNCMLTTLCCGKNPLGDDEASTT QFRNCMLTTICCGKNPLGDDEASAT 4301 348
-gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 2e-180 613 sp|P08100|OPSD_HUMAN 1582 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348
+gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 575 sp|P08100|OPSD_HUMAN 1481 283 315 0 94.88 3 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 2e-68 224 sp|P08100|OPSD_HUMAN 570 107 109 0 98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 5e-36 129 sp|P08100|OPSD_HUMAN 324 60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.43 56 2 0 2855 3022 177 232 3e-33 120 sp|P08100|OPSD_HUMAN 302 54 56 0 100.00 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKE RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKE 4301 348
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 2e-32 118 sp|P08100|OPSD_HUMAN 295 55 56 0 94.92 3 0 LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR 4301 348
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.00 25 2 0 4222 4296 312 336 2e-12 56.2 sp|P08100|OPSD_HUMAN 134 23 24 0 96.00 1 0 QFRNCMLTTLCCGKNPLGDDEASTT QFRNCMLTTICCGKNPLGDDEASAT 4301 348
+gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 613 sp|P08100|OPSD_HUMAN 1582 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348
gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 641 sp|P08100|OPSD_HUMAN 1654 325 337 0 96.84 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348
-gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.93 332 60 0 23 1018 1 332 6e-164 559 sp|P08100|OPSD_HUMAN 1440 272 307 0 92.47 2 0 MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKXXXXXXXXXXXXXXXXXXVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEED MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1344 348
+gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.93 332 60 0 23 1018 1 332 0.0 559 sp|P08100|OPSD_HUMAN 1440 272 307 0 92.47 2 0 MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKXXXXXXXXXXXXXXXXXXVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEED MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1344 348
diff -r 9dabbfd73c8a -r 70e7dcbf6573 test-data/blastx_sample.xml
diff -r 9dabbfd73c8a -r 70e7dcbf6573 test-data/tblastx_rhodopsin_vs_three_human.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tblastx_rhodopsin_vs_three_human.tabular Mon Sep 23 06:14:13 2013 -0400
@@ -0,0 +1,117 @@
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 97.39 230 6 0 1 690 88 777 0.0 559
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 94.12 102 6 0 742 1047 829 1134 0.0 236
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 91.22 148 13 0 1046 603 1133 690 0.0 308
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 94.32 88 5 0 566 303 653 390 0.0 207
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 96.34 82 3 0 248 3 335 90 0.0 182
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 83.33 204 34 0 18 629 105 716 4e-158 404
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 75.28 89 22 0 780 1046 867 1133 4e-158 161
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 81.28 203 38 0 609 1 696 88 5e-153 360
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 80.60 67 13 0 916 716 1003 803 5e-153 135
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 70.27 37 11 0 1047 937 1134 1024 5e-153 64.2
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 100.00 7 0 0 646 626 733 713 5e-153 24.0
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 89.23 65 7 0 460 266 547 353 4e-105 167
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 89.58 48 5 0 184 41 271 128 4e-105 104
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 77.78 45 10 0 882 748 969 835 4e-105 93.9
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 67.86 28 9 0 1045 962 1132 1049 4e-105 51.9
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 59.09 22 9 0 586 521 673 608 4e-105 33.1
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 81.40 86 16 0 296 553 383 640 2e-87 185
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 84.38 32 5 0 11 106 98 193 2e-87 74.8
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 71.43 35 10 0 941 1045 1028 1132 2e-87 61.6
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 94.44 18 1 0 794 847 881 934 2e-87 50.1
+gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 83.61 238 39 0 18 731 64 777 0.0 507
+gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 82.35 85 15 0 783 1037 829 1083 0.0 188
+gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 70.96 303 88 0 925 17 971 63 2e-130 435
+gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 72.22 18 5 0 1027 974 1073 1020 2e-130 35.0
+gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 55.32 188 84 0 605 42 651 88 7e-89 245
+gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 61.11 72 28 0 1037 822 1083 868 7e-89 91.3
+gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 49.02 204 104 0 29 640 75 686 4e-78 197
+gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 66.04 53 18 0 860 1018 906 1064 4e-78 85.8
+gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 44.44 27 15 0 689 769 735 815 4e-78 32.2
+gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 47.47 198 104 0 633 40 679 86 4e-65 177
+gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 68.09 47 15 0 1017 877 1063 923 4e-65 80.3
+gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 57.89 114 48 0 265 606 311 652 3e-46 137
+gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 46.30 54 29 0 19 180 65 226 3e-46 52.4
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 96.40 111 4 0 1 333 118 450 0.0 264
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.31 65 5 0 3174 3368 829 1023 0.0 151
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 96.43 56 2 0 2855 3022 616 783 0.0 141
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 93.22 59 4 0 1404 1580 442 618 0.0 138
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.00 25 2 0 4222 4296 1021 1095 0.0 64.3
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 88.89 9 1 0 3128 3154 783 809 0.0 22.6
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 78.38 111 24 0 333 1 450 118 7e-171 212
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 88.75 80 9 0 3367 3128 1022 783 7e-171 161
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 93.33 60 4 0 1582 1403 620 441 7e-171 136
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.07 56 5 0 3021 2854 782 615 7e-171 119
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 84.62 26 4 0 4301 4224 1100 1023 7e-171 52.8
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 95.83 72 3 0 218 3 335 120 8e-142 152
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 77.78 63 14 0 3368 3180 1023 835 8e-142 125
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 85.11 47 7 0 1544 1404 582 442 8e-142 108
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 80.36 56 11 0 3022 2855 783 616 8e-142 101
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 75.86 29 7 0 325 239 442 356 8e-142 58.3
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 86.36 22 3 0 4287 4222 1086 1021 8e-142 48.7
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 90.91 11 1 0 3159 3127 814 782 8e-142 31.3
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 81.03 58 11 0 2854 3027 615 788 2e-122 128
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 83.33 60 10 0 1403 1582 441 620 2e-122 125
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 80.60 67 13 0 3 203 120 320 2e-122 119
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.30 23 2 0 4220 4288 1019 1087 2e-122 53.8
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 78.26 23 5 0 266 334 383 451 2e-122 48.3
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 80.00 20 4 0 3308 3367 963 1022 2e-122 46.0
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 83.33 18 3 0 3226 3279 881 934 2e-122 40.5
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 80.39 51 10 0 154 2 271 119 1e-92 97.3
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 73.68 57 15 0 3366 3196 1021 851 1e-92 95.9
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 81.82 44 8 0 1521 1390 559 428 1e-92 95.0
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 95.00 20 1 0 332 273 449 390 1e-92 52.4
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 86.36 22 3 0 4288 4223 1087 1022 1e-92 48.3
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 61.54 26 10 0 2912 2835 673 596 1e-92 37.7
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 69.57 23 7 0 3023 2955 784 716 1e-92 33.1
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 76.67 30 7 0 2 91 119 208 6e-43 68.0
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.11 45 4 0 1411 1545 449 583 6e-43 68.0
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 77.27 22 5 0 267 332 384 449 6e-43 45.1
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 86.36 22 3 0 4224 4289 1023 1088 6e-43 44.1
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 83.33 12 2 0 2856 2891 617 652 6e-43 25.4
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 95.91 220 9 0 1 660 118 777 0.0 526
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 92.13 89 7 0 712 978 829 1095 0.0 212
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.01 178 16 0 536 3 653 120 1e-178 353
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 88.32 137 16 0 983 573 1100 690 1e-178 277
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 77.43 319 72 0 3 959 120 1076 4e-174 593
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 79.07 129 27 0 558 172 675 289 2e-133 248
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 76.83 82 19 0 963 718 1080 835 2e-133 159
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 84.09 44 7 0 133 2 250 119 2e-133 97.3
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 73.08 78 21 0 433 200 550 317 6e-102 145
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 70.15 67 20 0 799 599 916 716 6e-102 106
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 80.49 41 8 0 123 1 240 118 6e-102 84.5
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 77.78 27 6 0 553 473 670 590 6e-102 51.9
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 71.43 14 4 0 889 848 1006 965 6e-102 32.7
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 81.82 11 2 0 958 926 1075 1043 6e-102 28.6
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 84.85 33 5 0 239 337 356 454 4e-48 72.5
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 86.67 30 4 0 2 91 119 208 4e-48 71.2
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 81.40 43 8 0 404 532 521 649 4e-48 47.3
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 88.89 18 2 0 764 817 881 934 4e-48 44.6
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 87.50 8 1 0 935 958 1052 1075 4e-48 21.7
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 93.91 230 14 0 1 690 88 777 0.0 538
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 91.18 102 9 0 742 1047 829 1134 0.0 233
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 88.83 188 21 0 566 3 653 90 0.0 394
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 84.06 138 22 0 1046 633 1133 720 0.0 260
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 67.11 228 75 0 684 1 771 88 7e-132 333
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 67.27 110 36 0 1045 716 1132 803 7e-132 141
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 70.20 151 45 0 3 455 90 542 1e-128 236
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 64.04 89 32 0 780 1046 867 1133 1e-128 136
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 66.22 74 25 0 510 731 597 818 1e-128 111
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 66.04 106 36 0 242 559 329 646 2e-58 161
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 85.71 21 3 0 92 154 179 241 2e-58 53.8
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 73.68 19 5 0 791 847 878 934 2e-58 39.1
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 61.29 62 24 0 424 239 511 326 4e-55 81.3
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 74.36 39 10 0 133 17 220 104 4e-55 69.8
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 65.71 35 12 0 882 778 969 865 4e-55 56.3
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 58.14 43 18 0 649 521 736 608 4e-55 50.6
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 66.67 12 4 0 972 937 1059 1024 4e-55 23.9
+gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 82.13 235 42 0 11 715 76 780 0.0 498
+gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 78.31 83 18 0 770 1018 835 1083 0.0 177
+gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 72.29 332 92 0 1017 22 1082 87 1e-150 516
+gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 48.30 147 76 0 712 272 777 337 2e-98 169
+gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 54.17 72 33 0 1030 815 1095 880 2e-98 103
+gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 47.83 69 36 0 220 14 285 79 2e-98 83.5
+gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 72.00 25 7 0 782 708 847 773 2e-98 45.1
+gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 56.00 75 33 0 532 756 597 821 5e-65 87.7
+gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 39.42 104 63 0 19 330 84 395 5e-65 86.8
+gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 56.90 58 25 0 829 1002 894 1067 5e-65 81.3
+gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 50.00 30 15 0 388 477 453 542 5e-65 33.6
diff -r 9dabbfd73c8a -r 70e7dcbf6573 test-data/three_human_mRNA.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/three_human_mRNA.fasta Mon Sep 23 06:14:13 2013 -0400
@@ -0,0 +1,183 @@
+>ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.
+GAGAGGACGAGGTGCCGCTGCCTGGAGAATCCTCCGCTGCCGTCGGCTCCCGGAGCCCAG
+CCCTTTCCTAACCCAACCCAACCTAGCCCAGTCCCAGCCGCCAGCGCCTGTCCCTGTCAC
+GGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTC
+CCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGA
+TACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGC
+TGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCAT
+TAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCA
+CTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAA
+TGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTA
+CATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCAC
+TCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTA
+TAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATT
+TGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGG
+GCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAA
+TTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGA
+ATTGACAGAAGAAGGACTGCCTTTTCTCATACTCTTTCACATGAAAGAAGATACAGAAAG
+TTTAGAAATATTCCAGAATGAAGTAGCTCGGCAATTAATAAGTGAAAAAGGTACAATAAA
+CTTTTTACATGCCGATTGTGACAAATTTAGACATCCTCTTCTGCACATACAGAAAACTCC
+AGCAGATTGTCCTGTAATCGCTATTGACAGCTTTAGGCATATGTATGTGTTTGGAGACTT
+CAAAGATGTATTAATTCCTGGAAAACTCAAGCAATTCGTATTTGACTTACATTCTGGAAA
+ACTGCACAGAGAATTCCATCATGGACCTGACCCAACTGATACAGCCCCAGGAGAGCAAGC
+CCAAGATGTAGCAAGCAGTCCACCTGAGAGCTCCTTCCAGAAACTAGCACCCAGTGAATA
+TAGGTATACTCTATTGAGGGATCGAGATGAGCTTTAAAAACTTGAAAAACAGTTTGTAAG
+CCTTTCAACAGCAGCATCAACCTACGTGGTGGAAATAGTAAACCTATATTTTCATAATTC
+TATGTGTATTTTTATTTTGAATAAACAGAAAGAAATTTTGGGTTTTTAATTTTTTTCTCC
+CCGACTCAAAATGCATTGTCATTTAATATAGTAGCCTCTTAAAAAAAAAAAAACCTGCTA
+GGATTTAAAAATAAAAATCAGAGGCCTATCTCCACTTTAAATCTGTCCTGTAAAAGTTTT
+ATAAATCAAATGAAAGGTGACATTGCCAGAAACTTACCATTAACTTGCACTACTAGGGTA
+GGGAGGACTTAGGATGTTTCCTGTGTCGTATGTGCTTTTCTTTCTTTCATATGATCAATT
+CTGTTGGTATTTTCAGTATCTCATTTCTCAAAGCTAAAGAGATATACATTCTGGATACTT
+GGGAGGGGAATAAATTAAAGTTTTCACACTGTGTACTGTGTTTTACTGATTGGTTGGATA
+TTGCTTATGAAAATTCCATAGTGGTATTTTTTTGGATTCTTAATGTGTAACTTAAACATA
+CTTTGAAGTGGAGGAGAGTCATAAGACAGAACATTTGGCAGGAATTGTCCTTATGAAACA
+AGAAAAAGAAAATGAAAAGTATTATTAAGCTTCTGTGTTTGTCTAAAAATGTGGCATATG
+GATGGCATTTAAAACTTTGAATGAATTATACCTAAATCTGGGACAGGGAGGTGACAGTGG
+AACAGGCTACCAATCAGAACTAGATGACTTTTAAGGCTCCTCCTATTATGAGACTTCAAT
+TTCCAAAGAGAAGAACTAGCAGAGAAATTGTATTTCAGTAATTTTAAGCTCCTTCTGTCT
+TGTAGAGTCTTGTTATAGTTGTATAAATCAAAAACACAGAATAAGGAACATATTTAACTT
+TTTTTCATTATAAAATGGTTAGAGGACCCTACCCCCTCTAGATTCCCTGATTTCCCCAGG
+CCTGCAGCATACAGTAAGATGGGTCCCTGTGCCAGGCCTCAATACTGCCAGGGAATAAAA
+CCAGAGGGAGAGGACCCTCAGTGTCATATCAGGAAGCCCAGTGCCAGAGGACAGACAGGT
+TCAAAACTGGCTTTTCCTCTGGGCCTGGGTTGGTGCTATAGGCCAAGGGTCATTTTATAC
+TTGGGTATAAATCAATCCCAGTTTGGGAAAAGATTATTTTTAAGCTTAAAAGGCTGACAT
+GTGCCATTATATGTAGTATGTAATATATGTAACATCTTCCAATTCTTTTAAAATAAAATT
+AATATTTATAATGGATATTTAATGATTGTTATTTTTAAAAACCAGCTTATAATTCCTCGT
+TATGCATGATTTATCCAAAGTTTCCATAGTTTTATTCAAAATAATAAATGTTAATAAGGT
+GATAAGGGGTATATTTAATGTATTGTATCAAATTGTGAATAAGAAAGTAGGATGGAGCTT
+TCTAGAGGTTGGGCCTTAGTTCTGTTATCCTCATTGCTTTTAACCAATAAGTTAAATGAA
+GTTAGAGTTATGGTCTTCAGGTTAGATTATGGACCAGATCTGTGAGGGTCAGCATGGAAA
+TTCACATTCAACAAGGTAGCACACAGGACCAAGAGCAGCACATGCAATCAACTGGAATAA
+TATAGTAATCCTGTAACTGGGTTTGAAAAAATAATCAACAAAAGATACAATTCAAGGGTT
+AGGTTGCAGAGAGCTGGCTTGAGAGTAGTTATTATGAAAAAGGCCTCAAGGAGTACGTGT
+TCAGTATGCTCTAAGATGATAAAGTGGCTGTTAAAAAGGGAGTTGATTTGAGGAAGTATT
+ACTTAGCATTCATGCATATTGGGCTTAGGCTCTAGCCCTGCCACTATCATTGTCTTCTCT
+GGACTGTGAAGTCACTGAGGACAAGGAAACTAAATTTAATGTCTGTATCACTAGTGCCTA
+GAATTTCTGGACACTTAGTAGTCACCATCAGGCGTTTATTTAATGAATGAGAAGCAAAGT
+GACCTTGGTTACTTTTTTACCCTGAGGGGCTCAGCACTCATTAGGACTTGGTGCCTAATT
+TTATAAAAAGTCACTAAGCTCAAGTGCTTGGATGAAAGGACAGCGTGGATAAAAAGGTTT
+TTAAAACATGGATGTTAAGGCTGTTTTGCTTGGAGAAGACTTGGGACTGGGACAGTCTTT
+AGATATTATTTGAAATGCTGGCACTGTCTATCTGGATCCCAGGGCTTGAACTAGGATTTG
+AGGAAGTCACAGGGAAGCAGATTTCAGTCTGACATTTATTCAGTGCAAGTTTTTTGGTGC
+TGTAGTATATGATGAAAGATGTAAAGCTGAATAAAGCATTATTTCTGCCCTAGAGTTGTT
+CACAGCCTAGTCAGGCATATGGATATGTAAACAATGACTGTAACGTGTTATAGATGTAAA
+GACAAAATAAAGGTTAAAGAGGGCATAAAGGAGCACTCAATTGCAGAGATTTGAGGACAT
+TATTTTTATTTTGAGCTTTAAAAAGATGAATAGGTGTTCTCAGGAGGTAGGGATCTGGCT
+GAGAGGGAATAATCTGAGCAAAGGTATGAAACAGCCTAATGCATTAGAGAAAAAAGTTCT
+TTTAGTAAGGCATTTGGGGTTGGGGAAGCTAGAAAAAGAAATGGGAGCTGGTCACACAGG
+GCCTTGTGTGCCAGACTAAGGGGTTTGTAGTATATATTGTAGGCAGAAGAGATCCATCAA
+CAGATTGCAAGCAAGGAAGTATGTTCACTTTAAAGTTTGAGAAAGAATAGTGTGGAAGCA
+CGTCTCAAATTTAGACTTACTTGTTCCCCCTCTGAACCGTGAATCAGACCATTTCAGGTA
+GAAGTCTTCCCCGGTTTATCTGATCTACTCGGGGCCTCAGGCTTCTCAGCTGGGAAGAGA
+GGATGCAAGACCAGACTGAAGAACACGGTTGAGTCCCCAGAACCAAAAGGGGGCCTTTCT
+GCTTCTTAGCCAGCTACCTCTTCGAGTTTTTCAAATTGTGAGGGGGACCATAAAAGGATG
+GAAACTTTTAGATGACATTCTACAAATTATTTTTTTCTTTAAATTAAAAGAACCTAGCCA
+ATAAGATAGAGAATGGGCATCTAAGGCATCTCAGAGCTCTCTGATGAAGCCAGGTTGTCA
+AAGATCATTTGCAAAAGAAGGGAAAACTGGCATGACAAAAGCTACAGAGAGGAGAGTGAA
+ATATAGAAGTGTTTGAAATGTTCAAGCTCACAATAAGCTTAAATTTATAGAAAATGCTAA
+GGTTGTCAAGAAGGCTTTTTTTTTTTTCTTTTTTAAACCTGAGGGCAAAAAGGAATGGAT
+AAAGTAGTGTAATGGATTGACAATCAGGAAGAACAGAATAACTCAGTTTTTTTTTCTCCT
+ACAAGGAGATATGGCTGGACCAAAATAAAATGACATGAAATTGCAAAAATGAAAAT
+>ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.
+GGGGGGCTGCGCGGCCGGGTCGGTGCGCACACGAGAAGGACGCGCGGCCCCCAGCGCTCT
+TGGGGGCCGCCTCGGAGCATGACCCCCGCGGGCCAGCGCCGCGCGCCTGATCCGAGGAGA
+CCCCGCGCTCCCGCAGCCATGGGCACCGGGGGCCGGCGGGGGGCGGCGGCCGCGCCGCTG
+CTGGTGGCGGTGGCCGCGCTGCTACTGGGCGCCGCGGGCCACCTGTACCCCGGAGAGGTG
+TGTCCCGGCATGGATATCCGGAACAACCTCACTAGGTTGCATGAGCTGGAGAATTGCTCT
+GTCATCGAAGGACACTTGCAGATACTCTTGATGTTCAAAACGAGGCCCGAAGATTTCCGA
+GACCTCAGTTTCCCCAAACTCATCATGATCACTGATTACTTGCTGCTCTTCCGGGTCTAT
+GGGCTCGAGAGCCTGAAGGACCTGTTCCCCAACCTCACGGTCATCCGGGGATCACGACTG
+TTCTTTAACTACGCGCTGGTCATCTTCGAGATGGTTCACCTCAAGGAACTCGGCCTCTAC
+AACCTGATGAACATCACCCGGGGTTCTGTCCGCATCGAGAAGAACAATGAGCTCTGTTAC
+TTGGCCACTATCGACTGGTCCCGTATCCTGGATTCCGTGGAGGATAATCACATCGTGTTG
+AACAAAGATGACAACGAGGAGTGTGGAGACATCTGTCCGGGTACCGCGAAGGGCAAGACC
+AACTGCCCCGCCACCGTCATCAACGGGCAGTTTGTCGAACGATGTTGGACTCATAGTCAC
+TGCCAGAAAGTTTGCCCGACCATCTGTAAGTCACACGGCTGCACCGCCGAAGGCCTCTGT
+TGCCACAGCGAGTGCCTGGGCAACTGTTCTCAGCCCGACGACCCCACCAAGTGCGTGGCC
+TGCCGCAACTTCTACCTGGACGGCAGGTGTGTGGAGACCTGCCCGCCCCCGTACTACCAC
+TTCCAGGACTGGCGCTGTGTGAACTTCAGCTTCTGCCAGGACCTGCACCACAAATGCAAG
+AACTCGCGGAGGCAGGGCTGCCACCAATACGTCATTCACAACAACAAGTGCATCCCTGAG
+TGTCCCTCCGGGTACACGATGAATTCCAGCAACTTGCTGTGCACCCCATGCCTGGGTCCC
+TGTCCCAAGGTGTGCCACCTCCTAGAAGGCGAGAAGACCATCGACTCGGTGACGTCTGCC
+CAGGAGCTCCGAGGATGCACCGTCATCAACGGGAGTCTGATCATCAACATTCGAGGAGGC
+AACAATCTGGCAGCTGAGCTAGAAGCCAACCTCGGCCTCATTGAAGAAATTTCAGGGTAT
+CTAAAAATCCGCCGATCCTACGCTCTGGTGTCACTTTCCTTCTTCCGGAAGTTACGTCTG
+ATTCGAGGAGAGACCTTGGAAATTGGGAACTACTCCTTCTATGCCTTGGACAACCAGAAC
+CTAAGGCAGCTCTGGGACTGGAGCAAACACAACCTCACCACCACTCAGGGGAAACTCTTC
+TTCCACTATAACCCCAAACTCTGCTTGTCAGAAATCCACAAGATGGAAGAAGTTTCAGGA
+ACCAAGGGGCGCCAGGAGAGAAACGACATTGCCCTGAAGACCAATGGGGACAAGGCATCC
+TGTGAAAATGAGTTACTTAAATTTTCTTACATTCGGACATCTTTTGACAAGATCTTGCTG
+AGATGGGAGCCGTACTGGCCCCCCGACTTCCGAGACCTCTTGGGGTTCATGCTGTTCTAC
+AAAGAGGCCCCTTATCAGAATGTGACGGAGTTCGATGGGCAGGATGCGTGTGGTTCCAAC
+AGTTGGACGGTGGTAGACATTGACCCACCCCTGAGGTCCAACGACCCCAAATCACAGAAC
+CACCCAGGGTGGCTGATGCGGGGTCTCAAGCCCTGGACCCAGTATGCCATCTTTGTGAAG
+ACCCTGGTCACCTTTTCGGATGAACGCCGGACCTATGGGGCCAAGAGTGACATCATTTAT
+GTCCAGACAGATGCCACCAACCCCTCTGTGCCCCTGGATCCAATCTCAGTGTCTAACTCA
+TCATCCCAGATTATTCTGAAGTGGAAACCACCCTCCGACCCCAATGGCAACATCACCCAC
+TACCTGGTTTTCTGGGAGAGGCAGGCGGAAGACAGTGAGCTGTTCGAGCTGGATTATTGC
+CTCAAAGGGCTGAAGCTGCCCTCGAGGACCTGGTCTCCACCATTCGAGTCTGAAGATTCT
+CAGAAGCACAACCAGAGTGAGTATGAGGATTCGGCCGGCGAATGCTGCTCCTGTCCAAAG
+ACAGACTCTCAGATCCTGAAGGAGCTGGAGGAGTCCTCGTTTAGGAAGACGTTTGAGGAT
+TACCTGCACAACGTGGTTTTCGTCCCCAGAAAAACCTCTTCAGGCACTGGTGCCGAGGAC
+CCTAGGCCATCTCGGAAACGCAGGTCCCTTGGCGATGTTGGGAATGTGACGGTGGCCGTG
+CCCACGGTGGCAGCTTTCCCCAACACTTCCTCGACCAGCGTGCCCACGAGTCCGGAGGAG
+CACAGGCCTTTTGAGAAGGTGGTGAACAAGGAGTCGCTGGTCATCTCCGGCTTGCGACAC
+TTCACGGGCTATCGCATCGAGCTGCAGGCTTGCAACCAGGACACCCCTGAGGAACGGTGC
+AGTGTGGCAGCCTACGTCAGTGCGAGGACCATGCCTGAAGCCAAGGCTGATGACATTGTT
+GGCCCTGTGACGCATGAAATCTTTGAGAACAACGTCGTCCACTTGATGTGGCAGGAGCCG
+AAGGAGCCCAATGGTCTGATCGTGCTGTATGAAGTGAGTTATCGGCGATATGGTGATGAG
+GAGCTGCATCTCTGCGTCTCCCGCAAGCACTTCGCTCTGGAACGGGGCTGCAGGCTGCGT
+GGGCTGTCACCGGGGAACTACAGCGTGCGAATCCGGGCCACCTCCCTTGCGGGCAACGGC
+TCTTGGACGGAACCCACCTATTTCTACGTGACAGACTATTTAGACGTCCCGTCAAATATT
+GCAAAAATTATCATCGGCCCCCTCATCTTTGTCTTTCTCTTCAGTGTTGTGATTGGAAGT
+ATTTATCTATTCCTGAGAAAGAGGCAGCCAGATGGGCCGCTGGGACCGCTTTACGCTTCT
+TCAAACCCTGAGTATCTCAGTGCCAGTGATGTGTTTCCATGCTCTGTGTACGTGCCGGAC
+GAGTGGGAGGTGTCTCGAGAGAAGATCACCCTCCTTCGAGAGCTGGGGCAGGGCTCCTTC
+GGCATGGTGTATGAGGGCAATGCCAGGGACATCATCAAGGGTGAGGCAGAGACCCGCGTG
+GCGGTGAAGACGGTCAACGAGTCAGCCAGTCTCCGAGAGCGGATTGAGTTCCTCAATGAG
+GCCTCGGTCATGAAGGGCTTCACCTGCCATCACGTGGTGCGCCTCCTGGGAGTGGTGTCC
+AAGGGCCAGCCCACGCTGGTGGTGATGGAGCTGATGGCTCACGGAGACCTGAAGAGCTAC
+CTCCGTTCTCTGCGGCCAGAGGCTGAGAATAATCCTGGCCGCCCTCCCCCTACCCTTCAA
+GAGATGATTCAGATGGCGGCAGAGATTGCTGACGGGATGGCCTACCTGAACGCCAAGAAG
+TTTGTGCATCGGGACCTGGCAGCGAGAAACTGCATGGTCGCCCATGATTTTACTGTCAAA
+ATTGGAGACTTTGGAATGACCAGAGACATCTATGAAACGGATTACTACCGGAAAGGGGGC
+AAGGGTCTGCTCCCTGTACGGTGGATGGCACCGGAGTCCCTGAAGGATGGGGTCTTCACC
+ACTTCTTCTGACATGTGGTCCTTTGGCGTGGTCCTTTGGGAAATCACCAGCTTGGCAGAA
+CAGCCTTACCAAGGCCTGTCTAATGAACAGGTGTTGAAATTTGTCATGGATGGAGGGTAT
+CTGGATCAACCCGACAACTGTCCAGAGAGAGTCACTGACCTCATGCGCATGTGCTGGCAA
+TTCAACCCCAAGATGAGGCCAACCTTCCTGGAGATTGTCAACCTGCTCAAGGACGACCTG
+CACCCCAGCTTTCCAGAGGTGTCGTTCTTCCACAGCGAGGAGAACAAGGCTCCCGAGAGT
+GAGGAGCTGGAGATGGAGTTTGAGGACATGGAGAATGTGCCCCTGGACCGTTCCTCGCAC
+TGTCAGAGGGAGGAGGCGGGGGGCCGGGATGGAGGGTCCTCGCTGGGTTTCAAGCGGAGC
+TACGAGGAACACATCCCTTACACACACATGAACGGAGGCAAGAAAAACGGGCGGATTCTG
+ACCTTGCCTCGGTCCAATCCTTCCTAACAGTGCCTACCGTGGCGGGGGCGGGCAGGGGTT
+CCCATTTTCGCTTTCCTCTGGTTTGAAAGCCTCTGGAAAACTCAGGATTCTCACGACTCT
+ACCATGTCCAGTGGAGTTCAGAGATCGTTCCTATACATTTCTGTTCATCTTAAGGTGGAC
+TCGTTTGGTTACCAATTTAACTAGTCCTGCAGAGGATTTAACTGTGAACCTGGAGGGCAA
+GGGGTTTCCACAGTTGCTGCTCCTTTGGGGCAACGACGGTTTCAAACCAGGATTTTGTGT
+TTTTTCGTTCCCCCCACCCGCCCCCAGCAGATGGAAAGAAAGCACCTGTTTTTACAAATT
+CTTTTTTTTTTTTTTTTTTTTTTTTTTTTGCTGGTGTCTGAGCTTCAGTATAAAAGACAA
+AACTTCCTGTTTGTGGAACAAAATTTCGAAAGAAAAAACCAAA
+>ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.
+CCAGCTGGAGCCCTGAGTGGCTGAGCTCAGGCCTTCGCAGCATTCTTGGGTGGGAGCAGC
+CACGGGTCAGCCACAAGGGCCACAGCCATGAATGGCACAGAAGGCCCTAACTTCTACGTG
+CCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTG
+GCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGC
+TTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCT
+CTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTC
+ACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAAT
+TTGGAGGGCTTCTTTGCCACCCTGGGCGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTG
+GCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAAC
+CATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCA
+CTCGCCGGCTGGTCCAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTAC
+TACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCAC
+TTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAG
+GAGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACC
+CGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTG
+GCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCA
+GCGTTCTTTGCCAAGAGCGCCGCCATCTACAACCCTGTCATCTATATCATGATGAACAAG
+CAGTTCCGGAACTGCATGCTCACCACCATCTGCTGCGGCAAGAACCCACTGGGTGACGAT
+GAGGCCTCTGCTACCGTGTCCAAGACGGAGACGAGCCAGGTGGCCCCGGCCTAAGACCTG
+CCTAGGACTCTGTGGCCGACTATAGGCGTCTCCCATCCCCTACACCTTCCCCCAGCCACA
+GCCATCCCACCAG
diff -r 9dabbfd73c8a -r 70e7dcbf6573 tools/ncbi_blast_plus/README.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/README.rst Mon Sep 23 06:14:13 2013 -0400
@@ -0,0 +1,168 @@
+Galaxy wrappers for NCBI BLAST+ suite
+=====================================
+
+These wrappers are copyright 2010-2013 by Peter Cock, The James Hutton Institute
+(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
+See the licence text below.
+
+Currently tested with NCBI BLAST 2.2.26+ (i.e. version 2.2.26 of BLAST+),
+and does not work with the NCBI 'legacy' BLAST suite (e.g. blastall).
+
+Note that these wrappers (and the associated datatypes) were originally
+distributed as part of the main Galaxy repository, but as of August 2012
+moved to the Galaxy Tool Shed as 'ncbi_blast_plus' (and 'blast_datatypes').
+My thanks to Dannon Baker from the Galaxy development team for his assistance
+with this.
+
+These wrappers are available from the Galaxy Tool Shed at:
+http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
+
+
+Automated Installation
+======================
+
+Galaxy should be able to automatically install the dependencies, i.e. the
+'blast_datatypes' repository which defines the BLAST XML file format
+('blastxml') and protein and nucleotide BLAST databases ('blastdbp' and
+'blastdbn').
+
+You must tell Galaxy about any system level BLAST databases using configuration
+files blastdb.loc (nucleotide databases like NT) and blastdb_p.loc (protein
+databases like NR), and blastdb_d.loc (protein domain databases like CDD or
+SMART) which are located in the tool-data/ folder. Sample files are included
+which explain the tab-based format to use.
+
+You can download the NCBI provided databases as tar-balls from here:
+
+* ftp://ftp.ncbi.nlm.nih.gov/blast/db/ (nucleotide and protein databases like NR)
+* ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/little_endian/ (domain databases like CDD)
+
+
+Manual Installation
+===================
+
+For those not using Galaxy's automated installation from the Tool Shed, put
+the XML and Python files in the tools/ncbi_blast_plus/ folder and add the XML
+files to your tool_conf.xml as normal (and do the same in tool_conf.xml.sample
+in order to run the unit tests). For example, use::
+
+
+
+You will also need to install 'blast_datatypes' from the Tool Shed. This
+defines the BLAST XML file format ('blastxml') and protein and nucleotide
+BLAST databases composite file formats ('blastdbp' and 'blastdbn').
+
+As described above for an automated installation, you must also tell Galaxy
+about any system level BLAST databases using the tool-data/blastdb*.loc files.
+
+You must install the NCBI BLAST+ standalone tools somewhere on the system
+path. Currently the unit tests are written using "BLAST 2.2.26+".
+
+Run the functional tests (adjusting the section identifier to match your
+tool_conf.xml.sample file)::
+
+ ./run_functional_tests.sh -sid NCBI_BLAST+-ncbi_blast_plus_tools
+
+
+History
+=======
+
+======= ======================================================================
+Version Changes
+------- ----------------------------------------------------------------------
+v0.0.11 - Final revision as part of the Galaxy main repository, and the
+ first release via the Tool Shed
+v0.0.12 - Implements genetic code option for translation searches.
+ - Changes to 1000 sequences at a time (to cope with
+ very large sets of queries where BLAST+ can become memory hungry)
+ - Include warning that BLAST+ with subject FASTA gives pairwise
+ e-values
+v0.0.13 - Use the new error handling options in Galaxy (the previously
+ bundled hide_stderr.py script is no longer needed).
+v0.0.14 - Support for makeblastdb and blastdbinfo with local BLAST databases
+ in the history (using work from Edward Kirton), requires v0.0.14
+ of the 'blast_datatypes' repository from the Tool Shed.
+v0.0.15 - Stronger warning in help text against searching against subject
+ FASTA files (better looking e-values than you might be expecting).
+v0.0.16 - Added repository_dependencies.xml for automated installation of the
+ 'blast_datatypes' repository from the Tool Shed.
+v0.0.17 - The BLAST+ search tools now default to extended tabular output
+ (all too often our users were having to re-run searches just to
+ get one of the missing columns like query or subject length)
+v0.0.18 - Defensive quoting of filenames in case of spaces (where possible,
+ BLAST+ handling of some multi-file arguments is problematic).
+v0.0.19 - Added wrappers for rpsblast and rpstblastn, and new blastdb_d.loc
+ for the domain databases they use (e.g. CDD, PFAM or SMART).
+ - Correct case of exception regular expression (for error handling
+ fall-back in case the return code is not set properly).
+ - Clearer naming of output files.
+v0.0.20 - Added unit tests for BLASTN and TBLASTX.
+ - Added percentage identity option to BLASTN.
+ - Fallback on ElementTree if cElementTree missing in XML to tabular.
+ - Link to Tool Shed added to help text and this documentation.
+ - Tweak dependency on blast_datatypes to also work on Test Tool Shed.
+ - Dependency on new package_blast_plus_2_2_26 in Tool Shed.
+ - Adopted standard MIT License.
+ - Development moved to GitHub, https://github.com/peterjc/galaxy_blast
+ - Updated citation information (Cock et al. 2013).
+======= ======================================================================
+
+
+Bug Reports
+===========
+
+You can file an issue here https://github.com/peterjc/galaxy_blast/issues or ask
+us on the Galaxy development list http://lists.bx.psu.edu/listinfo/galaxy-dev
+
+
+Developers
+==========
+
+This script and related tools were originally developed on the 'tools' branch
+of the following Mercurial repository:
+https://bitbucket.org/peterjc/galaxy-central/
+
+As of July 2013, development is continuing on a dedicated GitHub repository:
+https://github.com/peterjc/galaxy_blast
+
+For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball I use
+the following command from the GitHub repository root folder::
+
+ $ ./ncbi_blast_plus/make_ncbi_blast_plus.sh
+
+This simplifies ensuring a consistent set of files is bundled each time,
+including all the relevant test files.
+
+
+Licence (MIT)
+=============
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff -r 9dabbfd73c8a -r 70e7dcbf6573 tools/ncbi_blast_plus/blastxml_to_tabular.py
--- a/tools/ncbi_blast_plus/blastxml_to_tabular.py Thu Apr 25 09:38:37 2013 -0400
+++ b/tools/ncbi_blast_plus/blastxml_to_tabular.py Mon Sep 23 06:14:13 2013 -0400
@@ -1,5 +1,5 @@
#!/usr/bin/env python
-"""Convert a BLAST XML file to 12 column tabular output
+"""Convert a BLAST XML file to tabular output.
Takes three command line options, input BLAST XML filename, output tabular
BLAST filename, output format (std for standard 12 columns, or ext for the
@@ -62,8 +62,15 @@
import sys
import re
+if "-v" in sys.argv or "--version" in sys.argv:
+ print "v0.0.12"
+ sys.exit(0)
+
if sys.version_info[:2] >= ( 2, 5 ):
- import xml.etree.cElementTree as ElementTree
+ try:
+ from xml.etree import cElementTree as ElementTree
+ except ImportError:
+ from xml.etree import ElementTree as ElementTree
else:
from galaxy import eggs
import pkg_resources; pkg_resources.require( "elementtree" )
diff -r 9dabbfd73c8a -r 70e7dcbf6573 tools/ncbi_blast_plus/blastxml_to_tabular.xml
--- a/tools/ncbi_blast_plus/blastxml_to_tabular.xml Thu Apr 25 09:38:37 2013 -0400
+++ b/tools/ncbi_blast_plus/blastxml_to_tabular.xml Mon Sep 23 06:14:13 2013 -0400
@@ -1,5 +1,6 @@
-
+
Convert BLAST XML output to tabular
+ blastxml_to_tabular.py --version
blastxml_to_tabular.py $blastxml_file $tabular_file $out_format
@@ -128,5 +129,17 @@
low complexity (columns 21 and 22), and thus also calculated figures like
the percentage identity (column 3).
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite:
+
+Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
+Galaxy tools and workflows for sequence analysis with applications
+in molecular plant pathology. PeerJ 1:e167
+http://dx.doi.org/10.7717/peerj.167
+
+This wrapper is available to install into other Galaxy Instances via the Galaxy
+Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
diff -r 9dabbfd73c8a -r 70e7dcbf6573 tools/ncbi_blast_plus/ncbi_blast_plus.txt
--- a/tools/ncbi_blast_plus/ncbi_blast_plus.txt Thu Apr 25 09:38:37 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,144 +0,0 @@
-Galaxy wrappers for NCBI BLAST+ suite
-=====================================
-
-These wrappers are copyright 2010-2013 by Peter Cock, The James Hutton Institute
-(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
-See the licence text below.
-
-Currently tested with NCBI BLAST 2.2.26+ (i.e. version 2.2.26 of BLAST+),
-and does not work with the NCBI 'legacy' BLAST suite (e.g. blastall).
-
-Note that these wrappers (and the associated datatypes) were originally
-distributed as part of the main Galaxy repository, but as of August 2012
-moved to the Galaxy Tool Shed as 'ncbi_blast_plus' (and 'blast_datatypes').
-My thanks to Dannon Baker from the Galaxy development team for his assistance
-with this.
-
-
-Automated Installation
-======================
-
-Galaxy should be able to automatically install the dependencies, i.e. the
-'blast_datatypes' repository which defines the BLAST XML file format
-('blastxml') and protein and nucleotide BLAST databases ('blastdbp' and
-'blastdbn').
-
-You must tell Galaxy about any system level BLAST databases using configuration
-files blastdb.loc (nucleotide databases like NT) and blastdb_p.loc (protein
-databases like NR), and blastdb_d.loc (protein domain databases like CDD or
-SMART) which are located in the tool-data/ folder. Sample files are included
-which explain the tab-based format to use.
-
-You can download the NCBI provided databases as tar-balls from here:
-ftp://ftp.ncbi.nlm.nih.gov/blast/db/ (nucleotide and protein databases like NR)
-ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/little_endian/ (domain databases like CDD)
-
-
-Manual Installation
-===================
-
-For those not using Galaxy's automated installation from the Tool Shed, put
-the XML and Python files in the tools/ncbi_blast_plus/ folder and add the XML
-files to your tool_conf.xml as normal (and do the same in tool_conf.xml.sample
-in order to run the unit tests). For example, use:
-
-
-
-You will also need to install 'blast_datatypes' from the Tool Shed. This
-defines the BLAST XML file format ('blastxml') and protein and nucleotide
-BLAST databases composite file formats ('blastdbp' and 'blastdbn').
-
-As described above for an automated installation, you must also tell Galaxy
-about any system level BLAST databases using the tool-data/blastdb*.loc files.
-
-You must install the NCBI BLAST+ standalone tools somewhere on the system
-path. Currently the unit tests are written using "BLAST 2.2.26+".
-
-Run the functional tests (adjusting the section identifier to match your
-tool_conf.xml.sample file):
-
-./run_functional_tests.sh -sid NCBI_BLAST+-ncbi_blast_plus_tools
-
-
-History
-=======
-
-v0.0.11 - Final revision as part of the Galaxy main repository, and the
- first release via the Tool Shed
-v0.0.12 - Implements genetic code option for translation searches.
- - Changes to 1000 sequences at a time (to cope with
- very large sets of queries where BLAST+ can become memory hungry)
- - Include warning that BLAST+ with subject FASTA gives pairwise
- e-values
-v0.0.13 - Use the new error handling options in Galaxy (the previously
- bundled hide_stderr.py script is no longer needed).
-v0.0.14 - Support for makeblastdb and blastdbinfo with local BLAST databases
- in the history (using work from Edward Kirton), requires v0.0.14
- of the 'blast_datatypes' repository from the Tool Shed.
-v0.0.15 - Stronger warning in help text against searching against subject
- FASTA files (better looking e-values than you might be expecting).
-v0.0.16 - Added repository_dependencies.xml for automates installation of the
- 'blast_datatypes' repository from the Tool Shed.
-v0.0.17 - The BLAST+ search tools now default to extended tabular output
- (all too often our users where having to re-run searches just to
- get one of the missing columns like query or subject length)
-v0.0.18 - Defensive quoting of filenames in case of spaces (where possible,
- BLAST+ handling of some mult-file arguments is problematic).
-v0.0.19 - Added wrappers for rpsblast and rpstblastn, and new blastdb_d.loc
- for the domain databases they use (e.g. CDD, PFAM or SMART).
- - Correct case of exception regular expression (for error handling
- fall-back in case the return code is not set properly).
- - Clearer naming of output files.
-
-
-Developers
-==========
-
-This script and related tools are being developed on the 'tools' branch of the
-following Mercurial repository:
-https://bitbucket.org/peterjc/galaxy-central/
-
-For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball I use
-the following command from the Galaxy root folder:
-
-$ ./tools/ncbi_blast_plus/make_ncbi_blast_plus.sh
-
-This simplifies ensuring a consistent set of files is bundled each time,
-including all the relevant test files.
-
-
-Licence (MIT/BSD style)
-=======================
-
-Permission to use, copy, modify, and distribute this software and its
-documentation with or without modifications and for any purpose and
-without fee is hereby granted, provided that any copyright notices
-appear in all copies and that both those copyright notices and this
-permission notice appear in supporting documentation, and that the
-names of the contributors or copyright holders not be used in
-advertising or publicity pertaining to distribution of the software
-without specific prior permission.
-
-THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
-WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
-CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
-OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
-OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
-OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
-OR PERFORMANCE OF THIS SOFTWARE.
-
-NOTE: This is the licence for the Galaxy Wrapper only. NCBI BLAST+ and
-associated data files are available and licenced separately.
diff -r 9dabbfd73c8a -r 70e7dcbf6573 tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml
--- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml Thu Apr 25 09:38:37 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml Mon Sep 23 06:14:13 2013 -0400
@@ -1,4 +1,4 @@
-
+
Show BLAST database information from blastdbcmd
blastdbcmd
@@ -57,9 +57,20 @@
**References**
-Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
+Galaxy tools and workflows for sequence analysis with applications
+in molecular plant pathology. PeerJ 1:e167
+http://dx.doi.org/10.7717/peerj.167
-Schaffer et al. Improving the accuracy of PSI-BLAST protein database searches with composition-based statistics and other refinements. 2001. Nucleic Acids Res. 29:2994-3005.
+Christiam Camacho et al. (2009).
+BLAST+: architecture and applications.
+BMC Bioinformatics. 15;10:421.
+http://dx.doi.org/10.1186/1471-2105-10-421
+This wrapper is available to install into other Galaxy Instances via the Galaxy
+Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
diff -r 9dabbfd73c8a -r 70e7dcbf6573 tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Thu Apr 25 09:38:37 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Mon Sep 23 06:14:13 2013 -0400
@@ -1,4 +1,4 @@
-
+
Extract sequence(s) from BLAST database
blastdbcmd
@@ -129,9 +129,20 @@
**References**
-Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
+Galaxy tools and workflows for sequence analysis with applications
+in molecular plant pathology. PeerJ 1:e167
+http://dx.doi.org/10.7717/peerj.167
-Schaffer et al. Improving the accuracy of PSI-BLAST protein database searches with composition-based statistics and other refinements. 2001. Nucleic Acids Res. 29:2994-3005.
+Christiam Camacho et al. (2009).
+BLAST+: architecture and applications.
+BMC Bioinformatics. 15;10:421.
+http://dx.doi.org/10.1186/1471-2105-10-421
+This wrapper is available to install into other Galaxy Instances via the Galaxy
+Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
diff -r 9dabbfd73c8a -r 70e7dcbf6573 tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Thu Apr 25 09:38:37 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Mon Sep 23 06:14:13 2013 -0400
@@ -1,4 +1,4 @@
-
+
Search nucleotide database with nucleotide query sequence(s)
@@ -37,6 +37,9 @@
#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
-max_target_seqs $adv_opts.max_hits
#end if
+#if (str($adv_opts.identity_cutoff) and float(str($adv_opts.identity_cutoff)) > 0 ):
+-perc_identity $adv_opts.identity_cutoff
+#end if
#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
-word_size $adv_opts.word_size
#end if
@@ -126,6 +129,7 @@
+
@@ -148,6 +152,18 @@
+
+
+
+
+
+
+
+
+
+
+
+
.. class:: warningmark
@@ -233,7 +249,20 @@
**References**
-Zhang et al. A Greedy Algorithm for Aligning DNA Sequences. 2000. JCB: 203-214.
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
+Galaxy tools and workflows for sequence analysis with applications
+in molecular plant pathology. PeerJ 1:e167
+http://dx.doi.org/10.7717/peerj.167
+Christiam Camacho et al. (2009).
+BLAST+: architecture and applications.
+BMC Bioinformatics. 15;10:421.
+http://dx.doi.org/10.1186/1471-2105-10-421
+
+This wrapper is available to install into other Galaxy Instances via the Galaxy
+Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
diff -r 9dabbfd73c8a -r 70e7dcbf6573 tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Thu Apr 25 09:38:37 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Mon Sep 23 06:14:13 2013 -0400
@@ -1,4 +1,4 @@
-
+
Search protein database with protein query sequence(s)
@@ -298,9 +298,20 @@
**References**
-Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
+Galaxy tools and workflows for sequence analysis with applications
+in molecular plant pathology. PeerJ 1:e167
+http://dx.doi.org/10.7717/peerj.167
-Schaffer et al. Improving the accuracy of PSI-BLAST protein database searches with composition-based statistics and other refinements. 2001. Nucleic Acids Res. 29:2994-3005.
+Christiam Camacho et al. (2009).
+BLAST+: architecture and applications.
+BMC Bioinformatics. 15;10:421.
+http://dx.doi.org/10.1186/1471-2105-10-421
+This wrapper is available to install into other Galaxy Instances via the Galaxy
+Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
diff -r 9dabbfd73c8a -r 70e7dcbf6573 tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Thu Apr 25 09:38:37 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Mon Sep 23 06:14:13 2013 -0400
@@ -286,7 +286,20 @@
**References**
-Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
+Galaxy tools and workflows for sequence analysis with applications
+in molecular plant pathology. PeerJ 1:e167
+http://dx.doi.org/10.7717/peerj.167
+Christiam Camacho et al. (2009).
+BLAST+: architecture and applications.
+BMC Bioinformatics 10:421.
+http://dx.doi.org/10.1186/1471-2105-10-421
+
+This wrapper is available to install into other Galaxy Instances via the Galaxy
+Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
diff -r 9dabbfd73c8a -r 70e7dcbf6573 tools/ncbi_blast_plus/ncbi_makeblastdb.xml
--- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Thu Apr 25 09:38:37 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Mon Sep 23 06:14:13 2013 -0400
@@ -1,5 +1,5 @@
-
- Make BLAST database
+
+ Make BLAST database
makeblastdb
blast+
@@ -14,7 +14,8 @@
## some extra spaces, e.g. -in " file1 file2 file3 " but BLAST seems happy:
-in "
#for $i in $in
-${i.file} #end for
+${i.file}
+#end for
"
#if $title:
-title "$title"
@@ -38,71 +39,73 @@
## #else if $tax.select == 'map':
## -taxid_map $tax.map
## #end if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
**What it does**
Make BLAST database from one or more FASTA files and/or BLAST databases.
@@ -118,5 +121,23 @@
**Documentation**
http://www.ncbi.nlm.nih.gov/books/NBK1763/
-
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
+Galaxy tools and workflows for sequence analysis with applications
+in molecular plant pathology. PeerJ 1:e167
+http://dx.doi.org/10.7717/peerj.167
+
+Christiam Camacho et al. (2009).
+BLAST+: architecture and applications.
+BMC Bioinformatics 10:421.
+http://dx.doi.org/10.1186/1471-2105-10-421
+
+This wrapper is available to install into other Galaxy Instances via the Galaxy
+Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
+
diff -r 9dabbfd73c8a -r 70e7dcbf6573 tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml Thu Apr 25 09:38:37 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml Mon Sep 23 06:14:13 2013 -0400
@@ -1,4 +1,4 @@
-
+
Search protein domain database (PSSMs) with protein query sequence(s)
@@ -230,7 +230,20 @@
**References**
-Marchler-Bauer A, Bryant SH. CD-Search: protein domain annotations on the fly. Nucleic Acids Res. 2004 Jul 1;32(Web Server issue):W327-31.
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
+Galaxy tools and workflows for sequence analysis with applications
+in molecular plant pathology. PeerJ 1:e167
+http://dx.doi.org/10.7717/peerj.167
+Christiam Camacho et al. (2009).
+BLAST+: architecture and applications.
+BMC Bioinformatics 10:421.
+http://dx.doi.org/10.1186/1471-2105-10-421
+
+This wrapper is available to install into other Galaxy Instances via the Galaxy
+Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
diff -r 9dabbfd73c8a -r 70e7dcbf6573 tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml Thu Apr 25 09:38:37 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml Mon Sep 23 06:14:13 2013 -0400
@@ -1,4 +1,4 @@
-
+
Search protein domain database (PSSMs) with translated nucleotide query sequence(s)
@@ -19,13 +19,13 @@
#end if
-evalue $evalue_cutoff
-out "$output1"
-##Set the extended list here so if/when we add things, saved workflows are not affected
+## Set the extended list here so if/when we add things, saved workflows are not affected
#if str($out_format)=="ext":
-outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
#else:
-outfmt $out_format
#end if
-##Seems rpstblastn does not currently support multiple threads :(
+## rpstblastn does not support multiple threads up to and including BLAST 2.2.27+; thread support was added in BLAST 2.2.28+.
##-num_threads 8
#if $adv_opts.adv_opts_selector=="advanced":
$adv_opts.filter_query
@@ -231,7 +231,20 @@
**References**
-Marchler-Bauer A, Bryant SH. CD-Search: protein domain annotations on the fly. Nucleic Acids Res. 2004 Jul 1;32(Web Server issue):W327-31.
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
+Galaxy tools and workflows for sequence analysis with applications
+in molecular plant pathology. PeerJ 1:e167
+http://dx.doi.org/10.7717/peerj.167
+Christiam Camacho et al. (2009).
+BLAST+: architecture and applications.
+BMC Bioinformatics 10:421.
+http://dx.doi.org/10.1186/1471-2105-10-421
+
+This wrapper is available to install into other Galaxy Instances via the Galaxy
+Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
diff -r 9dabbfd73c8a -r 70e7dcbf6573 tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Thu Apr 25 09:38:37 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Mon Sep 23 06:14:13 2013 -0400
@@ -1,4 +1,4 @@
-
+
Search translated nucleotide database with protein query sequence(s)
@@ -332,7 +332,20 @@
**References**
-Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
+Galaxy tools and workflows for sequence analysis with applications
+in molecular plant pathology. PeerJ 1:e167
+http://dx.doi.org/10.7717/peerj.167
+Christiam Camacho et al. (2009).
+BLAST+: architecture and applications.
+BMC Bioinformatics 10:421.
+http://dx.doi.org/10.1186/1471-2105-10-421
+
+This wrapper is available to install into other Galaxy Instances via the Galaxy
+Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
diff -r 9dabbfd73c8a -r 70e7dcbf6573 tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Thu Apr 25 09:38:37 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Mon Sep 23 06:14:13 2013 -0400
@@ -1,4 +1,4 @@
-
+
Search translated nucleotide database with translated nucleotide query sequence(s)
@@ -190,6 +190,18 @@
+
+
+
+
+
+
+
+
+
+
+
+
.. class:: warningmark
@@ -274,7 +286,20 @@
**References**
-Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
+Galaxy tools and workflows for sequence analysis with applications
+in molecular plant pathology. PeerJ 1:e167
+http://dx.doi.org/10.7717/peerj.167
+Christiam Camacho et al. (2009).
+BLAST+: architecture and applications.
+BMC Bioinformatics 10:421.
+http://dx.doi.org/10.1186/1471-2105-10-421
+
+This wrapper is available to install into other Galaxy Instances via the Galaxy
+Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
diff -r 9dabbfd73c8a -r 70e7dcbf6573 tools/ncbi_blast_plus/repository_dependencies.xml
--- a/tools/ncbi_blast_plus/repository_dependencies.xml Thu Apr 25 09:38:37 2013 -0400
+++ b/tools/ncbi_blast_plus/repository_dependencies.xml Mon Sep 23 06:14:13 2013 -0400
@@ -1,5 +1,4 @@
-
-
+
diff -r 9dabbfd73c8a -r 70e7dcbf6573 tools/ncbi_blast_plus/tool_dependencies.xml
--- a/tools/ncbi_blast_plus/tool_dependencies.xml Thu Apr 25 09:38:37 2013 -0400
+++ b/tools/ncbi_blast_plus/tool_dependencies.xml Mon Sep 23 06:14:13 2013 -0400
@@ -1,21 +1,6 @@
-
-
- ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.26/ncbi-blast-2.2.26+-src.tar.gz
- cd c++ && ./configure --prefix=$INSTALL_DIR && make && make install
-
- $INSTALL_DIR/bin
-
-
-
-
-These links provide information for building the NCBI Blast+ package in most environments.
-
-System requirements
-http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download
-
+
-