3
- 121.324
- 303
- 1.96633e-33
+ 121.709
+ 304
+ 1.62516e-33
2855
3031
177
diff -r 6560192c5098 -r 623f727cdff1 test-data/blastx_rhodopsin_vs_four_human_all.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastx_rhodopsin_vs_four_human_all.tabular Fri Mar 14 07:40:46 2014 -0400
@@ -0,0 +1,10 @@
+gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 sp|P08100|OPSD_HUMAN 1647 336 343 0 98.56 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A 0 gi|57163782|ref|NM_001009242.1| gi|57163782|ref|NM_001009242.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 15KA71FL4TS79VA9ML34VI71LI7SASA21LI13TA1GV11 99 99 N/A N/A N/A N/A N/A
+gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 sp|P08100|OPSD_HUMAN 1419 283 315 0 94.88 3 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348 N/A 0 gi|2734705|gb|U59921.1|BBU59921 gi|2734705|gb|U59921.1|BBU59921 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 3/0 10IV1MF2KA20YF1IM1CA7LV7ML5IV17FV1NDHL4CG3VS1ML1ST1MLNH3IVLF1AP3YNVL36SG4VI7IV3SA2VA3LA9ML5VI26LM8RQ2CF30VIFA13FY2STNH3EN7VI7SASA9LM8IL2LI6FL1ED1DE 63 63 N/A N/A N/A N/A N/A
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 3e-67 220 sp|P08100|OPSD_HUMAN 561 107 109 0 98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 5KA9HY61FL4TS28 22 8 N/A N/A N/A N/A N/A
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 sp|P08100|OPSD_HUMAN 319 60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 3/0 18LV3GS19LI7SASA13 22 5 N/A N/A N/A N/A N/A
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 sp|P08100|OPSD_HUMAN 304 54 57 0 96.61 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 2/0 6ML34VI14VARASA 22 4 N/A N/A N/A N/A N/A
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1 sp|P08100|OPSD_HUMAN 240 55 56 0 94.92 3 0 LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 3/0 1AG36LV1LF13VA4 22 4 N/A N/A N/A N/A N/A
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0 sp|P08100|OPSD_HUMAN 136 23 24 0 92.31 1 0 QFRNCMLTTLCCGKNPLGDDEASTTA QFRNCMLTTICCGKNPLGDDEASATV 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 9LI13TA1AV 22 2 N/A N/A N/A N/A N/A
+gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589 sp|P08100|OPSD_HUMAN 1518 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348 N/A 0 gi|283855822|gb|GQ290312.1| gi|283855822|gb|GQ290312.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 5KA66ND4FL4TS5ML7AP49LV25ML34VI40VI6LV19VI11SASA21LI13TA1 99 99 N/A N/A N/A N/A N/A
+gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 619 sp|P08100|OPSD_HUMAN 1596 325 337 0 96.84 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A 0 gi|18148870|dbj|AB062417.1| gi|18148870|dbj|AB062417.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 15KA9AY22MV38FL4TS79VA9ML10PLHKEP1TV14IT2LM1VI47LV3GS11DN14TSSA1VI17VL2LI13TA13 99 99 N/A N/A N/A N/A N/A
+gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0 532 sp|P08100|OPSD_HUMAN 1371 272 307 0 92.19 2 0 MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA 1344 348 N/A 0 gi|12583664|dbj|AB043817.1| gi|12583664|dbj|AB043817.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 2/0 10IV1MF22AQ2AM1SA4FL2IVAL12IVEQ23FL4TS1ML3ML12IL14CS8WYMV5VMTS6SN6MAVF2TV6LA3FA20RLAK1GEIV10TVCV2ST2LMAIVI1SF4RQ2CF12EA3RK2RK6VI4SA2VI9WF8TN9SA5SA1LI3MV3CM6HN2IL2LI6FLEGEDEDDEGA 74 74 N/A N/A N/A N/A N/A
diff -r 6560192c5098 -r 623f727cdff1 test-data/blastx_rhodopsin_vs_four_human_converted.tabular
--- a/test-data/blastx_rhodopsin_vs_four_human_converted.tabular Tue Jan 21 13:37:01 2014 -0500
+++ b/test-data/blastx_rhodopsin_vs_four_human_converted.tabular Fri Mar 14 07:40:46 2014 -0400
@@ -1,6 +1,6 @@
gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639
gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 4e-67 220
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 3e-67 220
gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127
gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121
gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1
diff -r 6560192c5098 -r 623f727cdff1 test-data/blastx_rhodopsin_vs_four_human_converted_ext.tabular
--- a/test-data/blastx_rhodopsin_vs_four_human_converted_ext.tabular Tue Jan 21 13:37:01 2014 -0500
+++ b/test-data/blastx_rhodopsin_vs_four_human_converted_ext.tabular Fri Mar 14 07:40:46 2014 -0400
@@ -1,8 +1,8 @@
gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 sp|P08100|OPSD_HUMAN 1647 336 343 0 98.56 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 sp|P08100|OPSD_HUMAN 1419 283 315 0 94.88 3 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 4e-67 220 sp|P08100|OPSD_HUMAN 560 107 109 0 98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 3e-67 220 sp|P08100|OPSD_HUMAN 561 107 109 0 98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 sp|P08100|OPSD_HUMAN 319 60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 sp|P08100|OPSD_HUMAN 303 54 57 0 96.61 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 sp|P08100|OPSD_HUMAN 304 54 57 0 96.61 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1 sp|P08100|OPSD_HUMAN 240 55 56 0 94.92 3 0 LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0 sp|P08100|OPSD_HUMAN 136 23 24 0 92.31 1 0 QFRNCMLTTLCCGKNPLGDDEASTTA QFRNCMLTTICCGKNPLGDDEASATV 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589 sp|P08100|OPSD_HUMAN 1518 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
diff -r 6560192c5098 -r 623f727cdff1 test-data/blastx_rhodopsin_vs_four_human_ext.tabular
--- a/test-data/blastx_rhodopsin_vs_four_human_ext.tabular Tue Jan 21 13:37:01 2014 -0500
+++ b/test-data/blastx_rhodopsin_vs_four_human_ext.tabular Fri Mar 14 07:40:46 2014 -0400
@@ -1,10 +1,10 @@
-gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 sp|P08100|OPSD_HUMAN 1647 336 343 0 98.56 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A
-gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 sp|P08100|OPSD_HUMAN 1419 283 315 0 94.88 3 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348 N/A
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 4e-67 220 sp|P08100|OPSD_HUMAN 560 107 109 0 98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 N/A
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 sp|P08100|OPSD_HUMAN 319 60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348 N/A
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 sp|P08100|OPSD_HUMAN 303 54 57 0 96.61 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA 4301 348 N/A
+gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 sp|P08100|OPSD_HUMAN 1647 336 343 0 98.56 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A
+gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 sp|P08100|OPSD_HUMAN 1419 283 315 0 94.88 3 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348 N/A
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 3e-67 220 sp|P08100|OPSD_HUMAN 561 107 109 0 98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 N/A
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 sp|P08100|OPSD_HUMAN 319 60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348 N/A
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 sp|P08100|OPSD_HUMAN 304 54 57 0 96.61 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA 4301 348 N/A
gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1 sp|P08100|OPSD_HUMAN 240 55 56 0 94.92 3 0 LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR 4301 348 N/A
gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0 sp|P08100|OPSD_HUMAN 136 23 24 0 92.31 1 0 QFRNCMLTTLCCGKNPLGDDEASTTA QFRNCMLTTICCGKNPLGDDEASATV 4301 348 N/A
-gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589 sp|P08100|OPSD_HUMAN 1518 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348 N/A
-gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 619 sp|P08100|OPSD_HUMAN 1596 325 337 0 96.84 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A
-gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0 532 sp|P08100|OPSD_HUMAN 1371 272 307 0 92.19 2 0 MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA 1344 348 N/A
+gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589 sp|P08100|OPSD_HUMAN 1518 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348 N/A
+gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 619 sp|P08100|OPSD_HUMAN 1596 325 337 0 96.84 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A
+gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0 532 sp|P08100|OPSD_HUMAN 1371 272 307 0 92.19 2 0 MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA 1344 348 N/A
diff -r 6560192c5098 -r 623f727cdff1 test-data/convert2blastmask_four_human_masked.maskinfo-asn1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/convert2blastmask_four_human_masked.maskinfo-asn1 Fri Mar 14 07:40:46 2014 -0400
@@ -0,0 +1,158 @@
+Blast-db-mask-info ::= {
+ algo-id 0,
+ algo-program seg,
+ algo-options "window=12; locut=2.2; hicut=2.5",
+ masks {
+ masks {
+ int {
+ from 6,
+ to 18,
+ id swissprot {
+ name "ERP44_HUMAN",
+ accession "Q9BS26",
+ release "reviewed"
+ }
+ },
+ packed-int {
+ {
+ from 11,
+ to 46,
+ id swissprot {
+ name "BMP2K_HUMAN",
+ accession "Q9NSY1",
+ release "reviewed"
+ }
+ },
+ {
+ from 325,
+ to 332,
+ id swissprot {
+ name "BMP2K_HUMAN",
+ accession "Q9NSY1",
+ release "reviewed"
+ }
+ },
+ {
+ from 421,
+ to 496,
+ id swissprot {
+ name "BMP2K_HUMAN",
+ accession "Q9NSY1",
+ release "reviewed"
+ }
+ },
+ {
+ from 501,
+ to 516,
+ id swissprot {
+ name "BMP2K_HUMAN",
+ accession "Q9NSY1",
+ release "reviewed"
+ }
+ },
+ {
+ from 536,
+ to 558,
+ id swissprot {
+ name "BMP2K_HUMAN",
+ accession "Q9NSY1",
+ release "reviewed"
+ }
+ },
+ {
+ from 636,
+ to 648,
+ id swissprot {
+ name "BMP2K_HUMAN",
+ accession "Q9NSY1",
+ release "reviewed"
+ }
+ },
+ {
+ from 737,
+ to 762,
+ id swissprot {
+ name "BMP2K_HUMAN",
+ accession "Q9NSY1",
+ release "reviewed"
+ }
+ },
+ {
+ from 789,
+ to 806,
+ id swissprot {
+ name "BMP2K_HUMAN",
+ accession "Q9NSY1",
+ release "reviewed"
+ }
+ },
+ {
+ from 970,
+ to 983,
+ id swissprot {
+ name "BMP2K_HUMAN",
+ accession "Q9NSY1",
+ release "reviewed"
+ }
+ },
+ {
+ from 999,
+ to 1010,
+ id swissprot {
+ name "BMP2K_HUMAN",
+ accession "Q9NSY1",
+ release "reviewed"
+ }
+ }
+ },
+ packed-int {
+ {
+ from 3,
+ to 26,
+ id swissprot {
+ name "INSR_HUMAN",
+ accession "P06213",
+ release "reviewed"
+ }
+ },
+ {
+ from 372,
+ to 390,
+ id swissprot {
+ name "INSR_HUMAN",
+ accession "P06213",
+ release "reviewed"
+ }
+ },
+ {
+ from 766,
+ to 791,
+ id swissprot {
+ name "INSR_HUMAN",
+ accession "P06213",
+ release "reviewed"
+ }
+ },
+ {
+ from 1312,
+ to 1324,
+ id swissprot {
+ name "INSR_HUMAN",
+ accession "P06213",
+ release "reviewed"
+ }
+ }
+ },
+ int {
+ from 230,
+ to 246,
+ id swissprot {
+ name "OPSD_HUMAN",
+ accession "P08100",
+ release "reviewed"
+ }
+ }
+ },
+ more FALSE
+ }
+}
diff -r 6560192c5098 -r 623f727cdff1 test-data/convert2blastmask_four_human_masked.maskinfo-asn1-binary
Binary file test-data/convert2blastmask_four_human_masked.maskinfo-asn1-binary has changed
diff -r 6560192c5098 -r 623f727cdff1 test-data/four_human_proteins.fasta.log
--- a/test-data/four_human_proteins.fasta.log Tue Jan 21 13:37:01 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-
-
-Building a new DB, current time: 11/21/2013 11:16:27
-New DB name: /tmp/tmpnSjpCP/tmpwAbNo4/database/files/000/dataset_2_files/blastdb
-New DB title: Just 4 human proteins
-Sequence type: Protein
-Keep Linkouts: T
-Keep MBits: T
-Maximum file size: 1000000000B
-Adding sequences from FASTA; added 4 sequences in 0.00202417 seconds.
diff -r 6560192c5098 -r 623f727cdff1 test-data/four_human_proteins.fasta.log.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins.fasta.log.txt Fri Mar 14 07:40:46 2014 -0400
@@ -0,0 +1,5 @@
+New DB title: Just 4 human proteins
+Sequence type: Protein
+Keep Linkouts: T
+Keep MBits: T
+Maximum file size: 1000000000B
diff -r 6560192c5098 -r 623f727cdff1 test-data/four_human_proteins_masked.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins_masked.fasta Fri Mar 14 07:40:46 2014 -0400
@@ -0,0 +1,61 @@
+>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
+MHPAVFlslpdlrcsllllVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRF
+SQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMK
+REYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFER
+VANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK
+CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHAD
+CDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREF
+HHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL
+>sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
+MKKFSRMPKSEggsgggaagggaggagagagcgsggssvgvrvfavgRHQVTLEESLAEG
+GFSTVFLVRTHGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSIS
+DNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRD
+LKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG
+KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEP
+DPEHRPDIFQVSYFAFKFAKKDCPVsninnssiPSALPEPMTASEAAARKSQIKARITDT
+IGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLAPGEFGNHRPKGALRPGNGPE
+Illgqgppqqppqqhrvlqqlqqgdwrlqqlhlqhrhphqqqqqqqqqqqqqqqqqqqqq
+qqqqqqhhhhhhhhllqDAYMqqyqhatqqqqmlqqqFLMHSVYQPQPSASQYPTMmpqy
+qqaffqqqmlaqhqpsqqqASPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSV
+ADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTeeelldrefdllrSNRLEERASSD
+KNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD
+QRTGKKTSVQGQVQKGNdesesdfesdppspksseeeeqddeeVLQGEQGDFNDDDTEPE
+NLGHRPLLMdsedeeeeekhssdsdyeQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSA
+QLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNLPQHRFPAAGLEQEEFDVFTK
+APFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD
+EITGSQQQKVkqrslqklssrqrrTKQDMSKSNGKRHHGtptstkktlkptYRTPERARR
+HKKVGRRDSQSSNEFLTISDSKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLS
+WHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKMDDFGAVPFTELVVQSITPHQ
+SQQSQPVELDPFGAAPFPSKQ
+>sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
+MATggrrgaaaapllvavaalllgaagHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHL
+QILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYAL
+VIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNE
+ECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL
+GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG
+CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGC
+TVINGSLIINIRggnnlaaeleanlglieeiSGYLKIRRSYALVSLSFFRKLRLIRGETL
+EIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE
+RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQ
+NVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFS
+DERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWE
+RQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL
+KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDvgnvtvavptvaaf
+pntsstsvptspEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYV
+SARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCV
+SRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG
+PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSR
+EKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKG
+FTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRPEAENNPGRPPPTLQEMIQMA
+AEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV
+RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDN
+CPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPeseeleme
+fedmeNVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIPYTHMNGGKKNGRILTLPRSN
+PS
+>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY
+VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG
+GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP
+EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVkeaaaqqqes
+attqkaeKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI
+YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA
diff -r 6560192c5098 -r 623f727cdff1 test-data/four_human_proteins_taxid.fasta.log.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins_taxid.fasta.log.txt Fri Mar 14 07:40:46 2014 -0400
@@ -0,0 +1,5 @@
+New DB title: Just 4 human proteins
+Sequence type: Protein
+Keep Linkouts: T
+Keep MBits: T
+Maximum file size: 1000000000B
diff -r 6560192c5098 -r 623f727cdff1 test-data/four_human_proteins_taxid.fasta.phd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins_taxid.fasta.phd Fri Mar 14 07:40:46 2014 -0400
@@ -0,0 +1,4 @@
+11117184492
+29249033410
+36665887501
+5392473183
diff -r 6560192c5098 -r 623f727cdff1 test-data/four_human_proteins_taxid.fasta.phi
Binary file test-data/four_human_proteins_taxid.fasta.phi has changed
diff -r 6560192c5098 -r 623f727cdff1 test-data/four_human_proteins_taxid.fasta.phr
Binary file test-data/four_human_proteins_taxid.fasta.phr has changed
diff -r 6560192c5098 -r 623f727cdff1 test-data/four_human_proteins_taxid.fasta.pin
Binary file test-data/four_human_proteins_taxid.fasta.pin has changed
diff -r 6560192c5098 -r 623f727cdff1 test-data/four_human_proteins_taxid.fasta.pog
Binary file test-data/four_human_proteins_taxid.fasta.pog has changed
diff -r 6560192c5098 -r 623f727cdff1 test-data/four_human_proteins_taxid.fasta.psd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins_taxid.fasta.psd Fri Mar 14 07:40:46 2014 -0400
@@ -0,0 +1,4 @@
+gnl|bl_ord_id|00
+gnl|bl_ord_id|11
+gnl|bl_ord_id|22
+gnl|bl_ord_id|33
diff -r 6560192c5098 -r 623f727cdff1 test-data/four_human_proteins_taxid.fasta.psi
Binary file test-data/four_human_proteins_taxid.fasta.psi has changed
diff -r 6560192c5098 -r 623f727cdff1 test-data/four_human_proteins_taxid.fasta.psq
Binary file test-data/four_human_proteins_taxid.fasta.psq has changed
diff -r 6560192c5098 -r 623f727cdff1 test-data/segmasker_four_human.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/segmasker_four_human.fasta Fri Mar 14 07:40:46 2014 -0400
@@ -0,0 +1,61 @@
+>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
+MHPAVFlslpdlrcsllllVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRF
+SQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMK
+REYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFER
+VANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK
+CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHAD
+CDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREF
+HHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL
+>sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
+MKKFSRMPKSEggsgggaagggaggagagagcgsggssvgvrvfavgRHQVTLEESLAEG
+GFSTVFLVRTHGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSIS
+DNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRD
+LKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG
+KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEP
+DPEHRPDIFQVSYFAFKFAKKDCPVsninnssiPSALPEPMTASEAAARKSQIKARITDT
+IGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLAPGEFGNHRPKGALRPGNGPE
+Illgqgppqqppqqhrvlqqlqqgdwrlqqlhlqhrhphqqqqqqqqqqqqqqqqqqqqq
+qqqqqqhhhhhhhhllqDAYMqqyqhatqqqqmlqqqFLMHSVYQPQPSASQYPTMmpqy
+qqaffqqqmlaqhqpsqqqASPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSV
+ADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTeeelldrefdllrSNRLEERASSD
+KNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD
+QRTGKKTSVQGQVQKGNdesesdfesdppspksseeeeqddeeVLQGEQGDFNDDDTEPE
+NLGHRPLLMdsedeeeeekhssdsdyeQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSA
+QLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNLPQHRFPAAGLEQEEFDVFTK
+APFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD
+EITGSQQQKVkqrslqklssrqrrTKQDMSKSNGKRHHGtptstkktlkptYRTPERARR
+HKKVGRRDSQSSNEFLTISDSKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLS
+WHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKMDDFGAVPFTELVVQSITPHQ
+SQQSQPVELDPFGAAPFPSKQ
+>sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
+MATggrrgaaaapllvavaalllgaagHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHL
+QILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYAL
+VIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNE
+ECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL
+GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG
+CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGC
+TVINGSLIINIRggnnlaaeleanlglieeiSGYLKIRRSYALVSLSFFRKLRLIRGETL
+EIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE
+RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQ
+NVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFS
+DERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWE
+RQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL
+KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDvgnvtvavptvaaf
+pntsstsvptspEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYV
+SARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCV
+SRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG
+PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSR
+EKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKG
+FTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRPEAENNPGRPPPTLQEMIQMA
+AEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV
+RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDN
+CPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPeseeleme
+fedmeNVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIPYTHMNGGKKNGRILTLPRSN
+PS
+>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY
+VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG
+GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP
+EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVkeaaaqqqes
+attqkaeKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI
+YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA
diff -r 6560192c5098 -r 623f727cdff1 test-data/segmasker_four_human.maskinfo-asn1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/segmasker_four_human.maskinfo-asn1 Fri Mar 14 07:40:46 2014 -0400
@@ -0,0 +1,114 @@
+Blast-db-mask-info ::= {
+ algo-id 1,
+ algo-program seg,
+ algo-options "window=12; locut=2.2; hicut=2.5",
+ masks {
+ masks {
+ int {
+ from 6,
+ to 18,
+ id local id 1
+ },
+ packed-int {
+ {
+ from 11,
+ to 46,
+ id local id 2
+ },
+ {
+ from 325,
+ to 332,
+ id local id 2
+ },
+ {
+ from 421,
+ to 443,
+ id local id 2
+ },
+ {
+ from 437,
+ to 450,
+ id local id 2
+ },
+ {
+ from 447,
+ to 496,
+ id local id 2
+ },
+ {
+ from 501,
+ to 516,
+ id local id 2
+ },
+ {
+ from 536,
+ to 554,
+ id local id 2
+ },
+ {
+ from 545,
+ to 558,
+ id local id 2
+ },
+ {
+ from 636,
+ to 648,
+ id local id 2
+ },
+ {
+ from 737,
+ to 762,
+ id local id 2
+ },
+ {
+ from 789,
+ to 806,
+ id local id 2
+ },
+ {
+ from 970,
+ to 983,
+ id local id 2
+ },
+ {
+ from 999,
+ to 1010,
+ id local id 2
+ }
+ },
+ packed-int {
+ {
+ from 3,
+ to 26,
+ id local id 3
+ },
+ {
+ from 372,
+ to 390,
+ id local id 3
+ },
+ {
+ from 766,
+ to 782,
+ id local id 3
+ },
+ {
+ from 780,
+ to 791,
+ id local id 3
+ },
+ {
+ from 1312,
+ to 1324,
+ id local id 3
+ }
+ },
+ int {
+ from 230,
+ to 246,
+ id local id 4
+ }
+ },
+ more FALSE
+ }
+}
diff -r 6560192c5098 -r 623f727cdff1 test-data/segmasker_four_human.maskinfo-asn1-binary
Binary file test-data/segmasker_four_human.maskinfo-asn1-binary has changed
diff -r 6560192c5098 -r 623f727cdff1 test-data/tblastn_four_human_vs_rhodopsin.html
--- a/test-data/tblastn_four_human_vs_rhodopsin.html Tue Jan 21 13:37:01 2014 -0500
+++ b/test-data/tblastn_four_human_vs_rhodopsin.html Fri Mar 14 07:40:46 2014 -0400
@@ -3,7 +3,7 @@
-TBLASTN 2.2.28+
+TBLASTN 2.2.29+
Query= sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44
@@ -461,8 +461,8 @@
Length=1047
-
- Score = 732 bits (1689), Expect = 0.0, Method: Compositional matrix adjust.
+
+ Score = 732 bits (1689), Expect = 0.0, Method: Compositional matrix adjust.
Identities = 336/348 (97%), Positives = 343/348 (99%), Gaps = 0/348 (0%)
Frame = +1
@@ -511,8 +511,8 @@
Length=1574
-
- Score = 646 bits (1489), Expect = 0.0, Method: Compositional matrix adjust.
+
+ Score = 646 bits (1489), Expect = 0.0, Method: Compositional matrix adjust.
Identities = 290/342 (85%), Positives = 320/342 (94%), Gaps = 1/342 (0%)
Frame = +3
@@ -561,8 +561,8 @@
Length=4301
-
- Score = 151 bits (342), Expect(2) = 1e-72, Method: Compositional matrix adjust.
+
+ Score = 151 bits (342), Expect(2) = 1e-72, Method: Compositional matrix adjust.
Identities = 69/74 (93%), Positives = 73/74 (99%), Gaps = 0/74 (0%)
Frame = +3
@@ -575,7 +575,7 @@
Sbjct 3327 SIYNPVIYIMMNKQ 3368
- Score = 126 bits (284), Expect(2) = 1e-72, Method: Compositional matrix adjust.
+ Score = 126 bits (284), Expect(2) = 1e-72, Method: Compositional matrix adjust.
Identities = 54/59 (92%), Positives = 57/59 (97%), Gaps = 0/59 (0%)
Frame = +2
@@ -584,7 +584,7 @@
Sbjct 2855 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS 3031
- Score = 229 bits (523), Expect = 9e-67, Method: Compositional matrix adjust.
+ Score = 229 bits (523), Expect = 9e-67, Method: Compositional matrix adjust.
Identities = 107/111 (96%), Positives = 109/111 (98%), Gaps = 0/111 (0%)
Frame = +1
@@ -597,7 +597,7 @@
Sbjct 181 PLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG 333
- Score = 122 bits (276), Expect = 1e-32, Method: Compositional matrix adjust.
+ Score = 122 bits (276), Expect = 1e-32, Method: Compositional matrix adjust.
Identities = 55/59 (93%), Positives = 56/59 (95%), Gaps = 0/59 (0%)
Frame = +3
@@ -635,8 +635,8 @@
Length=983
-
- Score = 658 bits (1517), Expect = 0.0, Method: Compositional matrix adjust.
+
+ Score = 658 bits (1517), Expect = 0.0, Method: Compositional matrix adjust.
Identities = 310/326 (95%), Positives = 322/326 (99%), Gaps = 0/326 (0%)
Frame = +1
@@ -685,8 +685,8 @@
Length=1047
-
- Score = 711 bits (1640), Expect = 0.0, Method: Compositional matrix adjust.
+
+ Score = 711 bits (1640), Expect = 0.0, Method: Compositional matrix adjust.
Identities = 325/348 (93%), Positives = 337/348 (97%), Gaps = 0/348 (0%)
Frame = +1
@@ -735,8 +735,8 @@
Length=1344
-
- Score = 626 bits (1444), Expect = 0.0, Method: Compositional matrix adjust.
+
+ Score = 626 bits (1444), Expect = 0.0, Method: Compositional matrix adjust.
Identities = 281/342 (82%), Positives = 311/342 (91%), Gaps = 1/342 (0%)
Frame = +2
diff -r 6560192c5098 -r 623f727cdff1 test-data/tblastn_four_human_vs_rhodopsin.tabular
--- a/test-data/tblastn_four_human_vs_rhodopsin.tabular Tue Jan 21 13:37:01 2014 -0500
+++ b/test-data/tblastn_four_human_vs_rhodopsin.tabular Fri Mar 14 07:40:46 2014 -0400
@@ -1,10 +1,10 @@
-sp|P08100|OPSD_HUMAN gi|57163782|ref|NM_001009242.1| 96.55 348 12 0 1 348 1 1044 0.0 732
-sp|P08100|OPSD_HUMAN gi|2734705|gb|U59921.1|BBU59921 84.80 342 51 1 1 341 42 1067 0.0 646
-sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.24 74 5 0 239 312 3147 3368 1e-72 151
-sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 91.53 59 5 0 177 235 2855 3031 1e-72 126
-sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 96.40 111 4 0 11 121 1 333 9e-67 229
-sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.22 59 4 0 119 177 1404 1580 1e-32 122
+sp|P08100|OPSD_HUMAN gi|57163782|ref|NM_001009242.1| 96.55 348 12 0 1 348 1 1044 0.0 732
+sp|P08100|OPSD_HUMAN gi|2734705|gb|U59921.1|BBU59921 84.80 342 51 1 1 341 42 1067 0.0 646
+sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.24 74 5 0 239 312 3147 3368 1e-72 151
+sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 91.53 59 5 0 177 235 2855 3031 1e-72 126
+sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 96.40 111 4 0 11 121 1 333 9e-67 229
+sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.22 59 4 0 119 177 1404 1580 1e-32 122
sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 88.46 26 3 0 312 337 4222 4299 2e-12 57.7
-sp|P08100|OPSD_HUMAN gi|283855822|gb|GQ290312.1| 95.09 326 16 0 11 336 1 978 0.0 658
-sp|P08100|OPSD_HUMAN gi|18148870|dbj|AB062417.1| 93.39 348 23 0 1 348 1 1044 0.0 711
-sp|P08100|OPSD_HUMAN gi|12583664|dbj|AB043817.1| 82.16 342 60 1 1 341 23 1048 0.0 626
+sp|P08100|OPSD_HUMAN gi|283855822|gb|GQ290312.1| 95.09 326 16 0 11 336 1 978 0.0 658
+sp|P08100|OPSD_HUMAN gi|18148870|dbj|AB062417.1| 93.39 348 23 0 1 348 1 1044 0.0 711
+sp|P08100|OPSD_HUMAN gi|12583664|dbj|AB043817.1| 82.16 342 60 1 1 341 23 1048 0.0 626
diff -r 6560192c5098 -r 623f727cdff1 test-data/tblastn_four_human_vs_rhodopsin.xml
--- a/test-data/tblastn_four_human_vs_rhodopsin.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/test-data/tblastn_four_human_vs_rhodopsin.xml Fri Mar 14 07:40:46 2014 -0400
@@ -2,7 +2,7 @@
tblastn
- TBLASTN 2.2.28+
+ TBLASTN 2.2.29+
Stephen F. Altschul, Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.
Query_1
diff -r 6560192c5098 -r 623f727cdff1 test-data/tblastn_four_human_vs_rhodopsin_ext.tabular
--- a/test-data/tblastn_four_human_vs_rhodopsin_ext.tabular Tue Jan 21 13:37:01 2014 -0500
+++ b/test-data/tblastn_four_human_vs_rhodopsin_ext.tabular Fri Mar 14 07:40:46 2014 -0400
@@ -1,10 +1,10 @@
-sp|P08100|OPSD_HUMAN gi|57163782|ref|NM_001009242.1| 96.55 348 12 0 1 348 1 1044 0.0 732 gi|57163782|ref|NM_001009242.1| 1689 336 343 0 98.56 0 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 348 1047 N/A
-sp|P08100|OPSD_HUMAN gi|2734705|gb|U59921.1|BBU59921 84.80 342 51 1 1 341 42 1067 0.0 646 gi|2734705|gb|U59921.1|BBU59921 1489 290 320 1 93.57 0 3 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE 348 1574 N/A
-sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.24 74 5 0 239 312 3147 3368 1e-72 151 gi|283855845|gb|GQ290303.1| 342 69 73 0 98.65 0 3 ESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ ESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ 348 4301 N/A
-sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 91.53 59 5 0 177 235 2855 3031 1e-72 126 gi|283855845|gb|GQ290303.1| 284 54 57 0 96.61 0 2 RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS 348 4301 N/A
-sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 96.40 111 4 0 11 121 1 333 9e-67 229 gi|283855845|gb|GQ290303.1| 523 107 109 0 98.20 0 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG 348 4301 N/A
-sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.22 59 4 0 119 177 1404 1580 1e-32 122 gi|283855845|gb|GQ290303.1| 276 55 56 0 94.92 0 3 LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR 348 4301 N/A
+sp|P08100|OPSD_HUMAN gi|57163782|ref|NM_001009242.1| 96.55 348 12 0 1 348 1 1044 0.0 732 gi|57163782|ref|NM_001009242.1| 1689 336 343 0 98.56 0 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 348 1047 N/A
+sp|P08100|OPSD_HUMAN gi|2734705|gb|U59921.1|BBU59921 84.80 342 51 1 1 341 42 1067 0.0 646 gi|2734705|gb|U59921.1|BBU59921 1489 290 320 1 93.57 0 3 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE 348 1574 N/A
+sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.24 74 5 0 239 312 3147 3368 1e-72 151 gi|283855845|gb|GQ290303.1| 342 69 73 0 98.65 0 3 ESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ ESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ 348 4301 N/A
+sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 91.53 59 5 0 177 235 2855 3031 1e-72 126 gi|283855845|gb|GQ290303.1| 284 54 57 0 96.61 0 2 RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS 348 4301 N/A
+sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 96.40 111 4 0 11 121 1 333 9e-67 229 gi|283855845|gb|GQ290303.1| 523 107 109 0 98.20 0 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG 348 4301 N/A
+sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.22 59 4 0 119 177 1404 1580 1e-32 122 gi|283855845|gb|GQ290303.1| 276 55 56 0 94.92 0 3 LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR 348 4301 N/A
sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 88.46 26 3 0 312 337 4222 4299 2e-12 57.7 gi|283855845|gb|GQ290303.1| 125 23 24 0 92.31 0 1 QFRNCMLTTICCGKNPLGDDEASATV QFRNCMLTTLCCGKNPLGDDEASTTA 348 4301 N/A
-sp|P08100|OPSD_HUMAN gi|283855822|gb|GQ290312.1| 95.09 326 16 0 11 336 1 978 0.0 658 gi|283855822|gb|GQ290312.1| 1517 310 322 0 98.77 0 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT 348 983 N/A
-sp|P08100|OPSD_HUMAN gi|18148870|dbj|AB062417.1| 93.39 348 23 0 1 348 1 1044 0.0 711 gi|18148870|dbj|AB062417.1| 1640 325 337 0 96.84 0 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 1047 N/A
-sp|P08100|OPSD_HUMAN gi|12583664|dbj|AB043817.1| 82.16 342 60 1 1 341 23 1048 0.0 626 gi|12583664|dbj|AB043817.1| 1444 281 311 1 90.94 0 2 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 348 1344 N/A
+sp|P08100|OPSD_HUMAN gi|283855822|gb|GQ290312.1| 95.09 326 16 0 11 336 1 978 0.0 658 gi|283855822|gb|GQ290312.1| 1517 310 322 0 98.77 0 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT 348 983 N/A
+sp|P08100|OPSD_HUMAN gi|18148870|dbj|AB062417.1| 93.39 348 23 0 1 348 1 1044 0.0 711 gi|18148870|dbj|AB062417.1| 1640 325 337 0 96.84 0 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 1047 N/A
+sp|P08100|OPSD_HUMAN gi|12583664|dbj|AB043817.1| 82.16 342 60 1 1 341 23 1048 0.0 626 gi|12583664|dbj|AB043817.1| 1444 281 311 1 90.94 0 2 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 348 1344 N/A
diff -r 6560192c5098 -r 623f727cdff1 test-data/tblastx_rhodopsin_vs_three_human.tabular
--- a/test-data/tblastx_rhodopsin_vs_three_human.tabular Tue Jan 21 13:37:01 2014 -0500
+++ b/test-data/tblastx_rhodopsin_vs_three_human.tabular Fri Mar 14 07:40:46 2014 -0400
@@ -1,57 +1,57 @@
-gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 97.39 230 6 0 1 690 88 777 0.0 559
-gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 94.12 102 6 0 742 1047 829 1134 0.0 236
-gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 91.22 148 13 0 1046 603 1133 690 0.0 308
-gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 94.32 88 5 0 566 303 653 390 0.0 207
-gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 96.34 82 3 0 248 3 335 90 0.0 182
-gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 83.33 204 34 0 18 629 105 716 4e-158 404
-gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 75.28 89 22 0 780 1046 867 1133 4e-158 161
-gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 81.28 203 38 0 609 1 696 88 5e-153 360
-gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 80.60 67 13 0 916 716 1003 803 5e-153 135
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 97.39 230 6 0 1 690 88 777 0.0 559
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 94.12 102 6 0 742 1047 829 1134 0.0 236
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 91.22 148 13 0 1046 603 1133 690 0.0 308
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 94.32 88 5 0 566 303 653 390 0.0 207
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 96.34 82 3 0 248 3 335 90 0.0 182
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 83.33 204 34 0 18 629 105 716 4e-158 404
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 75.28 89 22 0 780 1046 867 1133 4e-158 161
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 81.28 203 38 0 609 1 696 88 5e-153 360
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 80.60 67 13 0 916 716 1003 803 5e-153 135
gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 70.27 37 11 0 1047 937 1134 1024 5e-153 64.2
gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 100.00 7 0 0 646 626 733 713 5e-153 24.0
-gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 89.23 65 7 0 460 266 547 353 4e-105 167
-gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 89.58 48 5 0 184 41 271 128 4e-105 104
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 89.23 65 7 0 460 266 547 353 4e-105 167
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 89.58 48 5 0 184 41 271 128 4e-105 104
gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 77.78 45 10 0 882 748 969 835 4e-105 93.9
gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 67.86 28 9 0 1045 962 1132 1049 4e-105 51.9
gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 59.09 22 9 0 586 521 673 608 4e-105 33.1
-gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 81.40 86 16 0 296 553 383 640 2e-87 185
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 81.40 86 16 0 296 553 383 640 2e-87 185
gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 84.38 32 5 0 11 106 98 193 2e-87 74.8
gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 71.43 35 10 0 941 1045 1028 1132 2e-87 61.6
gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 94.44 18 1 0 794 847 881 934 2e-87 50.1
-gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 83.61 238 39 0 18 731 64 777 0.0 507
-gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 82.35 85 15 0 783 1037 829 1083 0.0 188
-gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 70.96 303 88 0 925 17 971 63 2e-130 435
+gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 83.61 238 39 0 18 731 64 777 0.0 507
+gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 82.35 85 15 0 783 1037 829 1083 0.0 188
+gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 70.96 303 88 0 925 17 971 63 2e-130 435
gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 72.22 18 5 0 1027 974 1073 1020 2e-130 35.0
-gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 55.32 188 84 0 605 42 651 88 7e-89 245
+gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 55.32 188 84 0 605 42 651 88 7e-89 245
gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 61.11 72 28 0 1037 822 1083 868 7e-89 91.3
-gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 49.02 204 104 0 29 640 75 686 4e-78 197
+gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 49.02 204 104 0 29 640 75 686 4e-78 197
gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 66.04 53 18 0 860 1018 906 1064 4e-78 85.8
gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 44.44 27 15 0 689 769 735 815 4e-78 32.2
-gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 47.47 198 104 0 633 40 679 86 4e-65 177
+gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 47.47 198 104 0 633 40 679 86 4e-65 177
gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 68.09 47 15 0 1017 877 1063 923 4e-65 80.3
-gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 57.89 114 48 0 265 606 311 652 3e-46 137
+gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 57.89 114 48 0 265 606 311 652 3e-46 137
gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 46.30 54 29 0 19 180 65 226 3e-46 52.4
-gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 96.40 111 4 0 1 333 118 450 0.0 264
-gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.31 65 5 0 3174 3368 829 1023 0.0 151
-gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 96.43 56 2 0 2855 3022 616 783 0.0 141
-gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 93.22 59 4 0 1404 1580 442 618 0.0 138
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 96.40 111 4 0 1 333 118 450 0.0 264
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.31 65 5 0 3174 3368 829 1023 0.0 151
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 96.43 56 2 0 2855 3022 616 783 0.0 141
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 93.22 59 4 0 1404 1580 442 618 0.0 138
gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.00 25 2 0 4222 4296 1021 1095 0.0 64.3
gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 88.89 9 1 0 3128 3154 783 809 0.0 22.6
-gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 78.38 111 24 0 333 1 450 118 7e-171 212
-gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 88.75 80 9 0 3367 3128 1022 783 7e-171 161
-gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 93.33 60 4 0 1582 1403 620 441 7e-171 136
-gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.07 56 5 0 3021 2854 782 615 7e-171 119
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 78.38 111 24 0 333 1 450 118 7e-171 212
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 88.75 80 9 0 3367 3128 1022 783 7e-171 161
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 93.33 60 4 0 1582 1403 620 441 7e-171 136
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.07 56 5 0 3021 2854 782 615 7e-171 119
gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 84.62 26 4 0 4301 4224 1100 1023 7e-171 52.8
-gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 95.83 72 3 0 218 3 335 120 8e-142 152
-gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 77.78 63 14 0 3368 3180 1023 835 8e-142 125
-gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 85.11 47 7 0 1544 1404 582 442 8e-142 108
-gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 80.36 56 11 0 3022 2855 783 616 8e-142 101
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 95.83 72 3 0 218 3 335 120 8e-142 152
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 77.78 63 14 0 3368 3180 1023 835 8e-142 125
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 85.11 47 7 0 1544 1404 582 442 8e-142 108
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 80.36 56 11 0 3022 2855 783 616 8e-142 101
gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 75.86 29 7 0 325 239 442 356 8e-142 58.3
gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 86.36 22 3 0 4287 4222 1086 1021 8e-142 48.7
gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 90.91 11 1 0 3159 3127 814 782 8e-142 31.3
-gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 81.03 58 11 0 2854 3027 615 788 2e-122 128
-gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 83.33 60 10 0 1403 1582 441 620 2e-122 125
-gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 80.60 67 13 0 3 203 120 320 2e-122 119
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 81.03 58 11 0 2854 3027 615 788 2e-122 128
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 83.33 60 10 0 1403 1582 441 620 2e-122 125
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 80.60 67 13 0 3 203 120 320 2e-122 119
gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.30 23 2 0 4220 4288 1019 1087 2e-122 53.8
gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 78.26 23 5 0 266 334 383 451 2e-122 48.3
gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 80.00 20 4 0 3308 3367 963 1022 2e-122 46.0
@@ -68,16 +68,16 @@
gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 77.27 22 5 0 267 332 384 449 6e-43 45.1
gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 86.36 22 3 0 4224 4289 1023 1088 6e-43 44.1
gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 83.33 12 2 0 2856 2891 617 652 6e-43 25.4
-gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 95.91 220 9 0 1 660 118 777 0.0 526
-gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 92.13 89 7 0 712 978 829 1095 0.0 212
-gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.01 178 16 0 536 3 653 120 1e-178 353
-gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 88.32 137 16 0 983 573 1100 690 1e-178 277
-gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 77.43 319 72 0 3 959 120 1076 4e-174 593
-gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 79.07 129 27 0 558 172 675 289 2e-133 248
-gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 76.83 82 19 0 963 718 1080 835 2e-133 159
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 95.91 220 9 0 1 660 118 777 0.0 526
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 92.13 89 7 0 712 978 829 1095 0.0 212
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.01 178 16 0 536 3 653 120 1e-178 353
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 88.32 137 16 0 983 573 1100 690 1e-178 277
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 77.43 319 72 0 3 959 120 1076 4e-174 593
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 79.07 129 27 0 558 172 675 289 2e-133 248
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 76.83 82 19 0 963 718 1080 835 2e-133 159
gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 84.09 44 7 0 133 2 250 119 2e-133 97.3
-gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 73.08 78 21 0 433 200 550 317 6e-102 145
-gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 70.15 67 20 0 799 599 916 716 6e-102 106
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 73.08 78 21 0 433 200 550 317 6e-102 145
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 70.15 67 20 0 799 599 916 716 6e-102 106
gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 80.49 41 8 0 123 1 240 118 6e-102 84.5
gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 77.78 27 6 0 553 473 670 590 6e-102 51.9
gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 71.43 14 4 0 889 848 1006 965 6e-102 32.7
@@ -87,16 +87,16 @@
gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 81.40 43 8 0 404 532 521 649 4e-48 47.3
gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 88.89 18 2 0 764 817 881 934 4e-48 44.6
gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 87.50 8 1 0 935 958 1052 1075 4e-48 21.7
-gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 93.91 230 14 0 1 690 88 777 0.0 538
-gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 91.18 102 9 0 742 1047 829 1134 0.0 233
-gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 88.83 188 21 0 566 3 653 90 0.0 394
-gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 84.06 138 22 0 1046 633 1133 720 0.0 260
-gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 67.11 228 75 0 684 1 771 88 7e-132 333
-gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 67.27 110 36 0 1045 716 1132 803 7e-132 141
-gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 70.20 151 45 0 3 455 90 542 1e-128 236
-gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 64.04 89 32 0 780 1046 867 1133 1e-128 136
-gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 66.22 74 25 0 510 731 597 818 1e-128 111
-gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 66.04 106 36 0 242 559 329 646 2e-58 161
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 93.91 230 14 0 1 690 88 777 0.0 538
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 91.18 102 9 0 742 1047 829 1134 0.0 233
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 88.83 188 21 0 566 3 653 90 0.0 394
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 84.06 138 22 0 1046 633 1133 720 0.0 260
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 67.11 228 75 0 684 1 771 88 7e-132 333
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 67.27 110 36 0 1045 716 1132 803 7e-132 141
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 70.20 151 45 0 3 455 90 542 1e-128 236
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 64.04 89 32 0 780 1046 867 1133 1e-128 136
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 66.22 74 25 0 510 731 597 818 1e-128 111
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 66.04 106 36 0 242 559 329 646 2e-58 161
gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 85.71 21 3 0 92 154 179 241 2e-58 53.8
gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 73.68 19 5 0 791 847 878 934 2e-58 39.1
gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 61.29 62 24 0 424 239 511 326 4e-55 81.3
@@ -104,11 +104,11 @@
gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 65.71 35 12 0 882 778 969 865 4e-55 56.3
gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 58.14 43 18 0 649 521 736 608 4e-55 50.6
gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 66.67 12 4 0 972 937 1059 1024 4e-55 23.9
-gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 82.13 235 42 0 11 715 76 780 0.0 498
-gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 78.31 83 18 0 770 1018 835 1083 0.0 177
-gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 72.29 332 92 0 1017 22 1082 87 1e-150 516
-gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 48.30 147 76 0 712 272 777 337 2e-98 169
-gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 54.17 72 33 0 1030 815 1095 880 2e-98 103
+gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 82.13 235 42 0 11 715 76 780 0.0 498
+gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 78.31 83 18 0 770 1018 835 1083 0.0 177
+gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 72.29 332 92 0 1017 22 1082 87 1e-150 516
+gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 48.30 147 76 0 712 272 777 337 2e-98 169
+gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 54.17 72 33 0 1030 815 1095 880 2e-98 103
gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 47.83 69 36 0 220 14 285 79 2e-98 83.5
gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 72.00 25 7 0 782 708 847 773 2e-98 45.1
gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 56.00 75 33 0 532 756 597 821 5e-65 87.7
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/README.rst
--- a/tools/ncbi_blast_plus/README.rst Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/README.rst Fri Mar 14 07:40:46 2014 -0400
@@ -1,9 +1,8 @@
Galaxy wrappers for NCBI BLAST+ suite
=====================================
-These wrappers are copyright 2010-2013 by Peter Cock, The James Hutton Institute
-(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
-See the licence text below.
+These wrappers are copyright 2010-2013 by Peter Cock (The James Hutton Institute,
+UK) and additional contributors. All rights reserved. See the licence text below.
Currently tested with NCBI BLAST 2.2.28+ (i.e. version 2.2.28 of BLAST+),
and does not work with the NCBI 'legacy' BLAST suite (e.g. ``blastall``).
@@ -26,17 +25,7 @@
(``blastxml``) and protein and nucleotide BLAST databases (``blastdbp`` and
``blastdbn``).
-You must tell Galaxy about any system level BLAST databases using configuration
-files blastdb.loc (nucleotide databases like NT) and blastdb_p.loc (protein
-databases like NR), and blastdb_d.loc (protein domain databases like CDD or
-SMART) which are located in the tool-data/ folder. Sample files are included
-which explain the tab-based format to use.
-
-You can download the NCBI provided databases as tar-balls from here:
-
-* ftp://ftp.ncbi.nlm.nih.gov/blast/db/ (nucleotide and protein databases like NR)
-* ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/little_endian/ (domain databases like CDD)
-
+See the configuration notes below.
Manual Installation
===================
@@ -79,6 +68,39 @@
./run_functional_tests.sh -sid NCBI_BLAST+-ncbi_blast_plus_tools
+Configuration
+=============
+
+You must tell Galaxy about any system level BLAST databases using configuration
+files blastdb.loc (nucleotide databases like NT) and blastdb_p.loc (protein
+databases like NR), and blastdb_d.loc (protein domain databases like CDD or
+SMART) which are located in the tool-data/ folder. Sample files are included
+which explain the tab-based format to use.
+
+You can download the NCBI provided databases as tar-balls from here:
+
+* ftp://ftp.ncbi.nlm.nih.gov/blast/db/ (nucleotide and protein databases like NR)
+* ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/little_endian/ (domain databases like CDD)
+
+If using the optional taxonomy columns, you will also need to download the
+NCBI taxonomy files (``taxdb.btd`` and ``taxdb.bti`` from ``taxdb.tar.gz`` on
+the BLAST database FTP site). Currently explicit version tracking of the
+taxonomy is not supported, and in order to use the taxonomy columns you must
+set the ``$BLASTDB`` environment variable to include the path where you
+unzipped the taxonomy files. If this is not done, the taxonomy columns like
+species name will appear as ``N/A`` in the tabular output.
+
+The BLAST+ binaries support multi-threaded operation, which is handled via the
+``$GALAXY_SLOTS`` environment variable. This should be set automatically by Galaxy
+via your job runner settings, which allows you to (for example) allocate four
+cores to each BLAST job.
+
+In addition, the BLAST+ wrappers also support high level parallelism by task
+splitting if ``use_tasked_jobs = True`` is enabled in your ``universe_wsgi.ini``
+configuration file. Essentially, the FASTA input query files are broken up into
+batches of 1000 sequences, a separate BLAST child job is run for each chunk,
+and then the BLAST output files are merged (in order). This is transparent
+for the end user.
History
=======
@@ -106,7 +128,7 @@
(all too often our users where having to re-run searches just to
get one of the missing columns like query or subject length)
v0.0.18 - Defensive quoting of filenames in case of spaces (where possible,
- BLAST+ handling of some mult-file arguments is problematic).
+ BLAST+ handling of some multi-file arguments is problematic).
v0.0.19 - Added wrappers for rpsblast and rpstblastn, and new blastdb_d.loc
for the domain databases they use (e.g. CDD, PFAM or SMART).
- Correct case of exception regular expression (for error handling
@@ -122,20 +144,30 @@
- Development moved to GitHub, https://github.com/peterjc/galaxy_blast
- Updated citation information (Cock et al. 2013).
v0.0.21 - Use macros to simplify the XML wrappers.
- - Added wrapper for dustmasker
- - Enabled masking for makeblastdb
- - Requires 'maskinfo-asn1' and 'maskinfo-asn1-binary' datatypes
+ - Added wrapper for dustmasker.
+ - Enabled masking for makeblastdb.
+ - Requires 'maskinfo-asn1' and 'maskinfo-asn1-binary' datatypes.
defined in updated blast_datatypes on Galaxy ToolShed.
- - Tests updated for BLAST+ 2.2.27 instead of BLAST+ 2.2.26
- - Now depends on package_blast_plus_2_2_27 in ToolShed
-v0.0.22 - More use macros to simplify the wrappers
- - Set number of threads via $GALAXY_SLOTS environment variable
- - More descriptive default output names
- - Tests require updated BLAST DB definitions (blast_datatypes v0.0.18)
+ - Tests updated for BLAST+ 2.2.27 instead of BLAST+ 2.2.26.
+ - Now depends on package_blast_plus_2_2_27 in ToolShed.
+v0.0.22 - More use of macros to simplify the wrappers.
+ - Set number of threads via $GALAXY_SLOTS environment variable.
+ - More descriptive default output names.
+ - Tests require updated BLAST DB definitions (blast_datatypes v0.0.18).
- Pre-check for duplicate identifiers in makeblastdb wrapper.
- - Tests updated for BLAST+ 2.2.28 instead of BLAST+ 2.2.27
- - Now depends on package_blast_plus_2_2_28 in ToolShed
+ - Tests updated for BLAST+ 2.2.28 instead of BLAST+ 2.2.27.
+ - Now depends on package_blast_plus_2_2_28 in ToolShed.
- Extended tabular output includes 'salltitles' as column 25.
+v0.1.00 - Now depends on package_blast_plus_2_2_29 in ToolShed.
+ - Tabular output now includes option to pick specific columns,
+ including previously unavailable taxonomy columns.
+ - BLAST XML to tabular tool supports multiple input files.
+ - More detailed descriptions for BLASTN and BLASTP task option.
+ - Wrappers for segmasker, dustmasker and convert2blastmask.
+ - Supports using maskinfo with makeblastdb wrapper.
+ - Supports setting a taxonomy ID in makeblastdb wrapper.
+ - Subtle changes like new conditional settings will require some old
+ workflows to be updated to cope.
======= ======================================================================
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/blastxml_to_tabular.py
--- a/tools/ncbi_blast_plus/blastxml_to_tabular.py Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/blastxml_to_tabular.py Fri Mar 14 07:40:46 2014 -0400
@@ -62,9 +62,11 @@
"""
import sys
import re
+import os
+from optparse import OptionParser
if "-v" in sys.argv or "--version" in sys.argv:
- print "v0.0.22"
+ print "v0.1.00"
sys.exit(0)
if sys.version_info[:2] >= ( 2, 5 ):
@@ -81,34 +83,55 @@
sys.stderr.write("%s\n" % msg)
sys.exit(1)
-#Parse Command Line
-try:
- in_file, out_file, out_fmt = sys.argv[1:]
-except:
- stop_err("Expect 3 arguments: input BLAST XML file, output tabular file, out format (std or ext)")
+if len(sys.argv) == 4 and sys.argv[3] in ["std", "x22", "ext"]:
+ #False positive if user really has a BLAST XML file called 'std' or 'ext'...
+ stop_err("ERROR: The script API has changed, sorry.")
+
+usage = """usage: %prog [options] blastxml[,...]
+
+Convert one (or more) BLAST XML files into a single tabular file.
+The columns option can be 'std' (standard 12 columns), 'ext'
+(extended 25 columns), or a list of BLAST+ column names like
+'qseqid,sseqid,pident' (space or comma separated).
+"""
+parser = OptionParser(usage=usage)
+parser.add_option('-o', '--output', dest='output', default=None, help='output filename (defaults to stdout)', metavar="FILE")
+parser.add_option("-c", "--columns", dest="columns", default='std', help="[std|ext|col1,col2,...] standard 12 columns, extended 25 columns, or list of column names")
+(options, args) = parser.parse_args()
+
+colnames = 'qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles'.split(',')
+
+if len(args) < 1:
+ stop_err("ERROR: No BLASTXML input files given; run with --help to see options.")
+
+out_fmt = options.columns
if out_fmt == "std":
extended = False
+ cols = None
elif out_fmt == "x22":
stop_err("Format argument x22 has been replaced with ext (extended 25 columns)")
elif out_fmt == "ext":
extended = True
+ cols = None
else:
- stop_err("Format argument should be std (12 column) or ext (extended 25 columns), not: %r" % out_fmt)
-
+ cols = out_fmt.replace(" ", ",").split(",") #Allow space or comma separated
+ #Remove any blank entries due to trailing comma,
+ #or annoying "None" dummy value from Galaxy if no columns
+ cols = [c for c in cols if c and c != "None"]
+ extra = set(cols).difference(colnames)
+ if extra:
+ stop_err("These are not recognised column names: %s" % ",".join(sorted(extra)))
+ del extra
+ assert set(colnames).issuperset(cols), cols
+ if not cols:
+ stop_err("No columns selected!")
+ extended = max(colnames.index(c) for c in cols) >= 12 #Do we need any higher columns?
+del out_fmt
-# get an iterable
-try:
- context = ElementTree.iterparse(in_file, events=("start", "end"))
-except:
- stop_err("Invalid data format.")
-# turn it into an iterator
-context = iter(context)
-# get the root element
-try:
- event, root = context.next()
-except:
- stop_err( "Invalid data format." )
+for in_file in args:
+ if not os.path.isfile(in_file):
+ stop_err("Input BLAST XML file not found: %s" % in_file)
re_default_query_id = re.compile("^Query_\d+$")
@@ -122,156 +145,187 @@
assert not re_default_subject_id.match("TheSubject_1")
-outfile = open(out_file, 'w')
-blast_program = None
-for event, elem in context:
- if event == "end" and elem.tag == "BlastOutput_program":
- blast_program = elem.text
- # for every tag
- if event == "end" and elem.tag == "Iteration":
- #Expecting either this, from BLAST 2.2.25+ using FASTA vs FASTA
- # sp|Q9BS26|ERP44_HUMAN
- # Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
- # 406
- #
- #
- #Or, from BLAST 2.2.24+ run online
- # Query_1
- # Sample
- # 516
- # ...
- qseqid = elem.findtext("Iteration_query-ID")
- if re_default_query_id.match(qseqid):
- #Place holder ID, take the first word of the query definition
- qseqid = elem.findtext("Iteration_query-def").split(None,1)[0]
- qlen = int(elem.findtext("Iteration_query-len"))
-
- # for every within
- for hit in elem.findall("Iteration_hits/Hit"):
- #Expecting either this,
- # gi|3024260|sp|P56514.1|OPSD_BUFBU
- # RecName: Full=Rhodopsin
- # P56514
- #or,
- # Subject_1
- # gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus]
- # Subject_1
+def convert(blastxml_filename, output_handle):
+ blast_program = None
+ # get an iterable
+ try:
+ context = ElementTree.iterparse(in_file, events=("start", "end"))
+ except:
+ stop_err("Invalid data format.")
+ # turn it into an iterator
+ context = iter(context)
+ # get the root element
+ try:
+ event, root = context.next()
+ except:
+ stop_err( "Invalid data format." )
+ for event, elem in context:
+ if event == "end" and elem.tag == "BlastOutput_program":
+ blast_program = elem.text
+ # for every <Iteration> tag
+ if event == "end" and elem.tag == "Iteration":
+ #Expecting either this, from BLAST 2.2.25+ using FASTA vs FASTA
+ # sp|Q9BS26|ERP44_HUMAN
+ # Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
+ # 406
+ #
#
- #apparently depending on the parse_deflines switch
- #
- #Or, with BLAST 2.2.28+ can get this,
- # gnl|BL_ORD_ID|2
- # chrIII gi|240255695|ref|NC_003074.8| Arabidopsis thaliana chromosome 3, complete sequence
- # 2
- sseqid = hit.findtext("Hit_id").split(None,1)[0]
- hit_def = sseqid + " " + hit.findtext("Hit_def")
- if re_default_subject_id.match(sseqid) \
- and sseqid == hit.findtext("Hit_accession"):
- #Place holder ID, take the first word of the subject definition
- hit_def = hit.findtext("Hit_def")
- sseqid = hit_def.split(None,1)[0]
- if sseqid.startswith("gnl|BL_ORD_ID|") \
- and sseqid == "gnl|BL_ORD_ID|" + hit.findtext("Hit_accession"):
- #Alternative place holder ID, again take the first word of hit_def
- hit_def = hit.findtext("Hit_def")
- sseqid = hit_def.split(None,1)[0]
- # for every within
- for hsp in hit.findall("Hit_hsps/Hsp"):
- nident = hsp.findtext("Hsp_identity")
- length = hsp.findtext("Hsp_align-len")
- pident = "%0.2f" % (100*float(nident)/float(length))
+ #Or, from BLAST 2.2.24+ run online
+ # Query_1
+ # Sample
+ # 516
+ # ...
+ qseqid = elem.findtext("Iteration_query-ID")
+ if re_default_query_id.match(qseqid):
+ #Place holder ID, take the first word of the query definition
+ qseqid = elem.findtext("Iteration_query-def").split(None,1)[0]
+ qlen = int(elem.findtext("Iteration_query-len"))
- q_seq = hsp.findtext("Hsp_qseq")
- h_seq = hsp.findtext("Hsp_hseq")
- m_seq = hsp.findtext("Hsp_midline")
- assert len(q_seq) == len(h_seq) == len(m_seq) == int(length)
- gapopen = str(len(q_seq.replace('-', ' ').split())-1 + \
- len(h_seq.replace('-', ' ').split())-1)
+ # for every <Hit> within <Iteration>
+ for hit in elem.findall("Iteration_hits/Hit"):
+ #Expecting either this,
+ # gi|3024260|sp|P56514.1|OPSD_BUFBU
+ # RecName: Full=Rhodopsin
+ # P56514
+ #or,
+ # Subject_1
+ # gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus]
+ # Subject_1
+ #
+ #apparently depending on the parse_deflines switch
+ #
+ #Or, with a local database not using -parse_seqids can get this,
+ # gnl|BL_ORD_ID|2
+ # chrIII gi|240255695|ref|NC_003074.8| Arabidopsis thaliana chromosome 3, complete sequence
+ # 2
+ sseqid = hit.findtext("Hit_id").split(None,1)[0]
+ hit_def = sseqid + " " + hit.findtext("Hit_def")
+ if re_default_subject_id.match(sseqid) \
+ and sseqid == hit.findtext("Hit_accession"):
+ #Place holder ID, take the first word of the subject definition
+ hit_def = hit.findtext("Hit_def")
+ sseqid = hit_def.split(None,1)[0]
+ if sseqid.startswith("gnl|BL_ORD_ID|") \
+ and sseqid == "gnl|BL_ORD_ID|" + hit.findtext("Hit_accession"):
+ #Alternative place holder ID, again take the first word of hit_def
+ hit_def = hit.findtext("Hit_def")
+ sseqid = hit_def.split(None,1)[0]
+ # for every <Hsp> within <Hit>
+ for hsp in hit.findall("Hit_hsps/Hsp"):
+ nident = hsp.findtext("Hsp_identity")
+ length = hsp.findtext("Hsp_align-len")
+ pident = "%0.2f" % (100*float(nident)/float(length))
+
+ q_seq = hsp.findtext("Hsp_qseq")
+ h_seq = hsp.findtext("Hsp_hseq")
+ m_seq = hsp.findtext("Hsp_midline")
+ assert len(q_seq) == len(h_seq) == len(m_seq) == int(length)
+ gapopen = str(len(q_seq.replace('-', ' ').split())-1 + \
+ len(h_seq.replace('-', ' ').split())-1)
+
+ mismatch = m_seq.count(' ') + m_seq.count('+') \
+ - q_seq.count('-') - h_seq.count('-')
+ #TODO - Remove this alternative mismatch calculation and test
+ #once satisfied there are no problems
+ expected_mismatch = len(q_seq) \
+ - sum(1 for q,h in zip(q_seq, h_seq) \
+ if q == h or q == "-" or h == "-")
+ xx = sum(1 for q,h in zip(q_seq, h_seq) if q=="X" and h=="X")
+ if not (expected_mismatch - q_seq.count("X") <= int(mismatch) <= expected_mismatch + xx):
+ stop_err("%s vs %s mismatches, expected %i <= %i <= %i" \
+ % (qseqid, sseqid, expected_mismatch - q_seq.count("X"),
+ int(mismatch), expected_mismatch))
- mismatch = m_seq.count(' ') + m_seq.count('+') \
- - q_seq.count('-') - h_seq.count('-')
- #TODO - Remove this alternative mismatch calculation and test
- #once satisifed there are no problems
- expected_mismatch = len(q_seq) \
- - sum(1 for q,h in zip(q_seq, h_seq) \
- if q == h or q == "-" or h == "-")
- xx = sum(1 for q,h in zip(q_seq, h_seq) if q=="X" and h=="X")
- if not (expected_mismatch - q_seq.count("X") <= int(mismatch) <= expected_mismatch + xx):
- stop_err("%s vs %s mismatches, expected %i <= %i <= %i" \
- % (qseqid, sseqid, expected_mismatch - q_seq.count("X"),
- int(mismatch), expected_mismatch))
+ #TODO - Remove this alternative identity calculation and test
+ #once satisfied there are no problems
+ expected_identity = sum(1 for q,h in zip(q_seq, h_seq) if q == h)
+ if not (expected_identity - xx <= int(nident) <= expected_identity + q_seq.count("X")):
+ stop_err("%s vs %s identities, expected %i <= %i <= %i" \
+ % (qseqid, sseqid, expected_identity, int(nident),
+ expected_identity + q_seq.count("X")))
+
- #TODO - Remove this alternative identity calculation and test
- #once satisifed there are no problems
- expected_identity = sum(1 for q,h in zip(q_seq, h_seq) if q == h)
- if not (expected_identity - xx <= int(nident) <= expected_identity + q_seq.count("X")):
- stop_err("%s vs %s identities, expected %i <= %i <= %i" \
- % (qseqid, sseqid, expected_identity, int(nident),
- expected_identity + q_seq.count("X")))
+ evalue = hsp.findtext("Hsp_evalue")
+ if evalue == "0":
+ evalue = "0.0"
+ else:
+ evalue = "%0.0e" % float(evalue)
+ bitscore = float(hsp.findtext("Hsp_bit-score"))
+ if bitscore < 100:
+ #Seems to show one decimal place for lower scores
+ bitscore = "%0.1f" % bitscore
+ else:
+ #Note BLAST does not round to nearest int, it truncates
+ bitscore = "%i" % bitscore
- evalue = hsp.findtext("Hsp_evalue")
- if evalue == "0":
- evalue = "0.0"
- else:
- evalue = "%0.0e" % float(evalue)
-
- bitscore = float(hsp.findtext("Hsp_bit-score"))
- if bitscore < 100:
- #Seems to show one decimal place for lower scores
- bitscore = "%0.1f" % bitscore
- else:
- #Note BLAST does not round to nearest int, it truncates
- bitscore = "%i" % bitscore
+ values = [qseqid,
+ sseqid,
+ pident,
+ length, #hsp.findtext("Hsp_align-len")
+ str(mismatch),
+ gapopen,
+ hsp.findtext("Hsp_query-from"), #qstart,
+ hsp.findtext("Hsp_query-to"), #qend,
+ hsp.findtext("Hsp_hit-from"), #sstart,
+ hsp.findtext("Hsp_hit-to"), #send,
+ evalue, #hsp.findtext("Hsp_evalue") in scientific notation
+ bitscore, #hsp.findtext("Hsp_bit-score") rounded
+ ]
- values = [qseqid,
- sseqid,
- pident,
- length, #hsp.findtext("Hsp_align-len")
- str(mismatch),
- gapopen,
- hsp.findtext("Hsp_query-from"), #qstart,
- hsp.findtext("Hsp_query-to"), #qend,
- hsp.findtext("Hsp_hit-from"), #sstart,
- hsp.findtext("Hsp_hit-to"), #send,
- evalue, #hsp.findtext("Hsp_evalue") in scientific notation
- bitscore, #hsp.findtext("Hsp_bit-score") rounded
- ]
+ if extended:
+ try:
+ sallseqid = ";".join(name.split(None,1)[0] for name in hit_def.split(" >"))
+ salltitles = "<>".join(name.split(None,1)[1] for name in hit_def.split(" >"))
+ except IndexError as e:
+ stop_err("Problem splitting multuple hits?\n%r\n--> %s" % (hit_def, e))
+ #print hit_def, "-->", sallseqid
+ positive = hsp.findtext("Hsp_positive")
+ ppos = "%0.2f" % (100*float(positive)/float(length))
+ qframe = hsp.findtext("Hsp_query-frame")
+ sframe = hsp.findtext("Hsp_hit-frame")
+ if blast_program == "blastp":
+ #Probably a bug in BLASTP that they use 0 or 1 depending on format
+ if qframe == "0": qframe = "1"
+ if sframe == "0": sframe = "1"
+ slen = int(hit.findtext("Hit_len"))
+ values.extend([sallseqid,
+ hsp.findtext("Hsp_score"), #score,
+ nident,
+ positive,
+ hsp.findtext("Hsp_gaps"), #gaps,
+ ppos,
+ qframe,
+ sframe,
+ #NOTE - for blastp, XML shows original seq, tabular uses XXX masking
+ q_seq,
+ h_seq,
+ str(qlen),
+ str(slen),
+ salltitles,
+ ])
+ if cols:
+ #Only a subset of the columns are needed
+ values = [values[colnames.index(c)] for c in cols]
+ #print "\t".join(values)
+ outfile.write("\t".join(values) + "\n")
+ # prevents ElementTree from growing large datastructure
+ root.clear()
+ elem.clear()
- if extended:
- try:
- sallseqid = ";".join(name.split(None,1)[0] for name in hit_def.split(" >"))
- salltitles = "<>".join(name.split(None,1)[1] for name in hit_def.split(" >"))
- except IndexError as e:
- stop_err("Problem splitting multuple hits?\n%r\n--> %s" % (hit_def, e))
- #print hit_def, "-->", sallseqid
- positive = hsp.findtext("Hsp_positive")
- ppos = "%0.2f" % (100*float(positive)/float(length))
- qframe = hsp.findtext("Hsp_query-frame")
- sframe = hsp.findtext("Hsp_hit-frame")
- if blast_program == "blastp":
- #Probably a bug in BLASTP that they use 0 or 1 depending on format
- if qframe == "0": qframe = "1"
- if sframe == "0": sframe = "1"
- slen = int(hit.findtext("Hit_len"))
- values.extend([sallseqid,
- hsp.findtext("Hsp_score"), #score,
- nident,
- positive,
- hsp.findtext("Hsp_gaps"), #gaps,
- ppos,
- qframe,
- sframe,
- #NOTE - for blastp, XML shows original seq, tabular uses XXX masking
- q_seq,
- h_seq,
- str(qlen),
- str(slen),
- salltitles,
- ])
- #print "\t".join(values)
- outfile.write("\t".join(values) + "\n")
- # prevents ElementTree from growing large datastructure
- root.clear()
- elem.clear()
-outfile.close()
+
+if options.output:
+ outfile = open(options.output, "w")
+else:
+ outfile = sys.stdout
+
+for in_file in args:
+ blast_program = None
+ convert(in_file, outfile)
+
+if options.output:
+ outfile.close()
+else:
+ #Using stdout
+ pass
+
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/blastxml_to_tabular.xml
--- a/tools/ncbi_blast_plus/blastxml_to_tabular.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/blastxml_to_tabular.xml Fri Mar 14 07:40:46 2014 -0400
@@ -1,8 +1,15 @@
-
+
Convert BLAST XML output to tabular
blastxml_to_tabular.py --version
- blastxml_to_tabular.py $blastxml_file $tabular_file $out_format
+blastxml_to_tabular.py -o "$tabular_file"
+#if $output.out_format == "cols":
+#set cols = (str($output.std_cols)+","+str($output.ext_cols)).replace("None", " ").replace(",,", ",").replace(",", " ")
+-c "$cols"
+#else
+-c "$output.out_format"
+#end if
+#for i in $blastxml_file#${i} #end for#
@@ -10,14 +17,50 @@
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
@@ -80,6 +123,19 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -120,7 +176,7 @@
====== ============= ===========================================
Column NCBI name Description
------ ------------- -------------------------------------------
- 13 sallseqid All subject Seq-id(s), separated by ';'
+ 13 sallseqid All subject Seq-id(s), separated by a ';'
14 score Raw score
15 nident Number of identical matches
16 positive Number of positive-scoring matches
@@ -132,7 +188,7 @@
22 sseq Aligned part of subject sequence
23 qlen Query sequence length
24 slen Subject sequence length
- 25 salltitles All subject title(s), separated by '<>'
+ 25 salltitles All subject title(s), separated by a '<>'
====== ============= ===========================================
Beware that the XML file (and thus the conversion) and the tabular output
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml
--- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml Fri Mar 14 07:40:46 2014 -0400
@@ -1,4 +1,4 @@
-
+
Show BLAST database information from blastdbcmd
blastdbcmd
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Fri Mar 14 07:40:46 2014 -0400
@@ -1,4 +1,4 @@
-
+
Extract sequence(s) from BLAST database
blastdbcmd
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Fri Mar 14 07:40:46 2014 -0400
@@ -1,4 +1,4 @@
-
+
Search nucleotide database with nucleotide query sequence(s)
@@ -36,14 +36,16 @@
-
-
-
-
+
+
+
+
+
@@ -63,7 +65,7 @@
-
+
@@ -74,10 +76,32 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Fri Mar 14 07:40:46 2014 -0400
@@ -1,4 +1,4 @@
-
+
Search protein database with protein query sequence(s)
@@ -34,8 +34,8 @@
-
-
+
+
@@ -55,7 +55,7 @@
-
+
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Fri Mar 14 07:40:46 2014 -0400
@@ -1,4 +1,4 @@
-
+
Search protein database with translated nucleotide query sequence(s)
@@ -83,6 +83,21 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml Fri Mar 14 07:40:46 2014 -0400
@@ -0,0 +1,87 @@
+
+ Convert masking information in lower-case masked FASTA input to file formats suitable for makeblastdb
+
+ convert2blastmask
+ ncbi_macros.xml
+
+
+
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+convert2blastmask
+-in $infile
+-masking_algorithm "$masking_algorithm"
+-masking_options "$masking_options"
+$parse_seqids
+-out "$outfile"
+-outfmt $outformat
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+Convert masking information in lower-case masked FASTA input to file formats suitable for makeblastdb.
+
+More information about convert2blastmask can be found in the `BLAST Command Line Applications User Manual`_.
+
+.. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers (a more specific paper covering this wrapper is planned):
+
+@REFERENCES@
+
+
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Fri Mar 14 07:40:46 2014 -0400
@@ -1,4 +1,4 @@
-
+
masks low complexity regions
@@ -27,27 +27,24 @@
-
-
+
-
-
-
+
+
-
+
+
+
@@ -83,13 +80,14 @@
**What it does**
-This tool identifies and masks out low complexity regions of a nucleotide database (or sequences in FASTA format) by using the symmetric DUST algorithm.
+This tool identifies and masks out low complexity regions of a nucleotide database (or sequences in FASTA format) by using the symmetric DUST_ algorithm.
If you select *maskinfo ASN.1* (binary or text) as output format, the output file can be used as masking data for NCBI BLAST+ makeblastdb tool.
More information about dustmasker can be found in the `BLAST Command Line Applications User Manual`_.
.. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/
+.. _DUST: http://www.ncbi.nlm.nih.gov/pubmed/16796549
**References**
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_macros.xml
--- a/tools/ncbi_blast_plus/ncbi_macros.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_macros.xml Fri Mar 14 07:40:46 2014 -0400
@@ -1,30 +1,101 @@
-
-
-
-
-
-
-
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -240,7 +311,7 @@
@BINARY@
- blast+
+ blast+
@BINARY@ -version
@@ -268,10 +339,15 @@
-out "$output1"
##Set the extended list here so when we add things, saved workflows are not affected
-#if str($out_format)=="ext":
+#if str($output.out_format)=="ext":
-outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles"
+#elif str($output.out_format)=="cols"
+##Pick your own columns. Galaxy gives them to us comma separated, BLAST+ wants them space separated:
+##TODO - Can we catch the user picking no columns and raise an error here?
+#set cols = (str($output.std_cols)+","+str($output.ext_cols)+","+str($output.ids_cols)+","+str($output.misc_cols)+","+str($output.tax_cols)).replace("None", "").replace(",,", ",").replace(",", " ").strip()
+ -outfmt "6 $cols"
#else:
- -outfmt $out_format
+ -outfmt $output.out_format
#end if
$adv_opts.filter_query
@@ -330,7 +406,7 @@
====== ========= ============================================
The BLAST+ tools can optionally output additional columns of information,
-but this takes longer to calculate. Most (but not all) of these columns are
+but this takes longer to calculate. Many commonly used extra columns are
included by selecting the extended tabular output. The extra columns are
included *after* the standard 12 columns. This is so that you can write
workflow filtering steps that accept either the 12 or 25 column tabular
@@ -339,7 +415,7 @@
====== ============= ===========================================
Column NCBI name Description
------ ------------- -------------------------------------------
- 13 sallseqid All subject Seq-id(s), separated by ';'
+ 13 sallseqid All subject Seq-id(s), separated by a ';'
14 score Raw score
15 nident Number of identical matches
16 positive Number of positive-scoring matches
@@ -351,10 +427,14 @@
22 sseq Aligned part of subject sequence
23 qlen Query sequence length
24 slen Subject sequence length
- 25 salltitles All subject title(s), separated by '<>'
+ 25 salltitles All subject title(s), separated by a '<>'
====== ============= ===========================================
-The third option is BLAST XML output, which is designed to be parsed by
+The third option is to customise the tabular output by selecting which
+columns you want, from the standard set of 12, the default set of 25,
+or any of the additional columns BLAST+ offers (including species name).
+
+The fourth option is BLAST XML output, which is designed to be parsed by
another program, and is understood by some Galaxy tools.
You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_makeblastdb.xml
--- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Fri Mar 14 07:40:46 2014 -0400
@@ -1,4 +1,4 @@
-
+
Make BLAST database
makeblastdb
@@ -8,50 +8,44 @@
check_no_duplicates.py
##First check for duplicates (since BLAST+ 2.2.28 fails to do so)
##and abort (via the ampersand ampersand trick) if any are found.
-#for $i in $in
-"${i.file}"
-#end for
+#for i in $input_file#"${i}" #end for#
&&
makeblastdb -out "${os.path.join($outfile.extra_files_path,'blastdb')}"
$parse_seqids
$hash_index
## Single call to -in with multiple filenames space separated with outer quotes
## (presumably any filenames with spaces would be a problem). Note this gives
-## some extra spaces, e.g. -in " file1 file2 file3 " but BLAST seems happy:
--in "
-#for $i in $in
-${i.file}
-#end for
-"
+## some extra spaces, e.g. -in "file1 file2 file3 " but BLAST seems happy:
+-in "#for i in $input_file#${i} #end for#"
#if $title:
-title "$title"
#else:
##Would default to being based on the cryptic Galaxy filenames, which is unhelpful
-title "BLAST Database"
#end if
--dbtype $dbtype
-#set $mask_string = ''
-#set $sep = '-mask_data '
-#for $i in $mask_data
-#set $mask_string += $sep + str($i.file)
-#set $sep = ','
+-dbtype $dbtype
+## --------------------------------------------------------------------
+## Masking
+## --------------------------------------------------------------------
+## HACK: If no mask files, evaluates as a list with just None in it:
+## See Trello issue https://trello.com/c/lp5YmA1O
+#if ' '.join( map(str, $mask_data_file) ) != 'None':
+#for i in $mask_data_file:
+-mask_data "${i}"
#end for
-$mask_string
-## #set $gi_mask_string = ''
-## #set $sep = '-gi_mask -gi_mask_name '
-## #for $i in $gi_mask
-## #set $gi_mask_string += $sep + str($i.file)
-## #set $sep = ','
-## #end for
-## $gi_mask_string
-## #if $tax.select == 'id':
-## -taxid $tax.id
-## #else if $tax.select == 'map':
-## -taxid_map $tax.map
-## #end if
+#end if
+## --------------------------------------------------------------------
+## Taxonomy
+## --------------------------------------------------------------------
+#if $tax.taxselect == 'id':
+-taxid $tax.taxid
+## TODO - Can we use a tabular file for the taxonomy mapping?
+## #else if $tax.taxselect == 'map':
+## -taxid_map $tax.taxmap
+#end if
## --------------------------------------------------------------------
## Capture the stdout log information to the primary file (plain text):
->> "$outfile"
+> "$outfile"
@@ -59,47 +53,38 @@
-
-
-
-
-
+
+
-
-
-
-
-
+
+
+
-
-
+
+
- -->
@@ -112,14 +97,54 @@
-
+
+
+
+
+
+
+
+
+
-
+
+
+
+
+
+
+
+
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml Fri Mar 14 07:40:46 2014 -0400
@@ -1,4 +1,4 @@
-
+
Search protein domain database (PSSMs) with protein query sequence(s)
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml Fri Mar 14 07:40:46 2014 -0400
@@ -1,4 +1,4 @@
-
+
Search protein domain database (PSSMs) with translated nucleotide query sequence(s)
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml Fri Mar 14 07:40:46 2014 -0400
@@ -0,0 +1,101 @@
+
+ low-complexity regions in protein sequences
+
+ segmasker
+ ncbi_macros.xml
+
+
+
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting with hash hash are comments. Galaxy will turn newlines into spaces
+segmasker
+#if $db_opts.db_opts_selector == "db":
+ -in "${db_opts.database.fields.path}" -infmt blastdb
+#elif $db_opts.db_opts_selector == "histdb":
+ -in "${os.path.join($db_opts.histdb.extra_files_path, 'blastdb')}" -infmt blastdb
+#else:
+ -in "$subject" -infmt fasta
+#end if
+-out "$outfile"
+-window $window
+-locut $locut
+-hicut $hicut
+-outfmt $outformat
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This tool identifies and masks out low complexity regions of a protein database (or proteins in FASTA format) by using the SEG_ algorithm.
+
+If you select *maskinfo ASN.1* (binary or text) as output format, the output file can be used as masking data for NCBI BLAST+ makeblastdb tool.
+
+More information about segmasker can be found in the `BLAST Command Line Applications User Manual`_.
+
+.. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/
+.. _SEG: http://www.ncbi.nlm.nih.gov/pubmed/8743706
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers (a more specific paper covering this wrapper is planned):
+
+@REFERENCES@
+
+
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Fri Mar 14 07:40:46 2014 -0400
@@ -1,4 +1,4 @@
-
+
Search translated nucleotide database with protein query sequence(s)
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Fri Mar 14 07:40:46 2014 -0400
@@ -1,4 +1,4 @@
-
+
Search translated nucleotide database with translated nucleotide query sequence(s)
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/tool_dependencies.xml
--- a/tools/ncbi_blast_plus/tool_dependencies.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/tool_dependencies.xml Fri Mar 14 07:40:46 2014 -0400
@@ -1,6 +1,6 @@
-
-
+
+