# HG changeset patch
# User peterjc
# Date 1361296183 18000
# Node ID 1f546099212f04f539fd44e97f15b57a84756065
# Parent 4ce66a5401d0592bb82bd1a49da74860409b1da1
Uploaded v0.0.17, default to extended 24 column tabular output (rather than standard 12 column output). This should avoid many cases of repeated BLAST jobs being run due to later needing the extra columns.
diff -r 4ce66a5401d0 -r 1f546099212f test-data/blastp_four_human_vs_rhodopsin.tabular
--- a/test-data/blastp_four_human_vs_rhodopsin.tabular Fri Feb 08 05:51:26 2013 -0500
+++ b/test-data/blastp_four_human_vs_rhodopsin.tabular Tue Feb 19 12:49:43 2013 -0500
@@ -3,4 +3,4 @@
sp|P08100|OPSD_HUMAN gi|283855846|gb|ADB45242.1| 94.82 328 17 0 11 338 1 328 0.0 653
sp|P08100|OPSD_HUMAN gi|283855823|gb|ADB45229.1| 94.82 328 17 0 11 338 1 328 0.0 631
sp|P08100|OPSD_HUMAN gi|223523|prf||0811197A 93.10 348 23 1 1 348 1 347 0.0 673
-sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| 82.16 342 60 1 1 341 1 342 3e-176 599
+sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| 82.16 342 60 1 1 341 1 342 0.0 599
diff -r 4ce66a5401d0 -r 1f546099212f test-data/blastp_four_human_vs_rhodopsin.xml
--- a/test-data/blastp_four_human_vs_rhodopsin.xml Fri Feb 08 05:51:26 2013 -0500
+++ b/test-data/blastp_four_human_vs_rhodopsin.xml Tue Feb 19 12:49:43 2013 -0500
@@ -1,8 +1,8 @@
-
+
-TBLASTN 2.2.25+ +TBLASTN 2.2.26+ Query= sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 @@ -563,7 +563,7 @@ Score = 151 bits (342), Expect(2) = 1e-72, Method: Compositional matrix adjust. - Identities = 69/74 (94%), Positives = 73/74 (99%), Gaps = 0/74 (0%) + Identities = 69/74 (93%), Positives = 73/74 (99%), Gaps = 0/74 (0%) Frame = +3 Query 239 ESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSA 298 @@ -584,8 +584,8 @@ Sbjct 2855 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS 3031 - Score = 229 bits (523), Expect = 1e-64, Method: Compositional matrix adjust. - Identities = 107/111 (97%), Positives = 109/111 (99%), Gaps = 0/111 (0%) + Score = 229 bits (523), Expect = 9e-67, Method: Compositional matrix adjust. + Identities = 107/111 (96%), Positives = 109/111 (98%), Gaps = 0/111 (0%) Frame = +1 Query 11 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRT 70 @@ -598,7 +598,7 @@ Score = 122 bits (276), Expect = 1e-32, Method: Compositional matrix adjust. - Identities = 55/59 (94%), Positives = 56/59 (95%), Gaps = 0/59 (0%) + Identities = 55/59 (93%), Positives = 56/59 (95%), Gaps = 0/59 (0%) Frame = +3 Query 119 LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR 177 @@ -606,8 +606,8 @@ Sbjct 1404 LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR 1580 - Score = 57.7 bits (125), Expect = 6e-13, Method: Compositional matrix adjust. - Identities = 23/26 (89%), Positives = 24/26 (93%), Gaps = 0/26 (0%) + Score = 57.7 bits (125), Expect = 2e-12, Method: Compositional matrix adjust. + Identities = 23/26 (88%), Positives = 24/26 (92%), Gaps = 0/26 (0%) Frame = +1 Query 312 QFRNCMLTTICCGKNPLGDDEASATV 337 @@ -637,7 +637,7 @@ Score = 658 bits (1517), Expect = 0.0, Method: Compositional matrix adjust. 
- Identities = 310/326 (96%), Positives = 322/326 (99%), Gaps = 0/326 (0%) + Identities = 310/326 (95%), Positives = 322/326 (99%), Gaps = 0/326 (0%) Frame = +1 Query 11 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRT 70 @@ -687,7 +687,7 @@ Score = 711 bits (1640), Expect = 0.0, Method: Compositional matrix adjust. - Identities = 325/348 (94%), Positives = 337/348 (97%), Gaps = 0/348 (0%) + Identities = 325/348 (93%), Positives = 337/348 (97%), Gaps = 0/348 (0%) Frame = +1 Query 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY 60 @@ -737,7 +737,7 @@ Score = 626 bits (1444), Expect = 0.0, Method: Compositional matrix adjust. - Identities = 281/342 (83%), Positives = 311/342 (91%), Gaps = 1/342 (0%) + Identities = 281/342 (82%), Positives = 311/342 (91%), Gaps = 1/342 (0%) Frame = +2 Query 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY 60 diff -r 4ce66a5401d0 -r 1f546099212f test-data/tblastn_four_human_vs_rhodopsin.tabular --- a/test-data/tblastn_four_human_vs_rhodopsin.tabular Fri Feb 08 05:51:26 2013 -0500 +++ b/test-data/tblastn_four_human_vs_rhodopsin.tabular Tue Feb 19 12:49:43 2013 -0500 @@ -2,9 +2,9 @@ sp|P08100|OPSD_HUMAN gi|2734705|gb|U59921.1|BBU59921 84.80 342 51 1 1 341 42 1067 0.0 646 sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.24 74 5 0 239 312 3147 3368 1e-72 151 sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 91.53 59 5 0 177 235 2855 3031 1e-72 126 -sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 96.40 111 4 0 11 121 1 333 1e-64 229 +sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 96.40 111 4 0 11 121 1 333 9e-67 229 sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.22 59 4 0 119 177 1404 1580 1e-32 122 -sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 88.46 26 3 0 312 337 4222 4299 6e-13 57.7 +sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 88.46 26 3 0 312 337 4222 4299 2e-12 57.7 sp|P08100|OPSD_HUMAN gi|283855822|gb|GQ290312.1| 95.09 326 16 0 11 336 1 978 0.0 658 sp|P08100|OPSD_HUMAN 
gi|18148870|dbj|AB062417.1| 93.39 348 23 0 1 348 1 1044 0.0 711 sp|P08100|OPSD_HUMAN gi|12583664|dbj|AB043817.1| 82.16 342 60 1 1 341 23 1048 0.0 626 diff -r 4ce66a5401d0 -r 1f546099212f test-data/tblastn_four_human_vs_rhodopsin.xml --- a/test-data/tblastn_four_human_vs_rhodopsin.xml Fri Feb 08 05:51:26 2013 -0500 +++ b/test-data/tblastn_four_human_vs_rhodopsin.xml Tue Feb 19 12:49:43 2013 -0500 @@ -1,8 +1,8 @@ - ++ \ No newline at end of file diff -r 4ce66a5401d0 -r 1f546099212f test-data/tblastn_four_human_vs_rhodopsin_ext.tabular --- a/test-data/tblastn_four_human_vs_rhodopsin_ext.tabular Fri Feb 08 05:51:26 2013 -0500 +++ b/test-data/tblastn_four_human_vs_rhodopsin_ext.tabular Tue Feb 19 12:49:43 2013 -0500 @@ -2,9 +2,9 @@ sp|P08100|OPSD_HUMAN gi|2734705|gb|U59921.1|BBU59921 84.80 342 51 1 1 341 42 1067 0.0 646 gi|2734705|gb|U59921.1|BBU59921 1489 290 320 1 93.57 0 3 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE 348 1574 sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.24 74 5 0 239 312 3147 3368 1e-72 151 gi|283855845|gb|GQ290303.1| 342 69 73 0 98.65 0 3 ESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ ESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ 348 4301 sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 91.53 59 5 0 177 235 2855 3031 1e-72 126 gi|283855845|gb|GQ290303.1| 284 
54 57 0 96.61 0 2 RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS 348 4301 -sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 96.40 111 4 0 11 121 1 333 1e-64 229 gi|283855845|gb|GQ290303.1| 523 107 109 0 98.20 0 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG 348 4301 +sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 96.40 111 4 0 11 121 1 333 9e-67 229 gi|283855845|gb|GQ290303.1| 523 107 109 0 98.20 0 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG 348 4301 sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.22 59 4 0 119 177 1404 1580 1e-32 122 gi|283855845|gb|GQ290303.1| 276 55 56 0 94.92 0 3 LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR 348 4301 -sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 88.46 26 3 0 312 337 4222 4299 6e-13 57.7 gi|283855845|gb|GQ290303.1| 125 23 24 0 92.31 0 1 QFRNCMLTTICCGKNPLGDDEASATV QFRNCMLTTLCCGKNPLGDDEASTTA 348 4301 +sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 88.46 26 3 0 312 337 4222 4299 2e-12 57.7 gi|283855845|gb|GQ290303.1| 125 23 24 0 92.31 0 1 QFRNCMLTTICCGKNPLGDDEASATV QFRNCMLTTLCCGKNPLGDDEASTTA 348 4301 sp|P08100|OPSD_HUMAN gi|283855822|gb|GQ290312.1| 95.09 326 16 0 11 336 1 978 0.0 658 gi|283855822|gb|GQ290312.1| 1517 310 322 0 98.77 0 1 
VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT 348 983 sp|P08100|OPSD_HUMAN gi|18148870|dbj|AB062417.1| 93.39 348 23 0 1 348 1 1044 0.0 711 gi|18148870|dbj|AB062417.1| 1640 325 337 0 96.84 0 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 1047 sp|P08100|OPSD_HUMAN gi|12583664|dbj|AB043817.1| 82.16 342 60 1 1 341 23 1048 0.0 626 gi|12583664|dbj|AB043817.1| 1444 281 311 1 90.94 0 2 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE 
MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 348 1344 diff -r 4ce66a5401d0 -r 1f546099212f test-data/tblastn_four_human_vs_rhodopsin_parse_deflines.tabular --- a/test-data/tblastn_four_human_vs_rhodopsin_parse_deflines.tabular Fri Feb 08 05:51:26 2013 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -sp|P08100|OPSD_HUMAN gi|57163782|ref|NM_001009242.1| 96.55 348 12 0 1 348 1 1044 0.0 732 -sp|P08100|OPSD_HUMAN gi|2734705|gb|U59921.1|BBU59921 84.80 342 51 1 1 341 42 1067 0.0 646 -sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.24 74 5 0 239 312 3147 3368 1e-72 151 -sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 91.53 59 5 0 177 235 2855 3031 1e-72 126 -sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 96.40 111 4 0 11 121 1 333 1e-64 229 -sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.22 59 4 0 119 177 1404 1580 1e-32 122 -sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 88.46 26 3 0 312 337 4222 4299 6e-13 57.7 -sp|P08100|OPSD_HUMAN gi|283855822|gb|GQ290312.1| 95.09 326 16 0 11 336 1 978 0.0 658 -sp|P08100|OPSD_HUMAN gi|18148870|dbj|AB062417.1| 93.39 348 23 0 1 348 1 1044 0.0 711 -sp|P08100|OPSD_HUMAN gi|12583664|dbj|AB043817.1| 82.16 342 60 1 1 341 23 1048 0.0 626 diff -r 4ce66a5401d0 -r 1f546099212f tools/ncbi_blast_plus/blastxml_to_tabular.xml --- a/tools/ncbi_blast_plus/blastxml_to_tabular.xml Fri Feb 08 05:51:26 2013 -0500 +++ b/tools/ncbi_blast_plus/blastxml_to_tabular.xml Tue Feb 19 12:49:43 2013 -0500 @@ -1,13 +1,18 @@ - tblastn -TBLASTN 2.2.25+ +TBLASTN 2.2.26+ Stephen F. Altschul, Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. 
Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402. Query_1 @@ -375,7 +375,7 @@1 -732.392902459534 +732.393 1689 0 1 @@ -422,7 +422,7 @@1 -646.119739014374 +646.12 1489 0 1 @@ -469,9 +469,9 @@1 -151.343146656381 +151.343 342 -1.39566684546685e-72 +1.39567e-72 239 312 3147 @@ -488,9 +488,9 @@2 -126.323929257285 +126.324 284 -1.39566684546685e-72 +1.39567e-72 177 235 2855 @@ -507,9 +507,9 @@3 -229.420359574251 +229.42 523 -9.84654801241353e-65 +9.34154e-67 11 121 1 @@ -526,9 +526,9 @@4 -122.873002719478 +122.873 276 -1.40732096096596e-32 +1.03783e-32 119 177 1404 @@ -545,9 +545,9 @@5 -57.7367643183824 +57.7368 125 -5.60065526485586e-13 +1.50808e-12 312 337 4222 @@ -592,7 +592,7 @@1 -658.197981896696 +658.198 1517 0 11 @@ -639,7 +639,7 @@1 -711.255977415469 +711.256 1640 0 1 @@ -686,7 +686,7 @@1 -626.708277239213 +626.708 1444 0 1 @@ -719,4 +719,4 @@ -+ diff -r 4ce66a5401d0 -r 1f546099212f tools/ncbi_blast_plus/ncbi_blast_plus.txt --- a/tools/ncbi_blast_plus/ncbi_blast_plus.txt Fri Feb 08 05:51:26 2013 -0500 +++ b/tools/ncbi_blast_plus/ncbi_blast_plus.txt Tue Feb 19 12:49:43 2013 -0500 @@ -34,7 +34,20 @@ For those not using Galaxy's automated installation from the Tool Shed, put the XML and Python files under tools/ncbi_blast_plus and add the XML files -to your tool_conf.xml as normal. +to your tool_conf.xml as normal (and do the same in tool_conf.xml.sample +in order to run the unit tests). For example, use: + + Convert BLAST XML output to tabular blastxml_to_tabular.py $blastxml_file $tabular_file $out_format ++ + + + - - + + @@ -74,7 +79,7 @@ may need both the XML and the tabular output - but running BLAST twice is slow and wasteful. 
-This tool takes the BLAST XML output and by default converts it into the +This tool takes the BLAST XML output and can convert it into the standard 12 column tabular equivalent: ====== ========= ============================================ @@ -99,7 +104,7 @@ included by selecting the extended tabular output. The extra columns are included *after* the standard 12 columns. This is so that you can write workflow filtering steps that accept either the 12 or 22 column tabular -BLAST output. +BLAST output. This tool now uses this extended 24 column output by default. ====== ============= =========================================== Column NCBI name Description @@ -121,7 +126,7 @@ Beware that the XML file (and thus the conversion) and the tabular output direct from BLAST+ may differ in the presence of XXXX masking on regions low complexity (columns 21 and 22), and thus also calculated figures like -the percentage idenity (column 3). +the percentage identity (column 3). + You will also need to install 'blast_datatypes' from the Tool Shed. This defines the BLAST XML file format ('blastxml') and protein and nucleotide @@ -45,6 +58,14 @@ databases like NR), located in the tool-data folder. Sample fils are included which explain the tab based format to use. +You must install the NCBI BLAST+ standalone tools somewhere on the system +path. Currently the unit tests are written using "BLAST 2.2.26+". + +Run the functional tests (adjusting the section identifier to match your +tool_conf.xml.sample file): + +./run_functional_tests.sh -sid NCBI_BLAST+-ncbi_blast_plus_tools + History ======= @@ -65,6 +86,9 @@ FASTA files (better looking e-values than you might be expecting). v0.0.16 - Added repository_dependencies.xml for automates installation of the 'blast_datatypes' repository from the Tool Shed. 
+v0.0.17 - The BLAST+ search tools now default to extended tabular output + (all too often our users were having to re-run searches just to + get one of the missing columns like query or subject length) Developers diff -r 4ce66a5401d0 -r 1f546099212f tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Fri Feb 08 05:51:26 2013 -0500 +++ b/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Tue Feb 19 12:49:43 2013 -0500 @@ -1,4 +1,4 @@ -+ + + + + + + + + + Search nucleotide database with nucleotide query sequence(s) @@ -91,8 +91,8 @@ - - + + @@ -200,7 +200,7 @@ included by selecting the extended tabular output. The extra columns are included *after* the standard 12 columns. This is so that you can write workflow filtering steps that accept either the 12 or 24 column tabular -BLAST output. +BLAST output. Galaxy now uses this extended 24 column output by default. ====== ============= =========================================== Column NCBI name Description diff -r 4ce66a5401d0 -r 1f546099212f tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Fri Feb 08 05:51:26 2013 -0500 +++ b/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Tue Feb 19 12:49:43 2013 -0500 @@ -1,4 +1,4 @@ - + Search protein database with protein query sequence(s) @@ -86,8 +86,8 @@ - - + + @@ -265,7 +265,7 @@ included by selecting the extended tabular output. The extra columns are included *after* the standard 12 columns. This is so that you can write workflow filtering steps that accept either the 12 or 24 column tabular -BLAST output. +BLAST output. Galaxy now uses this extended 24 column output by default. 
====== ============= =========================================== Column NCBI name Description diff -r 4ce66a5401d0 -r 1f546099212f tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Fri Feb 08 05:51:26 2013 -0500 +++ b/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Tue Feb 19 12:49:43 2013 -0500 @@ -1,4 +1,4 @@ - + Search protein database with translated nucleotide query sequence(s) @@ -103,8 +103,8 @@ - - + + @@ -253,7 +253,7 @@ included by selecting the extended tabular output. The extra columns are included *after* the standard 12 columns. This is so that you can write workflow filtering steps that accept either the 12 or 24 column tabular -BLAST output. +BLAST output. Galaxy now uses this extended 24 column output by default. ====== ============= =========================================== Column NCBI name Description diff -r 4ce66a5401d0 -r 1f546099212f tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Fri Feb 08 05:51:26 2013 -0500 +++ b/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Tue Feb 19 12:49:43 2013 -0500 @@ -1,4 +1,4 @@ - + Search translated nucleotide database with protein query sequence(s) @@ -82,8 +82,8 @@ - - + + @@ -216,7 +216,7 @@ - + @@ -299,7 +299,7 @@ included by selecting the extended tabular output. The extra columns are included *after* the standard 12 columns. This is so that you can write workflow filtering steps that accept either the 12 or 24 column tabular -BLAST output. +BLAST output. Galaxy now uses this extended 24 column output by default. 
====== ============= =========================================== Column NCBI name Description diff -r 4ce66a5401d0 -r 1f546099212f tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Fri Feb 08 05:51:26 2013 -0500 +++ b/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Tue Feb 19 12:49:43 2013 -0500 @@ -1,4 +1,4 @@ - + Search translated nucleotide database with translated nucleotide query sequence(s) @@ -103,8 +103,8 @@ - - + + @@ -241,7 +241,7 @@ included by selecting the extended tabular output. The extra columns are included *after* the standard 12 columns. This is so that you can write workflow filtering steps that accept either the 12 or 24 column tabular -BLAST output. +BLAST output. Galaxy now uses this extended 24 column output by default. ====== ============= =========================================== Column NCBI name Description