Mercurial > repos > peterjc > blast2go
changeset 0:cd52c931b325
Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
author | peterjc |
---|---|
date | Tue, 07 Jun 2011 16:28:31 -0400 |
parents | |
children | 0f159cf346c8 |
files | tools/ncbi_blast_plus/blast2go.py tools/ncbi_blast_plus/blast2go.txt tools/ncbi_blast_plus/blast2go.xml |
diffstat | 3 files changed, 273 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
"""Galaxy wrapper for Blast2GO for pipelines, b2g4pipe v2.3.5.

This script takes exactly three command line arguments:
 * Input BLAST XML filename
 * Blast2GO properties filename (settings file)
 * Output tabular filename

It then calls the Java command line tool, and moves the output file to
the location Galaxy is expecting.
"""
import sys
import os
import subprocess

#You may need to edit this to match your local setup,
blast2go_jar = "/opt/b2g4pipe/blast2go.jar"


def stop_err(msg, error_level=1):
    """Print error message to stderr and quit with given error level."""
    sys.stderr.write("%s\n" % msg)
    sys.exit(error_level)


def run(cmd):
    """Run a command (list of argument strings) and wait for it to finish.

    The child's stdout and stderr are captured. If the command cannot be
    started, or finishes with a non-zero return code, the problem is
    reported via stop_err (which exits this script). On success both
    captured streams are echoed to this script's stdout.
    """
    #Used in every error message below; the original code referred to the
    #exception variable here, which is undefined unless Popen itself failed.
    cmd_str = " ".join(cmd)
    #Avoid using shell=True when we call subprocess to ensure if the Python
    #script is killed, so too is the child process.
    try:
        #universal_newlines=True gives text (not bytes) under Python 3
        child = subprocess.Popen(cmd,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 universal_newlines=True)
    except Exception as err:
        stop_err("Error invoking command:\n%s\n\n%s\n" % (cmd_str, err))
    #Use .communicate as can get deadlocks with .wait(),
    stdout, stderr = child.communicate()
    return_code = child.returncode
    if return_code:
        if stderr and stdout:
            stop_err("Return code %i from command:\n%s\n\n%s\n\n%s"
                     % (return_code, cmd_str, stdout, stderr))
        else:
            stop_err("Return code %i from command:\n%s\n%s"
                     % (return_code, cmd_str, stderr))
    #For early diagnostics (note both streams are echoed on stdout),
    else:
        print(stdout)
        print(stderr)


def main():
    """Validate the command line arguments, run b2g4pipe, move its output.

    Exits via stop_err if the argument count is wrong, if any input file
    or the Blast2GO JAR file is missing, if the Java tool fails, or if the
    expected .annot output file was not produced.
    """
    if len(sys.argv) != 4:
        stop_err("Require three arguments: XML filename, properties filename, output tabular filename")

    xml_file, prop_file, tabular_file = sys.argv[1:]

    if not os.path.isfile(xml_file):
        stop_err("Input BLAST XML file not found: %s" % xml_file)

    if not os.path.isfile(prop_file):
        stop_err("Blast2GO configuration file not found: %s" % prop_file)

    if not os.path.isfile(blast2go_jar):
        stop_err("Blast2GO JAR file not found: %s" % blast2go_jar)

    #We will have write access wherever the output should be,
    #so we'll ask Blast2GO to use that as the stem for its output
    #(it will append .annot to the filename)
    cmd = ["java", "-jar", blast2go_jar,
           "-in", xml_file,
           "-prop", prop_file,
           "-out", tabular_file,
           "-a"]
    run(cmd)

    out_file = tabular_file + ".annot"
    if not os.path.isfile(out_file):
        stop_err("ERROR - No output annotation file from Blast2GO")

    #Move the output file where Galaxy expects it to be:
    os.rename(out_file, tabular_file)

    print("Done")


if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/blast2go.txt Tue Jun 07 16:28:31 2011 -0400 @@ -0,0 +1,122 @@ +Galaxy wrapper for Blast2GO for pipelines, b2g4pipe +=================================================== + +This wrapper is copyright 2011 by Peter Cock, The James Hutton Institute +(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. +See the licence text below. + +This is a wrapper for the command line Java tool b2g4pipe v2.3.5, +Blast2GO for pipelines. See: + +S. Götz et al. +High-throughput functional annotation and data mining with the Blast2GO suite. +Nucleic Acids Res. 36(10):3420–3435, 2008. +http://dx.doi.org/10.1093/nar/gkn176 + +A. Conesa and S. Götz. +Blast2GO: A Comprehensive Suite for Functional Analysis in Plant Genomics. +Int. J. Plant Genomics. 619832, 2008. +http://dx.doi.org/10.1155/2008/619832 + +A. Conesa et al. +Blast2GO: A universal tool for annotation, visualization and analysis in functional genomics research. +Bioinformatics 21:3674-3676, 2005. +http://dx.doi.org/10.1093/bioinformatics/bti610 + +http://www.blast2go.org/ + + + +Installation +============ + +You can change the path by editing the definition near the start of the Python +script blast2go.py, but by default it expects the underlying tool to be here: + +/opt/b2g4pipe/blast2go.jar + +To install the wrapper copy or move the following files under the Galaxy tools +folder, e.g. in the tools/ncbi_blast_plus folder: + +* blast2go.xml (the Galaxy tool definition) +* blast2go.py (the Python wrapper script) +* blast2go.txt (this README file) + +You will also need to modify the tool_conf.xml file to tell Galaxy to offer the +tool. We suggest putting it next to the NCBI BLAST+ wrappers. Just add the line: + +<tool file="ncbi_blast_plus/blast2go.xml" /> + +As part of setting up b2g4pipe you will need to set up one or more Blast2GO +property files which tell the tool which database to use etc. 
The example +b2gPipe.properties provided with b2g4pipe v2.3.5 is out of date, with the +latest server IP address and database name given on the Blast2GO website. +These files can be anywhere accessible to the Galaxy Unix user; we put them +under /opt/b2g4pipe with the JAR file etc. + +You must tell Galaxy about these Blast2GO property files so that they can be +offered to the user. Create the file tool-data/blast2go.loc under the Galaxy +folder. This must be plain text, tab separated, with three columns: + +(1) ID for the setup, e.g. Spain_2010_May +(2) Description for the setup, e.g. Database in Spain (May 2010) +(3) Properties filename for the setup, e.g. /opt/b2g4pipe/Spain_2010_May.properties + +Avoid including "Blast2GO" in the description (column 2) as this will be +included in the automatically assigned output dataset name. The blast2go.loc +file allows you to customise the database setup. If for example you have a local +Blast2GO server running (which we recommend for speed), and you want this to be +the default setting, include it as the first line in your blast2go.loc file. + +Consult the Blast2GO documentation for details about the property files and +setting up a local MySQL Blast2GO database. 
+ + +History +======= + +v0.0.1 - Initial public release + + +Developers +========== + +This script and related tools are being developed on the following hg branch: +http://bitbucket.org/peterjc/galaxy-central/src/tools + +For making the "Galaxy Tool Shed" http://community.g2.bx.psu.edu/ tarball I use +the following command from the Galaxy root folder: + +$ tar -czf blast2go.tar.gz tools/ncbi_blast_plus/blast2go.xml tools/ncbi_blast_plus/blast2go.py tools/ncbi_blast_plus/blast2go.txt + +Check this worked: + +$ tar -tzf blast2go.tar.gz +tools/ncbi_blast_plus/blast2go.xml +tools/ncbi_blast_plus/blast2go.py +tools/ncbi_blast_plus/blast2go.txt + + +Licence (MIT/BSD style) +======================= + +Permission to use, copy, modify, and distribute this software and its +documentation with or without modifications and for any purpose and +without fee is hereby granted, provided that any copyright notices +appear in all copies and that both those copyright notices and this +permission notice appear in supporting documentation, and that the +names of the contributors or copyright holders not be used in +advertising or publicity pertaining to distribution of the software +without specific prior permission. + +THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL +WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT +OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +OR PERFORMANCE OF THIS SOFTWARE. + +NOTE: This is the licence for the Galaxy Wrapper only. Blast2GO and +associated data files are available and licenced separately.
<!-- Galaxy tool definition for the Blast2GO (b2g4pipe) wrapper script blast2go.py -->
<tool id="blast2go" name="Blast2GO" version="0.0.1">
    <description>Maps BLAST results to GO annotation terms</description>
    <!-- Arguments are: BLAST XML input, Blast2GO properties file, tabular output.
         They are quoted so a path containing spaces (e.g. a properties file
         listed in blast2go.loc) is still passed as a single argument. -->
    <command interpreter="python">
        blast2go.py "$xml" "${prop.fields.path}" "$tab"
    </command>
    <inputs>
        <!-- Per-parameter explanatory text goes in the "help" attribute -->
        <param name="xml" type="data" format="blastxml" label="BLAST XML results" help="You must have run BLAST against a protein database such as the NCBI non-redundant (NR) database. Use BLASTX for nucleotide queries, BLASTP for protein queries." />
        <param name="prop" type="select" label="Blast2GO settings" help="One or more configurations can be setup, such as using the Blast2GO team's server in Spain, or a local database.">
            <!-- Columns of the tab separated file tool-data/blast2go.loc:
                 ID, description, properties filename -->
            <options from_file="blast2go.loc">
                <column name="value" index="0"/>
                <column name="name" index="1"/>
                <column name="path" index="2"/>
            </options>
        </param>
    </inputs>
    <outputs>
        <data name="tab" format="tabular" label="Blast2GO ${prop.fields.name}" />
    </outputs>
    <requirements>
    </requirements>
    <tests>
    </tests>
    <help>
.. class:: warningmark

**Note**. Blast2GO may take a substantial amount of time, especially if
running against the public server in Spain. For large input datasets it
is advisable to allow overnight processing, or consider subdividing.

-----

**What it does**

This runs b2g4Pipe, the command line (no GUI) version of Blast2GO designed
for use in pipelines.

It takes as input BLAST XML results against a protein database, typically
the NCBI non-redundant (NR) database. The BLAST matches are used to assign
Gene Ontology (GO) annotation terms to each query sequence.

The output from this tool is a tabular file containing three columns, with
the order taken from query order in the original BLAST XML file:

====== ====================================
Column Description
------ ------------------------------------
     1 ID and description of query sequence
     2 GO term
     3 GO description
====== ====================================

Note that if no GO terms are assigned to a sequence (e.g. if it had no
BLAST matches), then it will not be present in the output file.

**References**

S. Götz et al.
High-throughput functional annotation and data mining with the Blast2GO suite.
Nucleic Acids Res. 36(10):3420–3435, 2008.
http://dx.doi.org/10.1093/nar/gkn176

A. Conesa and S. Götz.
Blast2GO: A Comprehensive Suite for Functional Analysis in Plant Genomics.
Int. J. Plant Genomics. 619832, 2008.
http://dx.doi.org/10.1155/2008/619832

A. Conesa et al.
Blast2GO: A universal tool for annotation, visualization and analysis in functional genomics research.
Bioinformatics 21:3674-3676, 2005.
http://dx.doi.org/10.1093/bioinformatics/bti610

http://www.blast2go.org/

    </help>
</tool>