changeset 4:f9a7783ed7b6 draft

Uploaded v0.0.14 adding BLAST database support. See also the matching update for the NCBI BLAST+ wrappers which use these new definitions. This update included work by Edward Kirton.
author peterjc
date Fri, 09 Nov 2012 06:50:05 -0500
parents 6ef523b390e0
children b3a3ba0c1d47
files blast.py blast_datatypes.txt datatypes_conf.xml
diffstat 3 files changed, 112 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/blast.py	Thu Sep 20 10:13:16 2012 -0400
+++ b/blast.py	Fri Nov 09 06:50:05 2012 -0500
@@ -3,8 +3,9 @@
 """
 
 from galaxy.datatypes.data import get_file_peek
-from galaxy.datatypes.data import Text
+from galaxy.datatypes.data import Text, Data
 from galaxy.datatypes.xml import GenericXml
+from galaxy.datatypes.metadata import MetadataElement
 
 class BlastXml( GenericXml ):
     """NCBI Blast XML Output data"""
@@ -18,6 +19,7 @@
         else:
             dataset.peek = 'file does not exist'
             dataset.blurb = 'file purged from disk'
+
     def sniff( self, filename ):
         """
         Determines whether the file is blastxml
@@ -55,6 +57,9 @@
         if len(split_files) == 1:
             #For one file only, use base class method (move/copy)
             return Text.merge(split_files, output_file)
+        if not split_files:
+            raise ValueError("Given no BLAST XML files, %r, to merge into %s" \
+                             % (split_files, output_file))
         out = open(output_file, "w")
         h = None
         for f in split_files:
@@ -122,3 +127,98 @@
         out.close()
     merge = staticmethod(merge)
 
+
+class _BlastDb(object):
+    """Base class for BLAST database datatype.
+
+    Holds the behaviour shared by the nucleotide and protein database
+    classes defined below: fixed peek/blurb text, a plain text MIME type,
+    and merge/split hooks which refuse to operate on a database.
+    """
+
+    def set_peek( self, dataset, is_multi_byte=False ):
+        """Set the peek and blurb text.
+
+        A purged dataset gets the 'file does not exist' placeholder text,
+        anything else gets a fixed description. The is_multi_byte argument
+        is accepted but not used here.
+        """
+        if not dataset.dataset.purged:
+            dataset.peek  = "BLAST database (multiple files)"
+            dataset.blurb = "BLAST database (multiple files)"
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+
+    def display_peek( self, dataset ):
+        """Create HTML content, used for displaying peek.
+
+        Returns the dataset's stored peek, or a fixed description if the
+        peek cannot be read.
+        """
+        try:
+            return dataset.peek
+        except Exception:
+            # Best effort fallback. Deliberately not a bare except, so that
+            # KeyboardInterrupt and SystemExit are not swallowed here.
+            return "BLAST database (multiple files)"
+
+    def display_data(self, trans, data, preview=False, filename=None,
+                     to_ext=None, size=None, offset=None, **kwd):
+        """Apparently an old display method, but still gets called.
+
+        This allows us to format the data shown in the central pane via the "eye" icon.
+        All the arguments are ignored, a fixed message is returned.
+        """
+        return "This is a BLAST database."
+
+    def get_mime(self):
+        """Returns the mime type of the datatype (pretend it is text for peek)"""
+        return 'text/plain'
+
+    def merge(split_files, output_file):
+        """Merge BLAST databases (not implemented for now).
+
+        Always raises NotImplementedError.
+        """
+        raise NotImplementedError("Merging BLAST databases is non-trivial (do this via makeblastdb?)")
+    # There is no self argument, so wrap this as a static method (matching
+    # the merge method defined earlier in this file). Without this, calling
+    # merge on an instance fails on the argument count rather than raising
+    # the intended NotImplementedError.
+    merge = staticmethod(merge)
+
+    def split( cls, input_datasets, subdir_generator_function, split_params):
+        """Split a BLAST database (not implemented for now).
+
+        Returns None when split_params is None, otherwise raises
+        NotImplementedError.
+        """
+        if split_params is None:
+            return None
+        raise NotImplementedError("Can't split BLAST databases")
+    # The first argument is cls, so bind this as a class method. It can
+    # then be called on either the class or an instance.
+    split = classmethod(split)
+
+
+class BlastNucDb( _BlastDb, Data ):
+    """Composite datatype for a nucleotide BLAST database."""
+    file_ext = 'blastdbn'
+    composite_type ='basic'
+    MetadataElement( readonly=True, optional=True, visible=False, no_value=0 )
+
+    def __init__(self,**kwd):
+        """Register the component files making up the database."""
+        Data.__init__(self, **kwd)
+        # Files registered without the optional flag.
+        for component in ('blastdb.nhr', 'blastdb.nin', 'blastdb.nsq'):
+            self.add_composite_file(component)
+        # Files registered as optional, in the original order.
+        for component in ('blastdb.nhd', 'blastdb.nsi', 'blastdb.nhi',
+                          'blastdb.nog', 'blastdb.nsd'):
+            self.add_composite_file(component, optional=True)
+
+    def display_data(self, trans, data, preview=False, filename=None,
+                     to_ext=None, size=None, offset=None, **kwd):
+        """Return the fixed text shown for this datatype.
+
+        Apparently an old display method, but it still gets called when
+        showing the data in the central pane via the "eye" icon. All the
+        arguments are ignored.
+        """
+        return "This is a BLAST nucleotide database."
+
+class BlastProtDb( _BlastDb, Data ):
+    """Class for protein BLAST database files."""
+    file_ext = 'blastdbp'
+    composite_type ='basic'
+    MetadataElement( readonly=True, optional=True, visible=False, no_value=0 )
+
+    def __init__(self,**kwd):
+        """Register the component files making up the database."""
+        Data.__init__(self, **kwd)
+        # Files registered without the optional flag.
+        self.add_composite_file('blastdb.phr')
+        self.add_composite_file('blastdb.pin')
+        self.add_composite_file('blastdb.psq')
+        # Files registered as optional. Note 'blastdb.psq' must not be
+        # repeated here: it is already registered above, and adding it a
+        # second time with optional=True contradicts that registration.
+        self.add_composite_file('blastdb.pnd', optional=True)
+        self.add_composite_file('blastdb.pni', optional=True)
+        self.add_composite_file('blastdb.psd', optional=True)
+        self.add_composite_file('blastdb.psi', optional=True)
+        self.add_composite_file('blastdb.phd', optional=True)
+        self.add_composite_file('blastdb.phi', optional=True)
+        self.add_composite_file('blastdb.pog', optional=True)
+
+    def display_data(self, trans, data, preview=False, filename=None,
+                     to_ext=None, size=None, offset=None, **kwd):
+        """Apparently an old display method, but still gets called.
+
+        This allows us to format the data shown in the central pane via the "eye" icon.
+        All the arguments are ignored, a fixed message is returned.
+        """
+        return "This is a BLAST protein database."
--- a/blast_datatypes.txt	Thu Sep 20 10:13:16 2012 -0400
+++ b/blast_datatypes.txt	Fri Nov 09 06:50:05 2012 -0500
@@ -21,6 +21,8 @@
 v0.0.11 - Final revision as part of the Galaxy main repository, and the
           first release via the Tool Shed
 v0.0.13 - Uses blast.py instead of xml.py to define the datatypes
+v0.0.14 - Includes datatypes for protein and nucleotide BLAST databases
+          (based on work by Edward Kirton)
 
 
 Installation
@@ -37,9 +39,15 @@
 datatypes_conf.xml entry to be combined with your local configuration.
 
 However, if you really want to this should work for a manual install. Add
-the following line to the datatypes_conf.xml file in the Galaxy main folder:
+the following lines to the datatypes_conf.xml file in the Galaxy main folder:
 
    <datatype extension="blastxml" type="galaxy.datatypes.blast:BlastXml" mimetype="application/xml" display_in_upload="true"/>
+   <datatype extension="blastdbn" type="galaxy.datatypes.blast:BlastNucDb" mimetype="text/html" display_in_upload="false"/>
+   <datatype extension="blastdbp" type="galaxy.datatypes.blast:BlastProtDb" mimetype="text/html" display_in_upload="false"/>
+
+and later in the sniffer section:
+
+    <sniffer type="galaxy.datatypes.blast:BlastXml"/>
 
 Also create the file lib/galaxy/datatypes/blast.py by moving, copying or linking
 the blast.py file provided in this tar-ball.  Finally add 'import blast' near
--- a/datatypes_conf.xml	Thu Sep 20 10:13:16 2012 -0400
+++ b/datatypes_conf.xml	Fri Nov 09 06:50:05 2012 -0500
@@ -5,6 +5,8 @@
     </datatype_files>
     <registration>
         <datatype extension="blastxml" type="galaxy.datatypes.blast:BlastXml" mimetype="application/xml" display_in_upload="true"/>
+        <datatype extension="blastdbn" type="galaxy.datatypes.blast:BlastNucDb" mimetype="text/html" display_in_upload="false"/>
+        <datatype extension="blastdbp" type="galaxy.datatypes.blast:BlastProtDb" mimetype="text/html" display_in_upload="false"/>
     </registration>
     <sniffers>
         <sniffer type="galaxy.datatypes.blast:BlastXml"/>