diff blast.py @ 4:f9a7783ed7b6 draft

Uploaded v0.0.14 adding BLAST database support. See also the matching update for the NCBI BLAST+ wrappers which use these new definitions. This update included work by Edward Kirton.
author peterjc
date Fri, 09 Nov 2012 06:50:05 -0500
parents 6ef523b390e0
children b3a3ba0c1d47
line wrap: on
line diff
--- a/blast.py	Thu Sep 20 10:13:16 2012 -0400
+++ b/blast.py	Fri Nov 09 06:50:05 2012 -0500
@@ -3,8 +3,9 @@
 """
 
 from galaxy.datatypes.data import get_file_peek
-from galaxy.datatypes.data import Text
+from galaxy.datatypes.data import Text, Data
 from galaxy.datatypes.xml import GenericXml
+from galaxy.datatypes.metadata import MetadataElement
 
 class BlastXml( GenericXml ):
     """NCBI Blast XML Output data"""
@@ -18,6 +19,7 @@
         else:
             dataset.peek = 'file does not exist'
             dataset.blurb = 'file purged from disk'
+
     def sniff( self, filename ):
         """
         Determines whether the file is blastxml
@@ -55,6 +57,9 @@
         if len(split_files) == 1:
             #For one file only, use base class method (move/copy)
             return Text.merge(split_files, output_file)
+        if not split_files:
+            raise ValueError("Given no BLAST XML files, %r, to merge into %s" \
+                             % (split_files, output_file))
         out = open(output_file, "w")
         h = None
         for f in split_files:
@@ -122,3 +127,98 @@
         out.close()
     merge = staticmethod(merge)
 
+
+class _BlastDb(object):
+    """Base class for BLAST database datatypes.
+
+    Used as a mix-in listed ahead of the Galaxy datatype base class (see
+    BlastNucDb and BlastProtDb), so the methods defined here take precedence
+    over any same-named methods of that base class.
+    """
+
+    def set_peek( self, dataset, is_multi_byte=False ):
+        """Set the peek and blurb text on ``dataset``.
+
+        A fixed description is stored rather than a sample of file content.
+        ``is_multi_byte`` is accepted but not used here.
+        """
+        if dataset.dataset.purged:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+        else:
+            dataset.peek = "BLAST database (multiple files)"
+            dataset.blurb = "BLAST database (multiple files)"
+
+    def display_peek( self, dataset ):
+        """Create HTML content, used for displaying peek.
+
+        Returns ``dataset.peek``; if reading it fails, the generic
+        description is returned instead.
+        """
+        try:
+            return dataset.peek
+        except Exception:
+            # Deliberate best-effort fallback. Catching Exception rather than
+            # using a bare "except:" avoids trapping SystemExit and
+            # KeyboardInterrupt (Python 2.5 and later).
+            return "BLAST database (multiple files)"
+
+    def display_data(self, trans, data, preview=False, filename=None,
+                     to_ext=None, size=None, offset=None, **kwd):
+        """Apparently an old display method, but still gets called.
+
+        This allows us to format the data shown in the central pane via the
+        "eye" icon. All arguments are ignored; a fixed string is returned.
+        """
+        return "This is a BLAST database."
+
+    def get_mime(self):
+        """Returns the mime type of the datatype (pretend it is text for peek)"""
+        return 'text/plain'
+
+    def merge(split_files, output_file):
+        """Merge BLAST databases (not implemented for now).
+
+        Always raises NotImplementedError.
+        """
+        raise NotImplementedError("Merging BLAST databases is non-trivial (do this via makeblastdb?)")
+    # The signature has no self/cls parameter, so it has to be a static method
+    # to be callable as merge(split_files, output_file) on the class or on an
+    # instance; otherwise the call fails with TypeError before reaching the
+    # NotImplementedError above.
+    merge = staticmethod(merge)
+
+    def split( cls, input_datasets, subdir_generator_function, split_params):
+        """Split a BLAST database (not implemented for now).
+
+        Returns None when ``split_params`` is None (nothing to do); raises
+        NotImplementedError otherwise.
+        """
+        if split_params is None:
+            return None
+        raise NotImplementedError("Can't split BLAST databases")
+    # First parameter is cls, so bind it as a class method.
+    split = classmethod(split)
+
+
+class BlastNucDb( _BlastDb, Data ):
+    """Class for nucleotide BLAST database files.
+
+    A composite datatype: the database is a set of component files sharing
+    the base name ``blastdb``, registered in ``__init__``.
+    """
+    file_ext = 'blastdbn'
+    composite_type = 'basic'
+    # NOTE(review): declared without a name= argument - confirm this is intended.
+    MetadataElement( readonly=True, optional=True, visible=False, no_value=0 )
+
+    def __init__(self, **kwd):
+        """Initialise the datatype and register its component files."""
+        Data.__init__(self, **kwd)
+        # Component files registered without optional=True.
+        self.add_composite_file('blastdb.nhr')
+        self.add_composite_file('blastdb.nin')
+        self.add_composite_file('blastdb.nsq')
+        # Component files registered as optional.
+        self.add_composite_file('blastdb.nhd', optional=True)
+        self.add_composite_file('blastdb.nsi', optional=True)
+        self.add_composite_file('blastdb.nhi', optional=True)
+        self.add_composite_file('blastdb.nog', optional=True)
+        self.add_composite_file('blastdb.nsd', optional=True)
+        # Nucleotide counterparts of the .pnd/.pni files that BlastProtDb
+        # registers; appended last so the existing order is unchanged.
+        self.add_composite_file('blastdb.nnd', optional=True)
+        self.add_composite_file('blastdb.nni', optional=True)
+
+    def display_data(self, trans, data, preview=False, filename=None,
+                     to_ext=None, size=None, offset=None, **kwd):
+        """Apparently an old display method, but still gets called.
+
+        This allows us to format the data shown in the central pane via the
+        "eye" icon. All arguments are ignored; a fixed string is returned.
+        """
+        return "This is a BLAST nucleotide database."
+
+class BlastProtDb( _BlastDb, Data ):
+    """Class for protein BLAST database files.
+
+    A composite datatype: the database is a set of component files sharing
+    the base name ``blastdb``, registered in ``__init__``.
+    """
+    file_ext = 'blastdbp'
+    composite_type = 'basic'
+    # NOTE(review): declared without a name= argument - confirm this is intended.
+    MetadataElement( readonly=True, optional=True, visible=False, no_value=0 )
+
+    def __init__(self, **kwd):
+        """Initialise the datatype and register its component files."""
+        Data.__init__(self, **kwd)
+        # Component files registered without optional=True.
+        self.add_composite_file('blastdb.phr')
+        self.add_composite_file('blastdb.pin')
+        # blastdb.psq is registered exactly once, here, without optional=True;
+        # it must not also appear in the optional list below.
+        self.add_composite_file('blastdb.psq')
+        # Component files registered as optional.
+        self.add_composite_file('blastdb.pnd', optional=True)
+        self.add_composite_file('blastdb.pni', optional=True)
+        self.add_composite_file('blastdb.psd', optional=True)
+        self.add_composite_file('blastdb.psi', optional=True)
+        self.add_composite_file('blastdb.phd', optional=True)
+        self.add_composite_file('blastdb.phi', optional=True)
+        self.add_composite_file('blastdb.pog', optional=True)
+
+    def display_data(self, trans, data, preview=False, filename=None,
+                     to_ext=None, size=None, offset=None, **kwd):
+        """Apparently an old display method, but still gets called.
+
+        This allows us to format the data shown in the central pane via the
+        "eye" icon. All arguments are ignored; a fixed string is returned.
+        """
+        return "This is a BLAST protein database."