Mercurial > repos > matt-shirley > ncbi_sra_toolkit

diff sra.py @ 5:76c7d617cd56
merge branches
author: Matt Shirley <mdshw5@gmail.com>
date: Mon, 07 Oct 2013 10:13:06 -0400
parents: 956e892b299d
children: e4c21444a3ba
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sra.py	Mon Oct 07 10:13:06 2013 -0400
@@ -0,0 +1,46 @@
+"""
+NCBI sra class
+"""
+import logging
+import binascii
+from galaxy.datatypes.data import *
+from galaxy.datatypes.sniff import *
+from galaxy.datatypes.binary import *
+from galaxy.datatypes.metadata import *
+
+log = logging.getLogger(__name__)  # module-level logger named after this module
+
+class sra( Binary ):
+    """
+    Sequence Read Archive (SRA) binary datatype.
+
+    Files are recognised by the 8-byte magic string 'NCBI.sra' at the very
+    start of the file (see sniff).
+    """
+    file_ext = 'sra'
+
+    def __init__( self, **kwd ):
+        """ Forward all keyword arguments unchanged to Binary.__init__. """
+        Binary.__init__( self, **kwd )
+
+    def sniff( self, filename ):
+        """
+        Return True if the file at `filename` starts with the sra magic bytes.
+
+        The first 8 bytes of any NCBI sra file are 'NCBI.sra', and the file is binary. EBI and DDBJ files may differ, though EBI and DDBJ
+        submissions through NCBI (ERR and DRR accessions) read 'NCBI.sra'.
+        For details about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure
+
+        Any failure to open or read the file is treated as "not an sra file":
+        the method returns False instead of raising.
+        """
+        try:
+            # Read in binary mode so the magic bytes are compared untouched;
+            # the with-block closes the handle even if read() fails.
+            with open( filename, 'rb' ) as handle:
+                header = handle.read( 8 )
+        except Exception:
+            # Sniffing is best-effort: an unreadable file simply does not match.
+            return False
+        # Compare the raw bytes directly; a short file yields fewer than
+        # 8 bytes and therefore does not match.
+        return header == b'NCBI.sra'
+
+    def set_peek( self, dataset, is_multi_byte=False ):
+        """
+        Set the peek and blurb text on `dataset`.
+
+        When the underlying dataset is not purged, the peek is a fixed label
+        and the blurb is the dataset size formatted by data.nice_size.
+        When it is purged, both are set to fixed placeholder messages.
+        `is_multi_byte` is accepted but not used by this method.
+        """
+        if not dataset.dataset.purged:
+            dataset.peek  = 'Binary sra file'
+            dataset.blurb = data.nice_size(dataset.get_size())
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+
+    def display_peek( self, dataset ):
+        """
+        Return the peek text stored on `dataset`.
+
+        If reading dataset.peek raises, fall back to a generic label that
+        includes the dataset size formatted by data.nice_size.
+        """
+        try:
+            return dataset.peek
+        except Exception:
+            return 'Binary sra file (%s)' % ( data.nice_size(dataset.get_size()))
+
+# Register the sra class with Binary's sniffable-format hook at import time.
+# The hasattr guard skips registration when Binary does not provide the hook
+# (presumably Galaxy releases that predate it -- TODO confirm).
+if hasattr(Binary, 'register_sniffable_binary_format'):
+    Binary.register_sniffable_binary_format('sra', 'sra', sra)
\ No newline at end of file
author	Matt Shirley <mdshw5@gmail.com>
date	Mon, 07 Oct 2013 10:13:06 -0400
parents	956e892b299d
children	e4c21444a3ba