ncbi_sra_toolkit: sra.py comparison

merge branches

comparison

equal deleted inserted replaced

-:548b3e8d7a0a
+:76c7d617cd56
+"""
+NCBI sra class
+"""
+import logging
+import binascii
+from galaxy.datatypes.data import *
+from galaxy.datatypes.sniff import *
+from galaxy.datatypes.binary import *
+from galaxy.datatypes.metadata import *
+log = logging.getLogger(__name__)
class sra(Binary):
    """Sequence Read Archive (SRA) binary datatype."""

    file_ext = 'sra'

    def __init__(self, **kwd):
        """Forward all keyword arguments to the ``Binary`` base initialiser."""
        Binary.__init__(self, **kwd)

    def sniff(self, filename):
        """Return True when ``filename`` starts with the NCBI SRA signature.

        The first 8 bytes of any NCBI sra file are 'NCBI.sra', and the file
        is binary. EBI and DDBJ files may differ, though EBI and DDBJ
        submissions through NCBI (ERR and DRR accessions) read 'NCBI.sra'.
        For details about the format, see
        http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure

        :param filename: path of the file to inspect.
        :returns: True if the first 8 bytes equal 'NCBI.sra'; False when they
            do not or when the file cannot be opened or read.
        """
        try:
            # Read raw bytes: text mode may translate or fail to decode the
            # payload, and the context manager guarantees the handle is closed.
            with open(filename, 'rb') as handle:
                return handle.read(8) == b'NCBI.sra'
        except (IOError, OSError):
            # Sniffing is best-effort: an unreadable file is simply "not sra".
            return False

    def set_peek(self, dataset, is_multi_byte=False):
        """Fill in ``dataset.peek`` and ``dataset.blurb``.

        A dataset that has not been purged gets a fixed description plus its
        formatted size; a purged one gets placeholder messages instead.

        :param dataset: object whose ``peek`` and ``blurb`` are assigned.
        :param is_multi_byte: accepted but not used by this implementation.
        """
        if not dataset.dataset.purged:
            dataset.peek = 'Binary sra file'
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return ``dataset.peek``, or a generic size description on failure.

        :param dataset: object whose peek text is requested.
        :returns: the stored peek, or 'Binary sra file (<size>)' when reading
            the peek raises.
        """
        try:
            return dataset.peek
        except Exception:
            # Reading the peek may fail for reasons this class cannot
            # anticipate; fall back to a description built from the size.
            return 'Binary sra file (%s)' % (data.nice_size(dataset.get_size()))
# The registration hook is not guaranteed to exist on every Binary base class
# (presumably it is missing from older Galaxy releases -- TODO confirm), so the
# sra sniffer is registered only when the hook is available.
if hasattr(Binary, 'register_sniffable_binary_format'):
    Binary.register_sniffable_binary_format('sra', 'sra', sra)

Mercurial > repos > matt-shirley > ncbi_sra_toolkit