Mercurial > repos > matt-shirley > ncbi_sra_toolkit
changeset 3:956e892b299d
Add missing SRA datatype
author | matt-shirley |
---|---|
date | Mon, 07 Oct 2013 10:07:25 -0400 |
parents | 293927a46697 |
children | 76c7d617cd56 |
files | sra.py |
diffstat | 1 files changed, 46 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# sra.py -- new file added by this changeset (diffed against /dev/null).
"""
NCBI sra class
"""
import logging
import binascii
from galaxy.datatypes.data import *
from galaxy.datatypes.sniff import *
from galaxy.datatypes.binary import *
from galaxy.datatypes.metadata import *

log = logging.getLogger(__name__)


class sra( Binary ):
    """ Sequence Read Archive (SRA) """
    file_ext = 'sra'

    # Magic bytes expected at the very start of an NCBI-produced sra file.
    _magic = b'NCBI.sra'

    def __init__( self, **kwd ):
        """ Forward all keyword arguments unchanged to the Binary base class. """
        Binary.__init__( self, **kwd )

    def sniff( self, filename ):
        """ Return True if the file at `filename` starts with the NCBI sra magic bytes.

        The first 8 bytes of any NCBI sra file is 'NCBI.sra', and the file is binary.
        EBI and DDBJ files may differ, though EBI and DDBJ submissions through NCBI
        (ERR and DRR accessions) read 'NCBI.sra'.
        For details about the format, see
        http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure

        Any error while opening or reading the file is treated as "not an sra
        file" and yields False rather than propagating.
        """
        try:
            # Binary mode: the payload is not text, and a text-mode read may
            # fail to decode or translate bytes before the comparison is made.
            # The context manager guarantees the handle is closed.
            with open( filename, 'rb' ) as handle:
                header = handle.read( len( self._magic ) )
        except Exception:
            # Sniffing is best-effort: an unreadable file is simply not a match.
            log.debug( "Could not read '%s' while sniffing for sra", filename, exc_info=True )
            return False
        return header == self._magic

    def set_peek( self, dataset, is_multi_byte=False ):
        """ Set `dataset.peek` and `dataset.blurb` for display.

        If the underlying dataset is not purged, the peek is a fixed description
        and the blurb is the formatted dataset size; otherwise both are set to
        messages saying the file is gone. `is_multi_byte` is accepted but unused.
        """
        if not dataset.dataset.purged:
            dataset.peek = 'Binary sra file'
            # NOTE(review): `data` is expected to come into scope through the
            # star imports above -- confirm against galaxy.datatypes.binary.
            dataset.blurb = data.nice_size( dataset.get_size() )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """ Return `dataset.peek`, or a generic description with the file size
        if reading the peek raises.
        """
        try:
            return dataset.peek
        except Exception:
            return 'Binary sra file (%s)' % ( data.nice_size( dataset.get_size() ) )


# Only register the sniffer when the Binary class provides the registration
# hook; otherwise the class is defined but not registered here.
if hasattr( Binary, 'register_sniffable_binary_format' ):
    Binary.register_sniffable_binary_format( 'sra', 'sra', sra )