comparison sra.py @ 6:e4c21444a3ba

Add sniffer, clean up imports, register sniffable datatype.
author Matt Shirley <mdshw5@gmail.com>
date Thu, 13 Mar 2014 15:30:54 -0400
parents 956e892b299d
children 558a88cd49e4
comparison
equal deleted inserted replaced
5:76c7d617cd56 6:e4c21444a3ba
1 """ 1 """
2 NCBI sra class 2 NCBI sra class
3 """ 3 """
4 import logging 4 import logging
5 import binascii 5 import binascii
6 from galaxy.datatypes.data import * 6 from galaxy.datatypes.data import nice_size
7 from galaxy.datatypes.sniff import * 7 from galaxy.datatypes.binary import Binary
8 from galaxy.datatypes.binary import *
9 from galaxy.datatypes.metadata import *
10 8
11 log = logging.getLogger(__name__) 9 log = logging.getLogger(__name__)
12 10
13 class sra( Binary ): 11 class Sra(Binary):
14 """ Sequence Read Archive (SRA) """ 12 """ Sequence Read Archive (SRA) """
15 file_ext = 'sra' 13 file_ext = 'sra'
16 14
17 def __init__( self, **kwd ): 15 def __init__( self, **kwd ):
18 Binary.__init__( self, **kwd ) 16 Binary.__init__( self, **kwd )
17
19 def sniff( self, filename ): 18 def sniff( self, filename ):
20 """ The first 8 bytes of any NCBI sra file is 'NCIB.sra', and the file is binary. EBI and DDBJ files may differ, though EBI and DDBJ 19 """ The first 8 bytes of any NCBI sra file is 'NCBI.sra', and the file is binary.
21 submissions through NCBI (ERR and DRR accessions) read 'NCBI.sra'. 20 For details about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure
22 For details about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure
23 """ 21 """
24 try: 22 try:
25 header = open(filename).read(8) 23 header = open(filename).read(8)
26 if binascii.b2a_hex(header) == binascii.hexlify('NCBI.sra'): 24 if binascii.b2a_hex(header) == binascii.hexlify('NCBI.sra'):
27 return True 25 return True
28 else: 26 else:
29 return False 27 return False
30 except: 28 except:
31 return False 29 return False
30
32 def set_peek(self, dataset, is_multi_byte=False): 31 def set_peek(self, dataset, is_multi_byte=False):
33 if not dataset.dataset.purged: 32 if not dataset.dataset.purged:
34 dataset.peek = 'Binary sra file' 33 dataset.peek = 'Binary sra file'
35 dataset.blurb = data.nice_size(dataset.get_size()) 34 dataset.blurb = nice_size(dataset.get_size())
36 else: 35 else:
37 dataset.peek = 'file does not exist' 36 dataset.peek = 'file does not exist'
38 dataset.blurb = 'file purged from disk' 37 dataset.blurb = 'file purged from disk'
38
39 def display_peek(self, dataset): 39 def display_peek(self, dataset):
40 try: 40 try:
41 return dataset.peek 41 return dataset.peek
42 except: 42 except:
43 return 'Binary sra file (%s)' % ( data.nice_size(dataset.get_size())) 43 return 'Binary sra file (%s)' % (nice_size(dataset.get_size()))
44 44
45 if hasattr(Binary, 'register_sniffable_binary_format'): 45 Binary.register_sniffable_binary_format('sra', 'sra', 'Sra')
46 Binary.register_sniffable_binary_format('sra', 'sra', sra)