0
|
1 """
|
|
2 NCBI sra class
|
|
3 """
|
|
4 import logging
|
|
5 import binascii
|
|
6 from galaxy.datatypes.data import *
|
|
7 from galaxy.datatypes.sniff import *
|
|
8 from galaxy.datatypes.binary import *
|
|
9 from galaxy.datatypes.metadata import *
|
|
10
|
|
11 log = logging.getLogger(__name__)
|
|
12
|
|
class sra( Binary ):
    """Sequence Read Archive (SRA) binary datatype.

    Files are recognised by their leading 8-byte magic string ``NCBI.sra``.
    """
    file_ext = 'sra'

    def __init__( self, **kwd ):
        """Forward all keyword arguments unchanged to ``Binary.__init__``."""
        Binary.__init__( self, **kwd )

    def sniff( self, filename ):
        """Return True when ``filename`` starts with the NCBI SRA magic bytes.

        The first 8 bytes of any NCBI sra file is 'NCBI.sra', and the file is
        binary. EBI and DDBJ files may differ, though EBI and DDBJ submissions
        through NCBI (ERR and DRR accessions) read 'NCBI.sra'.
        For details about the format, see
        http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure

        Any failure to open or read the file is reported as "not an sra file"
        (False) rather than raised.
        """
        try:
            # Binary mode: the header must be compared as raw bytes, and the
            # context manager guarantees the handle is closed.
            with open( filename, 'rb' ) as handle:
                header = handle.read( 8 )
        except Exception:
            # Best-effort check: an unreadable or invalid path simply does
            # not match. KeyboardInterrupt/SystemExit still propagate.
            return False
        return header == b'NCBI.sra'

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set ``dataset.peek`` and ``dataset.blurb`` for display.

        A dataset that is not purged gets a fixed description plus its
        human-readable size; a purged one gets placeholder messages.
        ``is_multi_byte`` is accepted but not used by this method.
        """
        if not dataset.dataset.purged:
            dataset.peek = 'Binary sra file'
            dataset.blurb = data.nice_size( dataset.get_size() )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """Return ``dataset.peek``, or a generic size description if reading it fails."""
        try:
            return dataset.peek
        except Exception:
            return 'Binary sra file (%s)' % ( data.nice_size( dataset.get_size() ) )
|
|
44
|
|
# Register the sra datatype with Binary's sniffable-format registry, but only
# when the Binary class in scope actually provides that registration hook.
try:
    Binary.register_sniffable_binary_format
except AttributeError:
    # Hook not available on this Binary class: nothing to register.
    pass
else:
    Binary.register_sniffable_binary_format('sra', 'sra', sra)
|