annotate sra_tools-04cc8176e86f/sra.py @ 0:c386fe82db82

Initial commit, moving from test tool shed.
author matt-shirley
date Wed, 25 Sep 2013 21:00:05 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c386fe82db82 Initial commit, moving from test tool shed.
matt-shirley
parents:
diff changeset
1 """
c386fe82db82 Initial commit, moving from test tool shed.
matt-shirley
parents:
diff changeset
2 NCBI sra class
c386fe82db82 Initial commit, moving from test tool shed.
matt-shirley
parents:
diff changeset
3 """
c386fe82db82 Initial commit, moving from test tool shed.
matt-shirley
parents:
diff changeset
4 import logging
c386fe82db82 Initial commit, moving from test tool shed.
matt-shirley
parents:
diff changeset
5 import binascii
c386fe82db82 Initial commit, moving from test tool shed.
matt-shirley
parents:
diff changeset
6 from galaxy.datatypes.data import *
c386fe82db82 Initial commit, moving from test tool shed.
matt-shirley
parents:
diff changeset
7 from galaxy.datatypes.sniff import *
c386fe82db82 Initial commit, moving from test tool shed.
matt-shirley
parents:
diff changeset
8 from galaxy.datatypes.binary import *
c386fe82db82 Initial commit, moving from test tool shed.
matt-shirley
parents:
diff changeset
9 from galaxy.datatypes.metadata import *
c386fe82db82 Initial commit, moving from test tool shed.
matt-shirley
parents:
diff changeset
10
c386fe82db82 Initial commit, moving from test tool shed.
matt-shirley
parents:
diff changeset
11 log = logging.getLogger(__name__)
c386fe82db82 Initial commit, moving from test tool shed.
matt-shirley
parents:
diff changeset
12
c386fe82db82 Initial commit, moving from test tool shed.
matt-shirley
parents:
diff changeset
class sra( Binary ):
    """Sequence Read Archive (SRA) binary datatype.

    Recognises NCBI SRA files by their 8-byte magic header and provides the
    peek/blurb text stored on datasets of this type.
    """
    file_ext = 'sra'

    def __init__( self, **kwd ):
        """Forward all keyword arguments unchanged to ``Binary.__init__``."""
        Binary.__init__( self, **kwd )

    def sniff( self, filename ):
        """Return True if the file at `filename` starts with the NCBI SRA magic bytes.

        The first 8 bytes of any NCBI sra file are 'NCBI.sra', and the file is binary.
        EBI and DDBJ files may differ, though EBI and DDBJ submissions through NCBI
        (ERR and DRR accessions) read 'NCBI.sra'.
        For details about the format, see
        http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure

        Any failure to open or read the file is reported as False rather than raised.
        """
        magic = b'NCBI.sra'
        try:
            # Binary mode keeps the header bytes untouched (no newline translation
            # or text decoding), and the `with` block closes the handle promptly.
            with open( filename, 'rb' ) as handle:
                header = handle.read( len( magic ) )
        except Exception:
            # Sniffing is best-effort: an unreadable file is simply "not sra".
            return False
        # Direct bytes comparison; equivalent to comparing the hex encodings.
        return header == magic

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set `dataset.peek` and `dataset.blurb` for an sra dataset.

        A dataset that has not been purged gets a fixed description and its
        human-readable size; a purged one gets placeholder text instead.
        `is_multi_byte` is accepted but not used here.
        """
        if not dataset.dataset.purged:
            dataset.peek = 'Binary sra file'
            dataset.blurb = data.nice_size( dataset.get_size() )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """Return the stored peek text, or a size-based description if reading it fails."""
        try:
            return dataset.peek
        except Exception:
            # Fall back to a freshly built description when the stored peek
            # cannot be read.
            return 'Binary sra file (%s)' % ( data.nice_size( dataset.get_size() ) )
c386fe82db82 Initial commit, moving from test tool shed.
matt-shirley
parents:
diff changeset
44
c386fe82db82 Initial commit, moving from test tool shed.
matt-shirley
parents:
diff changeset
# Register the sra datatype for binary sniffing, but only when Binary actually
# exposes the registration hook (presumably absent on some Galaxy releases --
# TODO confirm which).
_sniff_hook = 'register_sniffable_binary_format'
if hasattr( Binary, _sniff_hook ):
    getattr( Binary, _sniff_hook )( 'sra', 'sra', sra )
# Drop the temporary so the module namespace is left as it was.
del _sniff_hook