Mercurial > repos > matt-shirley > ncbi_sra_toolkit
comparison sra.py @ 6:e4c21444a3ba
Add sniffer, clean up imports, register sniffable datatype.
author | Matt Shirley <mdshw5@gmail.com> |
---|---|
date | Thu, 13 Mar 2014 15:30:54 -0400 |
parents | 956e892b299d |
children | 558a88cd49e4 |
comparison
Legend: equal, deleted, inserted, replaced
5:76c7d617cd56 | 6:e4c21444a3ba |
---|---|
1 """ | 1 """ |
2 NCBI sra class | 2 NCBI sra class |
3 """ | 3 """ |
4 import logging | 4 import logging |
5 import binascii | 5 import binascii |
6 from galaxy.datatypes.data import * | 6 from galaxy.datatypes.data import nice_size |
7 from galaxy.datatypes.sniff import * | 7 from galaxy.datatypes.binary import Binary |
8 from galaxy.datatypes.binary import * | |
9 from galaxy.datatypes.metadata import * | |
10 | 8 |
11 log = logging.getLogger(__name__) | 9 log = logging.getLogger(__name__) |
12 | 10 |
13 class sra( Binary ): | 11 class Sra(Binary): |
14 """ Sequence Read Archive (SRA) """ | 12 """ Sequence Read Archive (SRA) """ |
15 file_ext = 'sra' | 13 file_ext = 'sra' |
16 | 14 |
17 def __init__( self, **kwd ): | 15 def __init__( self, **kwd ): |
18 Binary.__init__( self, **kwd ) | 16 Binary.__init__( self, **kwd ) |
17 | |
19 def sniff( self, filename ): | 18 def sniff( self, filename ): |
20 """ The first 8 bytes of any NCBI sra file is 'NCIB.sra', and the file is binary. EBI and DDBJ files may differ, though EBI and DDBJ | 19 """ The first 8 bytes of any NCBI sra file is 'NCBI.sra', and the file is binary. |
21 submissions through NCBI (ERR and DRR accessions) read 'NCBI.sra'. | 20 For details about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure |
22 For details about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure | |
23 """ | 21 """ |
24 try: | 22 try: |
25 header = open(filename).read(8) | 23 header = open(filename).read(8) |
26 if binascii.b2a_hex(header) == binascii.hexlify('NCBI.sra'): | 24 if binascii.b2a_hex(header) == binascii.hexlify('NCBI.sra'): |
27 return True | 25 return True |
28 else: | 26 else: |
29 return False | 27 return False |
30 except: | 28 except: |
31 return False | 29 return False |
30 | |
32 def set_peek(self, dataset, is_multi_byte=False): | 31 def set_peek(self, dataset, is_multi_byte=False): |
33 if not dataset.dataset.purged: | 32 if not dataset.dataset.purged: |
34 dataset.peek = 'Binary sra file' | 33 dataset.peek = 'Binary sra file' |
35 dataset.blurb = data.nice_size(dataset.get_size()) | 34 dataset.blurb = nice_size(dataset.get_size()) |
36 else: | 35 else: |
37 dataset.peek = 'file does not exist' | 36 dataset.peek = 'file does not exist' |
38 dataset.blurb = 'file purged from disk' | 37 dataset.blurb = 'file purged from disk' |
38 | |
39 def display_peek(self, dataset): | 39 def display_peek(self, dataset): |
40 try: | 40 try: |
41 return dataset.peek | 41 return dataset.peek |
42 except: | 42 except: |
43 return 'Binary sra file (%s)' % ( data.nice_size(dataset.get_size())) | 43 return 'Binary sra file (%s)' % (nice_size(dataset.get_size())) |
44 | 44 |
45 if hasattr(Binary, 'register_sniffable_binary_format'): | 45 Binary.register_sniffable_binary_format('sra', 'sra', 'Sra') |
46 Binary.register_sniffable_binary_format('sra', 'sra', sra) |