Mercurial > repos > matt-shirley > sra_tools
comparison sra.py @ 0:cdcc400dcafc draft
Migrated separate tools fastq_dump, sam_dump, and sra_fetch to this repository for further development.
author | matt-shirley <mdshw5@gmail.com> |
---|---|
date | Tue, 27 Nov 2012 13:31:09 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:cdcc400dcafc |
---|---|
1 """ | |
2 Sra class | |
3 """ | |
4 | |
5 import galaxy.datatypes.binary | |
6 from galaxy.datatypes.binary import Binary | |
7 import data, logging, binascii | |
8 from galaxy.datatypes.metadata import MetadataElement | |
9 from galaxy.datatypes import metadata | |
10 from galaxy.datatypes.sniff import * | |
11 from galaxy import eggs | |
12 import pkg_resources | |
13 pkg_resources.require( "bx-python" ) | |
14 import os, subprocess, tempfile | |
15 import struct | |
16 | |
class Sra( Binary ):
    """Sequence Read Archive (SRA) binary datatype.

    Identifies NCBI SRA files by their 8-byte magic header and provides the
    short "peek"/"blurb" summary text stored on a dataset.
    """
    file_ext = "sra"

    def __init__( self, **kwd ):
        Binary.__init__( self, **kwd )

    def sniff( self, filename ):
        """Return True if the file at *filename* starts with the NCBI SRA magic.

        The first 8 bytes of any NCBI sra file are 'NCBI.sra', and the file is
        binary. EBI and DDBJ files may differ. For details about the format, see
        http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure

        Any failure to open or read the file is reported as "not an SRA file"
        (False) rather than raised.
        """
        try:
            # Read as bytes: the header is binary data, and a text-mode read
            # would yield str on Python 3 and never match the magic. The
            # context manager guarantees the handle is closed.
            with open( filename, 'rb' ) as handle:
                header = handle.read( 8 )
        except Exception:
            # Sniffing is best-effort: an unreadable or missing file simply
            # does not match. KeyboardInterrupt/SystemExit still propagate.
            return False
        return header == b'NCBI.sra'

    def set_peek( self, dataset, is_multi_byte=False ):
        """Populate ``dataset.peek`` and ``dataset.blurb``.

        When the underlying dataset has not been purged, the peek is a fixed
        description and the blurb is ``data.nice_size`` applied to the dataset
        size; otherwise both fields say the file is gone.
        *is_multi_byte* is accepted but not used here.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Binary sra file"
            dataset.blurb = data.nice_size( dataset.get_size() )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """Return the stored peek text, or a generated fallback description.

        If reading ``dataset.peek`` fails, fall back to a description built
        from the dataset size.
        """
        try:
            return dataset.peek
        except Exception:
            return "Binary sra file (%s)" % ( data.nice_size( dataset.get_size() ) )
45 | |
# Register the Sra class with Binary under the name/extension "sra" at import time.
# NOTE(review): presumably the arguments are (data_type, ext, type_class) and this
# makes Sra.sniff() part of binary format detection - confirm against Binary.
46 Binary.register_sniffable_binary_format("sra", "sra", Sra) | |