Mercurial > repos > matt-shirley > sra_tools
diff sra.py @ 0:cdcc400dcafc draft
Migrated separate tools fastq_dump, sam_dump, and sra_fetch to this repository for further development.
author | matt-shirley <mdshw5@gmail.com> |
---|---|
date | Tue, 27 Nov 2012 13:31:09 -0500 |
parents | |
children |
line wrap: on
line diff
# sra.py -- file content added by this changeset, shown without the diff "+" markers.
"""
Sra class
"""

import galaxy.datatypes.binary
from galaxy.datatypes.binary import Binary
import data, logging, binascii
from galaxy.datatypes.metadata import MetadataElement
from galaxy.datatypes import metadata
from galaxy.datatypes.sniff import *
from galaxy import eggs
import pkg_resources
pkg_resources.require( "bx-python" )
import os, subprocess, tempfile
import struct


class Sra( Binary ):
    """ Sequence Read Archive (SRA) """
    file_ext = "sra"

    def __init__( self, **kwd ):
        """Forward all keyword arguments unchanged to Binary.__init__."""
        Binary.__init__( self, **kwd )

    def sniff( self, filename ):
        """
        Return True if the file at `filename` begins with the bytes 'NCBI.sra'.

        Returns False when the first 8 bytes differ, when the file is shorter
        than 8 bytes, or when the file cannot be opened or read.
        """
        # The first 8 bytes of any NCBI sra file is 'NCBI.sra', and the file is binary. EBI and DDBJ files may differ. For details
        # about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure
        try:
            # Binary mode: the magic must be compared as raw bytes, and the
            # `with` block guarantees the handle is closed after the read.
            with open( filename, 'rb' ) as handle:
                header = handle.read( 8 )
        except ( IOError, OSError ):
            # Sniffing is best-effort: an unreadable file is simply "not sra".
            return False
        # A bytes literal compares correctly against the data read in 'rb'
        # mode on both Python 2 and Python 3.
        return header == b'NCBI.sra'

    def set_peek( self, dataset, is_multi_byte=False ):
        """
        Set `dataset.peek` and `dataset.blurb`.

        If the underlying dataset is not purged, the peek is a fixed label and
        the blurb is the size string from data.nice_size(); otherwise both are
        set to fixed "purged" messages. `is_multi_byte` is accepted but not
        used by this method.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Binary sra file"
            dataset.blurb = data.nice_size( dataset.get_size() )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """
        Return `dataset.peek`, or a generic label that includes the dataset
        size if reading the peek raises an exception.
        """
        try:
            return dataset.peek
        except Exception:
            # Still best-effort, but no longer swallows KeyboardInterrupt / SystemExit.
            return "Binary sra file (%s)" % ( data.nice_size( dataset.get_size() ) )


# Register the Sra class with Binary as a sniffable binary format.
# NOTE(review): the two "sra" arguments are presumably the format name and the
# file extension -- confirm against Binary.register_sniffable_binary_format.
Binary.register_sniffable_binary_format("sra", "sra", Sra)