Mercurial > repos > matt-shirley > sra_tools
view sra.py @ 0:cdcc400dcafc draft
Migrated separate tools fastq_dump, sam_dump, and sra_fetch to this repository for further development.
author | matt-shirley <mdshw5@gmail.com> |
---|---|
date | Tue, 27 Nov 2012 13:31:09 -0500 |
parents | |
children |
line wrap: on
line source
""" Sra class """ import galaxy.datatypes.binary from galaxy.datatypes.binary import Binary import data, logging, binascii from galaxy.datatypes.metadata import MetadataElement from galaxy.datatypes import metadata from galaxy.datatypes.sniff import * from galaxy import eggs import pkg_resources pkg_resources.require( "bx-python" ) import os, subprocess, tempfile import struct class Sra( Binary ): """ Sequence Read Archive (SRA) """ file_ext = "sra" def __init__( self, **kwd ): Binary.__init__( self, **kwd ) def sniff( self, filename ): # The first 8 bytes of any NCBI sra file is 'NCIB.sra', and the file is binary. EBI and DDBJ files may differ. For details # about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure try: header = open( filename ).read(8) if binascii.b2a_hex( header ) == binascii.hexlify( 'NCBI.sra' ): return True return False except: return False def set_peek( self, dataset, is_multi_byte=False ): if not dataset.dataset.purged: dataset.peek = "Binary sra file" dataset.blurb = data.nice_size( dataset.get_size() ) else: dataset.peek = 'file does not exist' dataset.blurb = 'file purged from disk' def display_peek( self, dataset ): try: return dataset.peek except: return "Binary sra file (%s)" % ( data.nice_size( dataset.get_size() ) ) Binary.register_sniffable_binary_format("sra", "sra", Sra)