view sra.py @ 0:cdcc400dcafc draft

Migrated separate tools fastq_dump, sam_dump, and sra_fetch to this repository for further development.
author matt-shirley <mdshw5@gmail.com>
date Tue, 27 Nov 2012 13:31:09 -0500
parents
children
line wrap: on
line source

"""
Sra class
"""

import galaxy.datatypes.binary
from galaxy.datatypes.binary import Binary
import data, logging, binascii
from galaxy.datatypes.metadata import MetadataElement
from galaxy.datatypes import metadata
from galaxy.datatypes.sniff import *
from galaxy import eggs
import pkg_resources
pkg_resources.require( "bx-python" )
import os, subprocess, tempfile
import struct

class Sra( Binary ):
    """
    Sequence Read Archive (SRA) binary datatype.

    Detection is based on the 8-byte magic string that NCBI writes at the
    start of every .sra file.
    """
    file_ext = "sra"

    # Magic bytes expected at offset 0 of an NCBI-produced sra file.
    _MAGIC = b'NCBI.sra'

    def __init__( self, **kwd ):
        """Forward all keyword arguments unchanged to Binary.__init__."""
        Binary.__init__( self, **kwd )

    def sniff( self, filename ):
        """
        Return True if the file at `filename` starts with the NCBI sra magic.

        The first 8 bytes of any NCBI sra file is 'NCBI.sra', and the file is
        binary. EBI and DDBJ files may differ. For details about the format, see
        http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure

        Any error while opening or reading the file (missing file, permission
        problem, invalid path, ...) is treated as "not an sra file" and yields
        False rather than propagating.
        """
        try:
            # Open in binary mode so the header is read as raw bytes with no
            # newline translation or text decoding, and use a context manager
            # so the handle is closed even if the read fails.
            with open( filename, 'rb' ) as handle:
                header = handle.read( len( self._MAGIC ) )
        except Exception:
            # Sniffing is best-effort: an unreadable file is simply not a match.
            return False
        # A file shorter than the magic yields a shorter header and fails here.
        return header == self._MAGIC

    def set_peek( self, dataset, is_multi_byte=False ):
        """
        Set `dataset.peek` and `dataset.blurb`.

        When the underlying dataset is not purged, the peek is a fixed label
        and the blurb is the size from `dataset.get_size()` formatted by
        `data.nice_size`. Otherwise both are set to fixed "purged" messages.
        `is_multi_byte` is accepted for interface compatibility and is not
        used here.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Binary sra file"
            dataset.blurb = data.nice_size( dataset.get_size() )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """
        Return `dataset.peek`, falling back to a generated label with the
        formatted file size if reading the peek raises.
        """
        try:
            return dataset.peek
        except Exception:
            return "Binary sra file (%s)" % ( data.nice_size( dataset.get_size() ) )

# Hand the Sra class to Binary's sniffable-format registry together with two
# "sra" strings (presumably the format name and the file extension -- confirm
# against Binary.register_sniffable_binary_format). This runs at import time.
Binary.register_sniffable_binary_format("sra", "sra", Sra)