# HG changeset patch
# User matt-shirley
# Date 1353276744 18000
# Node ID 61d634d123c001568af02c5096ea21831ca941a3
# Parent d7708f338c826776ffd999465532da609a24a3e3
# Added datatypes_conf and sra class.
#
# The changeset adds two new files:
#   * datatypes_conf.xml (12 lines, no trailing newline) -- its contents are
#     not present in this copy of the patch.
#   * sra.py (Sun Nov 18 17:12:24 2012 -0500) -- reproduced below.
"""
Sra class

Galaxy datatype for NCBI Sequence Read Archive (SRA) files.
"""

import galaxy.datatypes.binary
from galaxy.datatypes.binary import Binary
import data, logging, binascii
from galaxy.datatypes.metadata import MetadataElement
from galaxy.datatypes import metadata
from galaxy.datatypes.sniff import *
from galaxy import eggs
import pkg_resources
pkg_resources.require( "bx-python" )
import os, subprocess, tempfile
import struct


class Sra( Binary ):
    """ Sequence Read Archive (SRA) """
    file_ext = "sra"

    def __init__( self, **kwd ):
        """Forward all keyword arguments to the ``Binary`` base class."""
        Binary.__init__( self, **kwd )

    def sniff( self, filename ):
        """
        Return True if ``filename`` starts with the NCBI SRA magic bytes.

        The first 8 bytes of any NCBI sra file is 'NCBI.sra', and the file is
        binary. EBI and DDBJ files may differ. For details about the format,
        see
        http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure

        Returns False when the header does not match or the file cannot be
        read.
        """
        try:
            # Binary mode: the magic is compared byte-for-byte, so no newline
            # translation or text decoding may touch it. The context manager
            # guarantees the handle is closed even if the read fails.
            with open( filename, 'rb' ) as handle:
                header = handle.read( 8 )
        except ( IOError, OSError ):
            # An unreadable file simply is not recognised as SRA.
            return False
        return header == b'NCBI.sra'

    def set_peek( self, dataset, is_multi_byte=False ):
        """
        Set the peek and blurb text shown for ``dataset``.

        A purged dataset gets placeholder messages; otherwise the peek is a
        fixed label and the blurb is the human-readable file size.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Binary sra file"
            dataset.blurb = data.nice_size( dataset.get_size() )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """
        Return the stored peek text, or a generic label with the file size
        when the peek cannot be read.
        """
        try:
            return dataset.peek
        except Exception:
            # Best-effort fallback, but without trapping SystemExit or
            # KeyboardInterrupt the way a bare ``except:`` would.
            return "Binary sra file (%s)" % ( data.nice_size( dataset.get_size() ) )


Binary.register_sniffable_binary_format("sra", "sra", Sra)