view lib/galaxy/datatypes/igv.py @ 0:2eb1e2924c1a

Uploaded
author jjohnson
date Tue, 17 Jan 2012 21:53:25 -0500
parents
children
line wrap: on
line source

"""
IGV datatypes
"""
import logging,zipfile
import galaxy.datatypes.binary
from galaxy.datatypes.binary import Binary

log = logging.getLogger(__name__)

class TiledDataFile( Binary ):
    """Class describing an IGV tiled data file (TDF) .tdf  binary file"""
    file_ext = "igv.tdf"

    def __init__( self, **kwd ):
        """Forward all keyword arguments unchanged to the Binary initializer."""
        Binary.__init__( self, **kwd )

    def sniff( self, filename ):
        """
        Report whether the file at `filename` starts with the TDF magic bytes.

        The first 4 bytes of a TDF file are 'TDF3', and the file is binary. For
        details about the format, see http://www.broadinstitute.org/software/igv/TDF

        Returns True when the first four bytes equal 'TDF3'; returns False
        otherwise, including when the file cannot be opened or read.
        """
        magic = b'TDF3'
        try:
            # Binary mode: the header must be compared as raw bytes, with no
            # newline translation or text decoding applied to it.
            with open( filename, 'rb' ) as handle:
                return handle.read( len( magic ) ) == magic
        except Exception:
            # Sniffing is best effort: an unreadable or missing file simply
            # does not match this datatype.
            return False

class GenomeDescriptor( Binary ):
    """Class describing an IGV .genome zip archive  file"""
    file_ext = "igv.genome"

    def __init__( self, **kwd ):
        """Forward all keyword arguments unchanged to the Binary initializer."""
        Binary.__init__( self, **kwd )

    def sniff( self, filename ):
        """
        Report whether the file at `filename` looks like an IGV .genome archive.

        The file must be a zip archive containing a member named 'property.txt'
        in which at least one line starts with the key 'sequenceLocation'.

        Returns True when such a line is found; returns False otherwise,
        including when `filename` is None or the archive cannot be read.
        """
        try:
            if filename is None or not zipfile.is_zipfile( filename ):
                return False
            genome_archive = zipfile.ZipFile( filename )
            try:
                if 'property.txt' not in genome_archive.namelist():
                    return False
                # Members are read as raw bytes, so the key is matched with a
                # bytes prefix; this behaves the same on Python 2 and Python 3.
                for line in genome_archive.open( 'property.txt' ):
                    if line.startswith( b'sequenceLocation' ):
                        return True
                return False
            finally:
                # Always release the archive's file handle, whatever the outcome.
                genome_archive.close()
        except Exception:
            # Sniffing is best effort: a corrupt or unreadable archive simply
            # does not match this datatype.
            return False