view snpsift_dbnsfp.py @ 0:0c4372b93e85 draft default tip

Uploaded
author iuc
date Thu, 22 Jan 2015 08:04:59 -0500
parents
children
line wrap: on
line source

"""
SnpSift dbNSFP datatypes
"""
import os,os.path,re,sys,gzip,logging
import traceback
import galaxy.datatypes.data
from galaxy.datatypes.data import Text
from galaxy.datatypes.metadata import MetadataElement

log = logging.getLogger(__name__)

class SnpSiftDbNSFP( Text ):
    """Class describing a dbNSFP database prepared fpr use by SnpSift dbnsfp """
    MetadataElement( name='reference_name', default='dbSNFP' , desc='Reference Name', readonly=True, visible=True, set_in_upload=True, no_value='dbSNFP' )
    MetadataElement( name="bgzip", default=None, desc="dbNSFP bgzip", readonly=True, visible=True, no_value=None )
    MetadataElement( name="index", default=None, desc="Tabix Index File", readonly=True, visible=True, no_value=None)
    MetadataElement( name="annotation", default=[], desc="Annotation Names", readonly=True, visible=True, no_value=[] )
    file_ext = "snpsiftdbnsfp"
    composite_type = 'auto_primary_file'
    allow_datatype_change = False
    """
    ## The dbNSFP file is a tabular file with 1 header line 
    ## The first 4 columns are required to be: chrom	pos	ref	alt
    ## These match columns 1,2,4,5 of the VCF file
    ## SnpSift requires the file to be block-gzipped and the indexed with samtools tabix
    ## Example:
    ## Compress using block-gzip algorithm
    bgzip dbNSFP2.3.txt
    ## Create tabix index
    tabix -s 1 -b 2 -e 2 dbNSFP2.3.txt.gz
    """
    def __init__( self, **kwd ):
        Text.__init__( self, **kwd )
        self.add_composite_file( '%s.grp', description = 'Group File', substitute_name_with_metadata = 'reference_name', is_binary = False )
        self.add_composite_file( '%s.ti', description = '', substitute_name_with_metadata = 'reference_name', is_binary = False )
    def init_meta( self, dataset, copy_from=None ):
        Text.init_meta( self, dataset, copy_from=copy_from )
    def generate_primary_file( self, dataset = None ):
        """ 
        This is called only at upload to write the html file
        cannot rename the datasets here - they come with the default unfortunately
        """
        regenerate_primary_file( self, dataset)
    def regenerate_primary_file(self,dataset):
        """
        cannot do this until we are setting metadata 
        """
        annotations = "dbNSFP Annotations: %s\n" % ','.join(dataset.metadata.annotation)
        f = open(dataset.file_name,'a')
        if dataset.metadata.bgzip:
            bn = dataset.metadata.bgzip
            f.write(bn)
            f.write('\n')
        f.write(annotations)
        f.close()
    def set_meta( self, dataset, overwrite=True, **kwd ):
        try:
            efp = dataset.extra_files_path
            if os.path.exists(efp):
                flist = os.listdir(efp)
                for i,fname in enumerate(flist):
                    if fname.endswith('.gz'):
                        dataset.metadata.bgzip = fname
                        try:
                            fh = gzip.open(os.path.join(efp,fname),'r')
                            buf = fh.read(5000)
                            lines = buf.splitlines()
                            headers = lines[0].split('\t')
                            dataset.metadata.annotation = headers[4:]
                        except Exception,e:        
                            log.warn("set_meta fname: %s  %s" % (fname,str(e)))
                            traceback.print_stack(file=sys.stderr)
                        finally:
                            fh.close()
                    if fname.endswith('.tbi'):
                        dataset.metadata.index = fname
            self.regenerate_primary_file(dataset)
        except Exception,e:
            log.warn("set_meta fname: %s  %s" % (dataset.file_name if dataset and dataset.file_name else 'Unkwown',str(e)))
            traceback.print_stack(file=sys.stderr)

if __name__ == '__main__':
    import doctest
    doctest.testmod(sys.modules[__name__])