Mercurial > repos > jjohnson > gmap
view gmap/lib/galaxy/datatypes/gmap.py @ 0:d58d272914e7
Uploaded
author | jjohnson |
---|---|
date | Tue, 18 Oct 2011 12:42:42 -0400 |
parents | |
children | 52da588232b0 |
line wrap: on
line source
""" GMAP indexes """ import logging import os,os.path,re from data import Text from metadata import MetadataElement log = logging.getLogger(__name__) class GmapDB( Text ): """ A GMAP DB for indexes """ MetadataElement( name="db_name", desc="The db name for this index set", default='unknown', set_in_upload=True, readonly=True ) MetadataElement( name="basesize", default="12", desc="The basesize for offsetscomp", visible=True, readonly=True ) MetadataElement( name="kmers", default=[''], desc="The kmer sizes for indexes", visible=True, no_value=[''], readonly=True ) MetadataElement( name="map_dir", desc="The maps directory", default='unknown', set_in_upload=True, readonly=True ) MetadataElement( name="maps", default=[''], desc="The names of maps stored for this gmap gmapdb", visible=True, no_value=[''], readonly=True ) MetadataElement( name="snps", default=[''], desc="The names of SNP indexes stored for this gmapdb", visible=True, no_value=[''], readonly=True ) MetadataElement( name="cmet", default=False, desc="Has a cmet index", visible=True, readonly=True ) MetadataElement( name="atoi", default=False, desc="Has a atoi index", visible=True, readonly=True ) file_ext = 'gmapdb' is_binary = True composite_type = 'auto_primary_file' allow_datatype_change = False def generate_primary_file( self, dataset = None ): """ This is called only at upload to write the html file cannot rename the datasets here - they come with the default unfortunately """ return '<html><head></head><body>AutoGenerated Primary File for Composite Dataset</body></html>' def regenerate_primary_file(self,dataset): """ cannot do this until we are setting metadata """ bn = dataset.metadata.db_name log.info( "GmapDB regenerate_primary_file %s" % (bn)) rval = ['<html><head><title>GMAPDB %s</title></head><p/><H3>GMAPDB %s</H3><p/>cmet %s<br>atoi %s<H4>Maps:</H4><ul>' % (bn,bn,dataset.metadata.cmet,dataset.metadata.atoi)] for i,name in enumerate(dataset.metadata.maps): rval.append( '<li>%s' % name) rval.append( '</ul></html>' ) f = file(dataset.file_name,'w') f.write("\n".join( rval )) f.write('\n') f.close() def set_peek( self, dataset, is_multi_byte=False ): log.info( "GmapDB set_peek %s" % (dataset)) if not dataset.dataset.purged: dataset.peek = "GMAPDB index %s\n cmet %s\n atoi %s\n maps %s" % ( dataset.metadata.db_name,dataset.metadata.cmet,dataset.metadata.atoi,dataset.metadata.maps ) dataset.blurb = "GMAPDB %s" % ( dataset.metadata.db_name ) else: dataset.peek = 'file does not exist' dataset.blurb = 'file purged from disk' def display_peek( self, dataset ): try: return dataset.peek except: return "GMAP index file" def sniff( self, filename ): return False def set_meta( self, dataset, overwrite = True, **kwd ): """ Expecting: extra_files_path/<db_name>/db_name>.ref<basesize><kmer>3<index> extra_files_path/db_name/db_name.ref1[2345]1[2345]3offsetscomp extra_files_path/db_name/db_name.ref1[2345]1[2345]3positions extra_files_path/db_name/db_name.ref1[2345]1[2345]3gammaptrs index maps: extra_files_path/db_name/db_name.maps/*.iit """ log.info( "GmapDB set_meta %s %s" % (dataset,dataset.extra_files_path)) pat = '(.*)\.((ref)|(met)[atgc][atgc]|(a2i)[atgc][atgc])((\d\d)(\d\d))?3positions(\.(.+))?' efp = dataset.extra_files_path flist = os.listdir(efp) for i,fname in enumerate(flist): log.info( "GmapDB set_meta %s %s" % (i,fname)) fpath = os.path.join(efp,fname) if os.path.isdir(fpath): ilist = os.listdir(fpath) kmers = {'':'default'} # HACK '' empty key added so user has default choice when selecting kmer from metadata for j,iname in enumerate(ilist): log.info( "GmapDB set_meta file %s %s" % (j,iname)) ipath = os.path.join(fpath,iname) if os.path.isdir(ipath): # find maps dataset.metadata.map_dir = iname for mapfile in os.listdir(ipath): mapname = mapfile.replace('.iit','') log.info( "GmapDB set_meta map %s %s" % (mapname,mapfile)) dataset.metadata.maps.append(mapname) else: m = re.match(pat,iname) if m: log.info( "GmapDB set_meta m %s %s " % (iname, m)) assert len(m.groups()) == 10 dataset.metadata.db_name = fname if m.groups()[2] == 'ref': if m.groups()[-1] != None: dataset.metadata.snps.append(m.groups()[-1]) else: if m.groups()[-3] != None: k = int(m.groups()[-3]) kmers[k] = k if m.groups()[-4] != None: dataset.metadata.basesize = int( m.groups()[-4]) elif m.groups()[3] == 'met': dataset.metadata.cmet = True elif m.groups()[4] == 'a2i': dataset.metadata.atoi = True dataset.metadata.kmers = kmers.keys() ## class IntervalIndexTree( Text ): ## """ ## A GMAP Interval Index Tree Map ## created by iit_store ## (/path/to/map)/(mapname).iit ## """ ## MetadataElement( name="map_name", desc="The map name for this index set", default='unknown', set_in_upload=True, readonly=False ) ## file_ext = 'iit' ## is_binary = True ## composite_type = 'auto_primary_file' ## allow_datatype_change = False ## ## class IntervalAnnotation(data.Text): ## """ ## Class describing a GMAP Interval format: ## >label coords optional_tag ## optional_annotation (which may be zero, one, or multiple lines) ## The coords should be of the form: ## chr:position ## chr:startposition..endposition ## """ ## file_ext = 'gmapannotation' ## ## class SpliceSiteAnnotation(IntervalAnnotation): ## file_ext = 'gmapsplicesites' ## """ ## Example: ## >NM_004448.ERBB2.exon1 17:35110090..35110091 donor 6678 ## >NM_004448.ERBB2.exon2 17:35116768..35116769 acceptor 6678 ## >NM_004448.ERBB2.exon2 17:35116920..35116921 donor 1179 ## >NM_004448.ERBB2.exon3 17:35118099..35118100 acceptor 1179 ## >NM_004449.ERG.exon1 21:38955452..38955451 donor 783 ## >NM_004449.ERG.exon2 21:38878740..38878739 acceptor 783 ## >NM_004449.ERG.exon2 21:38878638..38878637 donor 360 ## >NM_004449.ERG.exon3 21:38869542..38869541 acceptor 360 ## """ ## ## class IntronAnnotation(IntervalAnnotation): ## file_ext = 'gmapintrons' ## """ ## Example: ## >NM_004448.ERBB2.intron1 17:35110090..35116769 ## >NM_004448.ERBB2.intron2 17:35116920..35118100 ## >NM_004449.ERG.intron1 21:38955452..38878739 ## >NM_004449.ERG.intron2 21:38878638..38869541 ## """ ## ## class SNPAnnotation(IntervalAnnotation): ## file_ext = 'gmapsnps' ## """ ## Example: ## >rs62211261 21:14379270 CG ## >rs62211262 21:14379281 CG ## """