comparison cuffdata.py @ 6:137aab1d9ac1

Add metadata to datatype: CuffDataDB
author Jim Johnson <jj@umn.edu>
date Mon, 18 Nov 2013 16:43:15 -0600
parents da7241f92ecf
children b5562b9a55c7
comparison
equal deleted inserted replaced
5:2bb88bf1c1dd 6:137aab1d9ac1
1 """ 1 """
2 CuffData 2 CuffData
3 """ 3 """
4 import logging 4 import logging
5 import os,os.path,re 5 import os,os.path,sys,re
6 import tempfile
7 from subprocess import Popen
6 import galaxy.datatypes.data 8 import galaxy.datatypes.data
7 from galaxy.datatypes.images import Html 9 from galaxy.datatypes.images import Html
8 from galaxy.datatypes.binary import Binary 10 from galaxy.datatypes.binary import Binary
9 from galaxy import util 11 from galaxy import util
10 from galaxy.datatypes.metadata import MetadataElement 12 from galaxy.datatypes.metadata import MetadataElement
108 110
def sniff(self, filename):
    """
    Report whether ``filename`` looks like this datatype.

    This implementation ignores ``filename`` and always answers False.
    NOTE(review): the enclosing class is not visible in this view;
    presumably this opts the datatype out of content-based detection --
    confirm against the class definition.
    """
    return False
111 113
class CuffDataDB( Binary ):
    """
    Binary datatype for a database file that cummeRbund's
    ``readCufflinks()`` can open through its ``dbFile`` argument.

    Metadata (sample names, replicate names and gene ids) is extracted by
    running a small generated R script against the dataset file.
    """
    file_ext = 'cuffdatadb'
    is_binary = True
    allow_datatype_change = False
    MetadataElement( name="sample_names", default=[], desc="Sample names", readonly=True, visible=True, optional=True, no_value=[] )
    MetadataElement( name="replicate_names", default=[], desc="Replicate names", readonly=True, visible=True, optional=True, no_value=[] )
    MetadataElement( name="gene_ids", default=[], desc="Gene Ids", readonly=True, visible=True, optional=True, no_value=[] )

    def __init__( self, **kwd ):
        """Initialise the underlying Binary datatype and log the creation."""
        Binary.__init__( self, **kwd )
        log.info('Creating cummeRbund CuffDataDB')

    @staticmethod
    def _metadata_rscript( db_path ):
        """Return the text of the R script that dumps blurb, samples, replicates and gene ids for ``db_path``."""
        # Escape backslashes and double quotes so that an unusual path cannot
        # terminate the R string literal early.
        escaped = db_path.replace( '\\', '\\\\' ).replace( '"', '\\"' )
        lines = [
            'library(cummeRbund)',
            'cuff<-readCufflinks(dir = "", dbFile = "%s", rebuild = F)' % escaped,
            'sink("out.blurb")',
            'print(cuff)',
            'sink()',
            'sink("out.samples")',
            'cat(samples(cuff)[[2]],sep=",")',
            'sink()',
            'sink("out.replicates")',
            'cat(replicates(cuff)[[4]],sep=",")',
            'sink()',
            'sink("out.gene_ids")',
            'cat(annotation(genes(cuff))[[1]],sep=",")',
            'sink()',
        ]
        return '\n'.join( lines ) + '\n'

    @staticmethod
    def _read_text( path ):
        """Return the full contents of the text file at ``path``."""
        with open( path, 'r' ) as fh:
            return fh.read()

    @staticmethod
    def _split_names( text ):
        """Split comma separated R output into a list, dropping empty entries."""
        # ''.split(',') yields [''], which would be stored as one bogus name.
        return [ name for name in text.rstrip( '\r\n' ).split( ',' ) if name ]

    def set_meta( self, dataset, **kwd ):
        """
        Populate ``dataset.blurb`` and the sample/replicate/gene metadata.

        Runs ``Rscript --vanilla`` on a generated script inside a private
        scratch directory, then reads the ``out.*`` files the script wrote.

        dataset -- object exposing ``has_data()``, ``file_name``, ``blurb``
                   and ``metadata``; nothing is done when it has no data.
        kwd     -- accepted for interface compatibility and ignored.

        Best effort: any failure (R or cummeRbund missing, unreadable
        database, non-zero Rscript exit status) is logged and leaves the
        dataset untouched; no exception is propagated to the caller.
        """
        if not dataset.has_data():
            return
        # Imported here because only this method needs it; the module-level
        # import block stays as it is.
        import shutil
        tmp_dir = None
        try:
            # A fresh directory per call keeps concurrent runs from clobbering
            # each other's out.* files and guarantees no stale output from an
            # earlier dataset is picked up if R fails.
            tmp_dir = tempfile.mkdtemp( prefix='cuffdb_' )
            rscript = os.path.join( tmp_dir, 'cuffdata_meta.r' )
            with open( rscript, 'w' ) as rscript_fh:
                rscript_fh.write( self._metadata_rscript( dataset.file_name ) )
            stderr_path = os.path.join( tmp_dir, 'rscript.err' )
            with open( stderr_path, 'wb' ) as tmp_stderr:
                # Argument list, no shell: the script path is never parsed by
                # a shell.  cwd matters because the sink() names are relative.
                proc = Popen( args=[ 'Rscript', '--vanilla', rscript ], cwd=tmp_dir, stderr=tmp_stderr )
                returncode = proc.wait()
            if returncode != 0:
                # Output files may be missing or truncated; do not apply them.
                log.error( 'Error setting cummeRbund CuffDataDB metadata : Rscript exited with status %s: %s'
                           % ( returncode, self._read_text( stderr_path ) ) )
                return
            blurb_path = os.path.join( tmp_dir, 'out.blurb' )
            if os.path.isfile( blurb_path ):
                dataset.blurb = self._read_text( blurb_path )
            outputs = ( ( 'out.samples', 'sample_names' ),
                        ( 'out.replicates', 'replicate_names' ),
                        ( 'out.gene_ids', 'gene_ids' ) )
            for out_name, meta_name in outputs:
                out_path = os.path.join( tmp_dir, out_name )
                if os.path.isfile( out_path ):
                    setattr( dataset.metadata, meta_name, self._split_names( self._read_text( out_path ) ) )
        except Exception as e:
            log.error('Error setting cummeRbund CuffDataDB metadata : %s' % str(e))
        finally:
            if tmp_dir is not None:
                shutil.rmtree( tmp_dir, ignore_errors=True )
178