Mercurial > repos > jjohnson > cummerbund
comparison cuffdata.py @ 6:137aab1d9ac1
Add metadata to datatype: CuffDataDB
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Mon, 18 Nov 2013 16:43:15 -0600 |
parents | da7241f92ecf |
children | b5562b9a55c7 |
comparison
equal
deleted
inserted
replaced
5:2bb88bf1c1dd | 6:137aab1d9ac1 |
---|---|
1 """ | 1 """ |
2 CuffData | 2 CuffData |
3 """ | 3 """ |
4 import logging | 4 import logging |
5 import os,os.path,re | 5 import os,os.path,sys,re |
6 import tempfile | |
7 from subprocess import Popen | |
6 import galaxy.datatypes.data | 8 import galaxy.datatypes.data |
7 from galaxy.datatypes.images import Html | 9 from galaxy.datatypes.images import Html |
8 from galaxy.datatypes.binary import Binary | 10 from galaxy.datatypes.binary import Binary |
9 from galaxy import util | 11 from galaxy import util |
10 from galaxy.datatypes.metadata import MetadataElement | 12 from galaxy.datatypes.metadata import MetadataElement |
108 | 110 |
def sniff( self, filename ):
    """
    Always return False; `filename` is never opened or inspected.

    NOTE(review): presumably this keeps the datatype from being selected
    by content sniffing -- confirm against the Galaxy datatype registry.
    """
    return False
111 | 113 |
class CuffDataDB( Binary ):
    """
    Binary datatype for a database file that cummeRbund can open with
    readCufflinks( dbFile=... ).

    set_meta() runs a small R script against the file to fill in the
    dataset blurb and the sample_names, replicate_names and gene_ids
    metadata elements.
    """
    file_ext = 'cuffdatadb'
    is_binary = True
    allow_datatype_change = False
    MetadataElement( name="sample_names", default=[], desc="Sample names", readonly=True, visible=True, optional=True, no_value=[] )
    MetadataElement( name="replicate_names", default=[], desc="Replicate names", readonly=True, visible=True, optional=True, no_value=[] )
    MetadataElement( name="gene_ids", default=[], desc="Gene Ids", readonly=True, visible=True, optional=True, no_value=[] )

    # ( metadata element name, R sink file name, R expression ) for every
    # list-valued metadata element.  The R expression is printed with
    # cat( ..., sep="," ) so its output can be split on commas.
    _R_LIST_QUERIES = (
        ( 'sample_names', 'out.samples', 'samples(cuff)[[2]]' ),
        ( 'replicate_names', 'out.replicates', 'replicates(cuff)[[4]]' ),
        ( 'gene_ids', 'out.gene_ids', 'annotation(genes(cuff))[[1]]' ),
    )

    def __init__( self, **kwd ):
        """Initialise the Binary base class and log the creation."""
        Binary.__init__( self, **kwd )
        log.info('Creating cummeRbund CuffDataDB')

    @staticmethod
    def _r_string( value ):
        """Return `value` as a double-quoted R string literal."""
        # Escape backslashes first so the escapes added for quotes survive.
        return '"%s"' % value.replace( '\\', '\\\\' ).replace( '"', '\\"' )

    def _write_rscript( self, rscript, db_file ):
        """Write the R script that dumps blurb and metadata for `db_file`.

        The script writes to relative sink files (out.blurb, out.samples,
        out.replicates, out.gene_ids), so it must be run with the scratch
        directory as its working directory.
        """
        lines = [
            'library(cummeRbund)',
            'cuff<-readCufflinks(dir = "", dbFile = %s, rebuild = F)' % self._r_string( db_file ),
            'sink("out.blurb")',
            'print(cuff)',
            'sink()',
        ]
        for _, out_name, r_expr in self._R_LIST_QUERIES:
            lines.extend( [ 'sink("%s")' % out_name,
                            'cat(%s,sep=",")' % r_expr,
                            'sink()' ] )
        with open( rscript, 'w' ) as fh:
            fh.write( '\n'.join( lines ) + '\n' )

    @staticmethod
    def _read_output( tmp_dir, name ):
        """Return the contents of tmp_dir/name, or None if it was not written."""
        path = os.path.join( tmp_dir, name )
        if not os.path.isfile( path ):
            return None
        with open( path, 'r' ) as fh:
            return fh.read()

    def set_meta( self, dataset, **kwd ):
        """
        Set dataset.blurb and the sample_names, replicate_names and gene_ids
        metadata by querying the dataset file with cummeRbund through Rscript.

        This is best effort: any failure is logged and the metadata that
        could not be determined is left untouched.  Nothing is raised.
        """
        if not dataset.has_data():
            return
        # Imported here because the module-level imports do not include it.
        import shutil
        tmp_dir = None
        try:
            # A private scratch directory per call: concurrent calls cannot
            # clobber each other's files, and output left behind by an
            # earlier run can never be mistaken for this dataset's.
            tmp_dir = tempfile.mkdtemp( prefix='cuffdb_' )
            rscript = os.path.join( tmp_dir, 'cuffdata_meta.r' )
            self._write_rscript( rscript, dataset.file_name )
            stderr_name = 'rscript.err'
            with open( os.path.join( tmp_dir, stderr_name ), 'wb' ) as tmp_stderr:
                # Argument list instead of a shell string: no shell quoting
                # problems with the script path.
                proc = Popen( args=[ 'Rscript', '--vanilla', rscript ], cwd=tmp_dir, stderr=tmp_stderr )
                returncode = proc.wait()
            if returncode != 0:
                # Report the failure, but still harvest whatever sections
                # the script completed before it stopped.
                log.error( 'Rscript exited with status %s for CuffDataDB %s: %s',
                           returncode, dataset.file_name, self._read_output( tmp_dir, stderr_name ) )
            blurb = self._read_output( tmp_dir, 'out.blurb' )
            if blurb is not None:
                dataset.blurb = blurb
            for meta_name, out_name, _ in self._R_LIST_QUERIES:
                contents = self._read_output( tmp_dir, out_name )
                if contents is not None:
                    # ''.split(',') would give [''], not an empty list.
                    names = contents.split(',') if contents else []
                    setattr( dataset.metadata, meta_name, names )
        except Exception as e:
            log.error('Error setting cummeRbund CuffDataDB metadata : %s' % str(e))
        finally:
            if tmp_dir is not None:
                shutil.rmtree( tmp_dir, ignore_errors=True )
178 |