Mercurial > repos > jjohnson > cummerbund
annotate cuffdata.py @ 7:b5562b9a55c7
Use same bias_correction cached param as devteam cuffdiff
| author | Jim Johnson <jj@umn.edu> |
|---|---|
| date | Mon, 13 Oct 2014 09:12:47 -0500 |
| parents | 137aab1d9ac1 |
| children |
| rev | line source |
|---|---|
| 0 | 1 """ |
| 2 CuffData | |
| 3 """ | |
| 4 import logging | |
|
6
137aab1d9ac1
Add metadata to datatype: CuffDataDB
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
5 import os,os.path,sys,re |
|
137aab1d9ac1
Add metadata to datatype: CuffDataDB
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
6 import tempfile |
|
137aab1d9ac1
Add metadata to datatype: CuffDataDB
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
7 from subprocess import Popen |
| 0 | 8 import galaxy.datatypes.data |
| 9 from galaxy.datatypes.images import Html | |
| 10 from galaxy.datatypes.binary import Binary | |
| 11 from galaxy import util | |
| 12 from galaxy.datatypes.metadata import MetadataElement | |
| 13 | |
| 14 log = logging.getLogger(__name__) | |
| 15 | |
class CuffDiffData( Html ):
    """
    Composite datatype holding the output files of a CuffDiff run.

    The primary file is an HTML index that links to whichever of the
    following CuffDiff output files are present:
        run.info
        read_groups.info
        cds.count_tracking
        cds.diff
        cds.fpkm_tracking
        cds.read_group_tracking
        cds_exp.diff
        gene_exp.diff
        genes.count_tracking
        genes.fpkm_tracking
        genes.read_group_tracking
        isoform_exp.diff
        isoforms.count_tracking
        isoforms.fpkm_tracking
        isoforms.read_group_tracking
        promoters.diff
        splicing.diff
        tss_group_exp.diff
        tss_groups.count_tracking
        tss_groups.fpkm_tracking
        tss_groups.read_group_tracking
    """
    file_ext = 'cuffdata'
    is_binary = False
    composite_type = 'auto_primary_file'
    allow_datatype_change = False

    # Composite file names, registered in this order by __init__.
    _composite_file_names = (
        'run.info',
        'read_groups.info',
        'cds.count_tracking',
        'cds.diff',
        'cds.fpkm_tracking',
        'cds.read_group_tracking',
        'cds_exp.diff',
        'gene_exp.diff',
        'genes.count_tracking',
        'genes.fpkm_tracking',
        'genes.read_group_tracking',
        'isoform_exp.diff',
        'isoforms.count_tracking',
        'isoforms.fpkm_tracking',
        'isoforms.read_group_tracking',
        'promoters.diff',
        'splicing.diff',
        'tss_group_exp.diff',
        'tss_groups.count_tracking',
        'tss_groups.fpkm_tracking',
        'tss_groups.read_group_tracking',
    )

    def __init__( self, **kwd ):
        """Register every CuffDiff output file as an optional, non-binary composite file."""
        Html.__init__( self, **kwd )
        for name in self._composite_file_names:
            # Each file uses its own name as its description.
            self.add_composite_file( name, description = name, mimetype = 'text/html', optional = True, is_binary = False )

    def generate_primary_file( self, dataset = None ):
        """
        This is called only at upload to write the html file
        cannot rename the datasets here - they come with the default unfortunately

        Returns the HTML index as a single string, one <li> per composite file.
        """
        rval = ['<html><head><title>CuffDiff Output</title></head>']
        rval.append('<body>')
        rval.append('<p/>CuffDiff Outputs:<p/><ul>')
        # items() rather than the Python 2-only iteritems()
        for composite_name, composite_file in self.get_composite_files( dataset = dataset ).items():
            fn = composite_name
            log.debug( "CuffDiffData generate_primary_file %s %s", fn, composite_file )
            opt_text = ''
            if composite_file.optional:
                opt_text = ' (optional)'
            if composite_file.get('description'):
                rval.append( '<li><a href="%s" type="text/plain">%s (%s)</a>%s</li>' % ( fn, fn, composite_file.get('description'), opt_text ) )
            else:
                rval.append( '<li><a href="%s" type="text/plain">%s</a>%s</li>' % ( fn, fn, opt_text ) )
        rval.append( '</ul></body></html>' )
        return "\n".join( rval )

    def regenerate_primary_file(self,dataset):
        """
        cannot do this until we are setting metadata

        Rewrites dataset.file_name as an HTML index linking to every entry
        found in dataset.extra_files_path.
        """
        # Sorted so the index is stable across runs; os.listdir order is arbitrary.
        flist = sorted( os.listdir(dataset.extra_files_path) )
        rval = ['<html><head><title>CuffDiff Output</title></head>']
        rval.append('<body>')
        rval.append('<p/>CuffDiff Outputs:<p/><ul>')
        for fname in flist:
            sfname = os.path.split(fname)[-1]
            rval.append( '<li><a href="%s" type="text/html">%s</a></li>' % ( sfname, sfname ) )
        rval.append( '</ul></body></html>' )
        # Context manager closes the handle even if a write fails.
        with open(dataset.file_name,'w') as f:
            f.write("\n".join( rval ))
            f.write('\n')

    def set_meta( self, dataset, **kwd ):
        """Run the Html metadata step, then rebuild the HTML index from the extra files."""
        Html.set_meta( self, dataset, **kwd )
        self.regenerate_primary_file(dataset)

    def sniff( self, filename ):
        """Always return False, so this datatype is never selected by sniffing."""
        return False
| 113 | |
class CuffDataDB( Binary ):
    """
    Binary datatype for a cummeRbund database file (read via readCufflinks dbFile).

    set_meta runs an R script against the database to fill in the dataset
    blurb and the sample_names, replicate_names and gene_ids metadata.
    """
    file_ext = 'cuffdatadb'
    is_binary = True
    allow_datatype_change = False
    MetadataElement( name="sample_names", default=[], desc="Sample names", readonly=True, visible=True, optional=True, no_value=[] )
    MetadataElement( name="replicate_names", default=[], desc="Replicate names", readonly=True, visible=True, optional=True, no_value=[] )
    MetadataElement( name="gene_ids", default=[], desc="Gene Ids", readonly=True, visible=True, optional=True, no_value=[] )

    def __init__( self, **kwd ):
        """Initialise the Binary base class and log the creation."""
        Binary.__init__( self, **kwd )
        log.info('Creating cummeRbund CuffDataDB')

    def set_meta( self, dataset, **kwd ):
        """
        Populate dataset.blurb and the name/id metadata by querying the database with R.

        An Rscript is written to a private temporary directory and run there;
        it sinks one output file per piece of information. Whatever sink files
        exist afterwards are read back. This is best effort: any failure is
        logged and the metadata is left as it was. Does nothing if the dataset
        has no data.
        """
        # Local import: shutil is not among this module's top-level imports.
        import shutil

        def get_contents(fname):
            """Return the full text of fname."""
            with open(fname,'r') as fh:
                return fh.read()

        def split_names(text):
            """Split a comma separated list, dropping empty entries so an empty sink gives []."""
            return [ name for name in text.split(',') if name ]

        if not dataset.has_data():
            return

        # ( sink file name, R expression whose output goes to that sink )
        sinks = (
            ( 'out.blurb', 'print(cuff)' ),
            ( 'out.samples', 'cat(samples(cuff)[[2]],sep=",")' ),
            ( 'out.replicates', 'cat(replicates(cuff)[[4]],sep=",")' ),
            ( 'out.gene_ids', 'cat(annotation(genes(cuff))[[1]],sep=",")' ),
        )
        # ( metadata attribute, sink file that holds its comma separated values )
        list_metadata = (
            ( 'sample_names', 'out.samples' ),
            ( 'replicate_names', 'out.replicates' ),
            ( 'gene_ids', 'out.gene_ids' ),
        )

        tmp_dir = None
        try:
            ## Create a tmpdir
            ## create an Rscript to write out info about the CuffData, e.g. samples replicates gene_ids
            ## define file names to use as sinks for each type of data
            tmp_dir = tempfile.mkdtemp()
            rscript = os.path.join( tmp_dir, 'cuffdata_meta.r' )
            # Escape backslashes and double quotes so the path is a valid R string literal.
            db_file = str(dataset.file_name).replace('\\', '\\\\').replace('"', '\\"')
            # Text mode: the script is written as str, which 'wb' rejects on Python 3.
            with open( rscript, 'w' ) as rscript_fh:
                rscript_fh.write('library(cummeRbund)\n')
                rscript_fh.write('cuff<-readCufflinks(dir = "", dbFile = "%s", rebuild = F)\n' % db_file)
                for sink_name, r_expr in sinks:
                    rscript_fh.write('sink("%s")\n' % sink_name)
                    rscript_fh.write('%s\n' % r_expr)
                    rscript_fh.write('sink()\n')

            tmp_stderr_name = os.path.join( tmp_dir, 'cuffdata_meta.err' )
            with open( tmp_stderr_name, 'wb' ) as tmp_stderr:
                # Argument list instead of a shell string; cwd makes the relative sink names land in tmp_dir.
                proc = Popen( args=[ 'Rscript', '--vanilla', rscript ], cwd=tmp_dir, stderr=tmp_stderr )
                returncode = proc.wait()
            if returncode != 0:
                # Still read any sinks that were written before the failure.
                log.error( 'Rscript exited with code %s while reading cummeRbund CuffDataDB: %s', returncode, get_contents(tmp_stderr_name) )

            blurb_path = os.path.join( tmp_dir, 'out.blurb' )
            if os.path.exists( blurb_path ):
                dataset.blurb = get_contents( blurb_path )
            for attr_name, sink_name in list_metadata:
                sink_path = os.path.join( tmp_dir, sink_name )
                if os.path.exists( sink_path ):
                    setattr( dataset.metadata, attr_name, split_names( get_contents( sink_path ) ) )
        except Exception as e:
            log.error('Error setting cummeRbund CuffDataDB metadata : %s' % str(e))
        finally:
            # Remove the scratch directory; it used to be left behind on every call.
            if tmp_dir is not None:
                shutil.rmtree( tmp_dir, ignore_errors=True )
|
137aab1d9ac1
Add metadata to datatype: CuffDataDB
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
177 |
