Mercurial > repos > jjohnson > cummerbund
annotate cuffdata.py @ 6:137aab1d9ac1
Add metadata to datatype: CuffDataDB
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Mon, 18 Nov 2013 16:43:15 -0600 |
parents | da7241f92ecf |
children | b5562b9a55c7 |
rev | line source |
---|---|
0 | 1 """ |
2 CuffData | |
3 """ | |
4 import logging | |
6
137aab1d9ac1
Add metadata to datatype: CuffDataDB
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
5 import os,os.path,sys,re |
137aab1d9ac1
Add metadata to datatype: CuffDataDB
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
6 import tempfile |
137aab1d9ac1
Add metadata to datatype: CuffDataDB
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
7 from subprocess import Popen |
0 | 8 import galaxy.datatypes.data |
9 from galaxy.datatypes.images import Html | |
10 from galaxy.datatypes.binary import Binary | |
11 from galaxy import util | |
12 from galaxy.datatypes.metadata import MetadataElement | |
13 | |
14 log = logging.getLogger(__name__) | |
15 | |
class CuffDiffData( Html ):
    """
    Composite datatype bundling the output files of a CuffDiff run.

    The primary file is an HTML index linking to the composite files.
    CuffDiff output files:
        run.info
        read_groups.info
        cds.count_tracking
        cds.diff
        cds.fpkm_tracking
        cds.read_group_tracking
        cds_exp.diff
        gene_exp.diff
        genes.count_tracking
        genes.fpkm_tracking
        genes.read_group_tracking
        isoform_exp.diff
        isoforms.count_tracking
        isoforms.fpkm_tracking
        isoforms.read_group_tracking
        promoters.diff
        splicing.diff
        tss_group_exp.diff
        tss_groups.count_tracking
        tss_groups.fpkm_tracking
        tss_groups.read_group_tracking
    """
    file_ext = 'cuffdata'
    is_binary = False
    composite_type = 'auto_primary_file'
    allow_datatype_change = False

    # Every CuffDiff output registered as an (optional) composite file,
    # in registration order.
    _cuffdiff_output_names = (
        'run.info',
        'read_groups.info',
        'cds.count_tracking',
        'cds.diff',
        'cds.fpkm_tracking',
        'cds.read_group_tracking',
        'cds_exp.diff',
        'gene_exp.diff',
        'genes.count_tracking',
        'genes.fpkm_tracking',
        'genes.read_group_tracking',
        'isoform_exp.diff',
        'isoforms.count_tracking',
        'isoforms.fpkm_tracking',
        'isoforms.read_group_tracking',
        'promoters.diff',
        'splicing.diff',
        'tss_group_exp.diff',
        'tss_groups.count_tracking',
        'tss_groups.fpkm_tracking',
        'tss_groups.read_group_tracking',
    )

    def __init__( self, **kwd ):
        """Register each CuffDiff output as an optional, non-binary composite file."""
        Html.__init__( self, **kwd )
        for name in self._cuffdiff_output_names:
            self.add_composite_file( name, description = name, mimetype = 'text/html', optional = True, is_binary = False )

    def generate_primary_file( self, dataset = None ):
        """
        Return the HTML index listing the composite files.

        This is called only at upload to write the html file
        cannot rename the datasets here - they come with the default unfortunately
        """
        rval = ['<html><head><title>CuffDiff Output</title></head>']
        rval.append('<body>')
        rval.append('<p/>CuffDiff Outputs:<p/><ul>')
        # .items() instead of .iteritems() so this also runs on Python 3.
        for composite_name, composite_file in self.get_composite_files( dataset = dataset ).items():
            log.debug( "CuffDiffData generate_primary_file %s %s", composite_name, composite_file )
            opt_text = ''
            if composite_file.optional:
                opt_text = ' (optional)'
            description = composite_file.get('description')
            if description:
                rval.append( '<li><a href="%s" type="text/plain">%s (%s)</a>%s</li>' % ( composite_name, composite_name, description, opt_text ) )
            else:
                rval.append( '<li><a href="%s" type="text/plain">%s</a>%s</li>' % ( composite_name, composite_name, opt_text ) )
        rval.append( '</ul></body></html>' )
        return "\n".join( rval )

    def regenerate_primary_file( self, dataset ):
        """
        Rewrite the primary HTML file as an index of the files actually
        present in dataset.extra_files_path.

        cannot do this until we are setting metadata
        """
        extra_files_path = dataset.extra_files_path
        # A missing directory is treated as "no files" rather than letting
        # OSError escape from set_meta; sorted() makes the listing stable.
        if os.path.isdir( extra_files_path ):
            flist = sorted( os.listdir( extra_files_path ) )
        else:
            flist = []
        rval = ['<html><head><title>CuffDiff Output</title></head>']
        rval.append('<body>')
        rval.append('<p/>CuffDiff Outputs:<p/><ul>')
        for fname in flist:
            sfname = os.path.split( fname )[-1]
            rval.append( '<li><a href="%s" type="text/html">%s</a></li>' % ( sfname, sfname ) )
        rval.append( '</ul></body></html>' )
        # open() + with: file() does not exist on Python 3, and the handle
        # is now closed even if a write fails.
        with open( dataset.file_name, 'w' ) as f:
            f.write( "\n".join( rval ) )
            f.write( '\n' )

    def set_meta( self, dataset, **kwd ):
        """Set the Html metadata, then rebuild the index from the files on disk."""
        Html.set_meta( self, dataset, **kwd )
        self.regenerate_primary_file( dataset )

    def sniff( self, filename ):
        """Never auto-detect this datatype from file content."""
        return False
113 | |
class CuffDataDB( Binary ):
    """
    Binary datatype for the database file that cummeRbund's
    readCufflinks() opens through its dbFile argument.

    Metadata (sample names, replicate names, gene ids) is extracted by
    running a small R script against the database.
    """
    file_ext = 'cuffdatadb'
    is_binary = True
    allow_datatype_change = False
    MetadataElement( name="sample_names", default=[], desc="Sample names", readonly=True, visible=True, optional=True, no_value=[] )
    MetadataElement( name="replicate_names", default=[], desc="Replicate names", readonly=True, visible=True, optional=True, no_value=[] )
    MetadataElement( name="gene_ids", default=[], desc="Gene Ids", readonly=True, visible=True, optional=True, no_value=[] )

    # (R output file, metadata attribute) pairs filled in by set_meta.
    _metadata_outputs = (
        ( 'out.samples', 'sample_names' ),
        ( 'out.replicates', 'replicate_names' ),
        ( 'out.gene_ids', 'gene_ids' ),
    )

    def __init__( self, **kwd ):
        Binary.__init__( self, **kwd )
        log.info('Creating cummeRbund CuffDataDB')

    @staticmethod
    def _read_text( fname ):
        """Return the full text content of fname."""
        with open( fname, 'r' ) as fh:
            return fh.read()

    @staticmethod
    def _split_names( text ):
        """Split comma-separated R output into a list; empty output gives []."""
        text = text.strip()
        if not text:
            return []
        return text.split( ',' )

    @staticmethod
    def _metadata_rscript( db_path ):
        """Return the R script that dumps blurb, samples, replicates and gene ids."""
        # Escape backslashes and double quotes so the path is a valid R string literal.
        r_path = db_path.replace( '\\', '\\\\' ).replace( '"', '\\"' )
        lines = [
            'library(cummeRbund)',
            'cuff<-readCufflinks(dir = "", dbFile = "%s", rebuild = F)' % r_path,
            'sink("out.blurb")',
            'print(cuff)',
            'sink()',
            'sink("out.samples")',
            'cat(samples(cuff)[[2]],sep=",")',
            'sink()',
            'sink("out.replicates")',
            'cat(replicates(cuff)[[4]],sep=",")',
            'sink()',
            'sink("out.gene_ids")',
            'cat(annotation(genes(cuff))[[1]],sep=",")',
            'sink()',
        ]
        return '\n'.join( lines ) + '\n'

    def set_meta( self, dataset, **kwd ):
        """
        Populate dataset.blurb and the sample_names / replicate_names /
        gene_ids metadata by running Rscript with cummeRbund on the dataset.

        Best effort: any failure is logged and the metadata is left as it was.
        """
        import shutil  # function-scope import: only needed for temp dir cleanup
        if not dataset.has_data():
            return
        tmp_dir = None
        try:
            # A private directory per call: a fixed shared path would let
            # concurrent or earlier runs leave out.* files that get read
            # back as this dataset's metadata.
            tmp_dir = tempfile.mkdtemp( prefix='cuffdb_' )
            rscript = os.path.join( tmp_dir, 'cuffdb_meta.r' )
            with open( rscript, 'w' ) as rscript_fh:
                rscript_fh.write( self._metadata_rscript( dataset.file_name ) )
            stderr_name = os.path.join( tmp_dir, 'rscript.err' )
            with open( stderr_name, 'w' ) as tmp_stderr:
                # Argument list without a shell: no quoting problems with the script path.
                # The R sink() targets are relative, so cwd must be tmp_dir.
                proc = Popen( args=[ 'Rscript', '--vanilla', rscript ], cwd=tmp_dir, stderr=tmp_stderr.fileno() )
                returncode = proc.wait()
            if returncode != 0:
                log.error( 'Rscript exited with status %s for cummeRbund CuffDataDB %s: %s',
                           returncode, dataset.file_name, self._read_text( stderr_name ) )
            # Still read whatever outputs were produced, even after a failure.
            blurb_path = os.path.join( tmp_dir, 'out.blurb' )
            if os.path.exists( blurb_path ):
                dataset.blurb = self._read_text( blurb_path )
            for out_name, meta_name in self._metadata_outputs:
                out_path = os.path.join( tmp_dir, out_name )
                if os.path.exists( out_path ):
                    setattr( dataset.metadata, meta_name, self._split_names( self._read_text( out_path ) ) )
        except Exception as e:
            log.error( 'Error setting cummeRbund CuffDataDB metadata : %s', str( e ) )
        finally:
            if tmp_dir is not None:
                shutil.rmtree( tmp_dir, ignore_errors=True )
137aab1d9ac1
Add metadata to datatype: CuffDataDB
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
178 |