comparison jbrowse.py @ 1:497c6bb3b717 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
author iuc
date Thu, 18 Jun 2015 12:10:51 -0400
parents 2c9e5136b416
children 7342f467507b
comparison
equal deleted inserted replaced
0:2c9e5136b416 1:497c6bb3b717
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 from string import Template
2 import os 3 import os
3 import shutil
4 import argparse 4 import argparse
5 import subprocess 5 import subprocess
6 import hashlib 6 import hashlib
7 import tempfile
8 import json
9 import yaml
10 import logging
11 logging.basicConfig()
12 log = logging.getLogger(__name__)
13
14 COLOR_FUNCTION_TEMPLATE = Template("""
15 function(feature, variableName, glyphObject, track) {
16 var score = ${score};
17 ${opacity}
18 return 'rgba(${red}, ${green}, ${blue}, ' + opacity + ')';
19 }
20 """)
21
22 BLAST_OPACITY_MATH = """
23 var opacity = 0;
24 if(score == 0.0) {
25 opacity = 1;
26 } else{
27 opacity = (20 - Math.log10(score)) / 180;
28 }
29 """
30
31 BREWER_COLOUR_IDX = 0
32 BREWER_COLOUR_SCHEMES = [
33 (228, 26, 28),
34 (55, 126, 184),
35 (77, 175, 74),
36 (152, 78, 163),
37 (255, 127, 0),
38 ]
39
40
41 # score comes from feature._parent.get('score') or feature.get('score')
42 # Opacity math
7 43
8 TN_TABLE = { 44 TN_TABLE = {
9 'gff3': '--gff', 45 'gff3': '--gff',
10 'gff': '--gff', 46 'gff': '--gff',
11 'bed': '--bed', 47 'bed': '--bed',
12 # 'genbank': '--gbk', 48 # 'genbank': '--gbk',
13 } 49 }
14 50
15 51 INSTALLED_TO = os.path.dirname(os.path.realpath(__file__))
16 def process_genome(jbrowse_dir, genome): 52
17 subprocess.check_output(['perl', 'bin/prepare-refseqs.pl', '--fasta', genome], cwd=jbrowse_dir) 53
18 54 class JbrowseConnector(object):
19 55
20 def process_annotations(jbrowse_dir, annot_file, annot_label, annot_format, 56 def __init__(self, jbrowse, jbrowse_dir, outdir, genome):
21 **kwargs): 57 self.jbrowse = jbrowse
22 key = hashlib.md5(annot_file).hexdigest() 58 self.jbrowse_dir = jbrowse_dir
23 59 self.outdir = outdir
24 cmd = ['perl', 'bin/flatfile-to-json.pl', TN_TABLE.get(annot_format, 'gff'), 60 self.genome_path = genome
25 annot_file, '--trackLabel', key, '--key', annot_label] 61 self.brewer_colour_idx = 0
26 subprocess.check_output(cmd, cwd=jbrowse_dir) 62
63 self.clone_jbrowse(self.jbrowse, self.outdir)
64 self.process_genome()
65
66 def subprocess_check_call(self, command):
67 log.debug('cd %s && %s', self.jbrowse_dir, ' '.join(command))
68 subprocess.check_call(command, cwd=self.jbrowse_dir)
69
70 def _get_colours(self):
71 r, g, b = BREWER_COLOUR_SCHEMES[self.brewer_colour_idx]
72 self.brewer_colour_idx += 1
73 return r, g, b
74
75 def process_genome(self):
76 self.subprocess_check_call(['perl', 'bin/prepare-refseqs.pl',
77 '--fasta', self.genome_path])
78
79 def _add_json(self, json_data):
80 if len(json_data.keys()) == 0:
81 return
82
83 tmp = tempfile.NamedTemporaryFile(delete=False)
84 tmp.write(json.dumps(json_data))
85 tmp.close()
86 cmd = ['perl', 'bin/add-track-json.pl', tmp.name,
87 os.path.join('data', 'trackList.json')]
88 self.subprocess_check_call(cmd)
89 os.unlink(tmp.name)
90
91 def add_blastxml(self, data, key, **kwargs):
92 gff3_unrebased = tempfile.NamedTemporaryFile(delete=False)
93 cmd = ['python', os.path.join(INSTALLED_TO, 'blastxml_to_gapped_gff3.py'),
94 '--trim_end', '--min_gap', str(kwargs['min_gap']), data]
95 subprocess.check_call(cmd, cwd=self.jbrowse_dir, stdout=gff3_unrebased)
96 gff3_unrebased.close()
97
98 gff3_rebased = tempfile.NamedTemporaryFile(delete=False)
99 cmd = ['python', os.path.join(INSTALLED_TO, 'gff3_rebase.py')]
100 if kwargs['protein']:
101 cmd.append('--protein2dna')
102 cmd.extend([kwargs['parent'], gff3_unrebased.name])
103 subprocess.check_call(cmd, cwd=self.jbrowse_dir, stdout=gff3_rebased)
104 gff3_rebased.close()
105
106 label = hashlib.md5(data).hexdigest()
107
108 red, green, blue = self._get_colours()
109 color_function = COLOR_FUNCTION_TEMPLATE.substitute({
110 'score': "feature._parent.get('score')",
111 'opacity': BLAST_OPACITY_MATH,
112 'red': red,
113 'green': green,
114 'blue': blue,
115 })
116
117 clientConfig = {
118 'label': 'description',
119 'color': color_function.replace('\n', ''),
120 'description': 'Hit_titles',
121 }
122 config = {'glyph': 'JBrowse/View/FeatureGlyph/Segments'}
123 if 'category' in kwargs:
124 config['category'] = kwargs['category']
125
126 cmd = ['perl', 'bin/flatfile-to-json.pl',
127 '--gff', gff3_rebased.name,
128 '--trackLabel', label,
129 '--key', key,
130 '--clientConfig', json.dumps(clientConfig),
131 '--config', json.dumps(config),
132 '--trackType', 'JBrowse/View/Track/CanvasFeatures'
133 ]
134
135 self.subprocess_check_call(cmd)
136 os.unlink(gff3_rebased.name)
137 os.unlink(gff3_unrebased.name)
138
139 def _min_max_gff(self, gff_file):
140 min_val = None
141 max_val = None
142 with open(gff_file, 'r') as handle:
143 for line in handle:
144 try:
145 value = float(line.split('\t')[5])
146 min_val = min(value, (min_val or value))
147 max_val = max(value, (max_val or value))
148
149 if value < min_val:
150 min_val = value
151
152 if value > max_val:
153 max_val = value
154 except Exception:
155 pass
156 return min_val, max_val
157
158 def add_bigwig(self, data, key, **kwargs):
159 label = hashlib.md5(data).hexdigest()
160 dest = os.path.join('data', 'raw', os.path.basename(data))
161 cmd = ['ln', '-s', data, dest]
162 self.subprocess_check_call(cmd)
163
164 track_data = {
165 "label": label,
166 "urlTemplate": os.path.join('..', dest),
167 "bicolor_pivot": "zero",
168 "storeClass": "JBrowse/Store/SeqFeature/BigWig",
169 "type": "JBrowse/View/Track/Wiggle/Density",
170 "key": key,
171 }
172 track_data.update(kwargs)
173
174 if 'min' not in track_data and 'max' not in track_data \
175 and 'autoscale' not in track_data:
176 track_data['autoscale'] = 'local'
177
178 self._add_json(track_data)
179
180 def add_bam(self, data, key, **kwargs):
181 label = hashlib.md5(data).hexdigest()
182 dest = os.path.join('data', 'raw', os.path.basename(data))
183 # ln?
184 cmd = ['ln', '-s', data, dest]
185 self.subprocess_check_call(cmd)
186
187 bai_source = kwargs['bam_index']
188 cmd = ['ln', '-s', bai_source, dest + '.bai']
189 self.subprocess_check_call(cmd)
190
191 track_data = {
192 "urlTemplate": os.path.join('..', dest),
193 "key": key,
194 "label": label,
195 "type": "JBrowse/View/Track/Alignments2",
196 "storeClass": "JBrowse/Store/SeqFeature/BAM",
197 }
198 if 'category' in kwargs:
199 track_data['category'] = kwargs['category']
200 self._add_json(track_data)
201
202 if kwargs.get('auto_snp', False):
203 track_data = {
204 "storeClass": "JBrowse/Store/SeqFeature/BAM",
205 "urlTemplate": os.path.join('..', dest),
206 "type": "JBrowse/View/Track/SNPCoverage",
207 "key": key + " - SNPs/Coverage",
208 "label": label + "_autosnp",
209 }
210 if 'category' in kwargs:
211 track_data['category'] = kwargs['category']
212 self._add_json(track_data)
213
214 def add_vcf(self, data, key, **kwargs):
215 label = hashlib.md5(data).hexdigest()
216 dest = os.path.join('data', 'raw', os.path.basename(data))
217 # ln?
218 cmd = ['ln', '-s', data, dest]
219 self.subprocess_check_call(cmd)
220 cmd = ['bgzip', dest]
221 self.subprocess_check_call(cmd)
222 cmd = ['tabix', '-p', 'vcf', dest + '.gz']
223 self.subprocess_check_call(cmd)
224
225 track_data = {
226 "key": key,
227 "label": label,
228 "urlTemplate": os.path.join('..', dest + '.gz'),
229 "type": "JBrowse/View/Track/HTMLVariants",
230 "storeClass": "JBrowse/Store/SeqFeature/VCFTabix",
231 }
232 track_data.update(kwargs)
233 self._add_json(track_data)
234
235 def add_features(self, data, key, format, **kwargs):
236 label = hashlib.md5(data).hexdigest()
237 cmd = ['perl', 'bin/flatfile-to-json.pl',
238 TN_TABLE.get(format, 'gff'), data,
239 '--trackLabel', label,
240 '--key', key]
241
242 config = {}
243 if 'category' in kwargs:
244 config['category'] = kwargs['category']
245
246 if kwargs.get('match', False):
247 clientConfig = {
248 'label': 'description',
249 'description': 'Hit_titles',
250 }
251
252 # Get min/max and build a scoring function since JBrowse doesn't
253 min_val, max_val = self._min_max_gff(data)
254
255 if min_val is not None and max_val is not None:
256 MIN_MAX_OPACITY_MATH = Template("""
257 var opacity = (score - ${min}) * (1/(${max} - ${min}));
258 """).substitute({
259 'max': max_val,
260 'min': min_val,
261 })
262
263 red, green, blue = self._get_colours()
264 color_function = COLOR_FUNCTION_TEMPLATE.substitute({
265 'score': "feature.get('score')",
266 'opacity': MIN_MAX_OPACITY_MATH,
267 'red': red,
268 'green': green,
269 'blue': blue,
270 })
271
272 clientConfig['color'] = color_function.replace('\n', '')
273
274 config['glyph'] = 'JBrowse/View/FeatureGlyph/Segments'
275
276 cmd += ['--clientConfig', json.dumps(clientConfig),
277 '--trackType', 'JBrowse/View/Track/CanvasFeatures'
278 ]
279
280 cmd.extend(['--config', json.dumps(config)])
281
282 self.subprocess_check_call(cmd)
283
284 def process_annotations(self, data, key, format, **kwargs):
285 if format in ('gff', 'gff3', 'bed'):
286 self.add_features(data, key, format, **kwargs)
287 elif format == 'bigwig':
288 self.add_bigwig(data, key, **kwargs)
289 elif format == 'bam':
290 self.add_bam(data, key, **kwargs)
291 elif format == 'blastxml':
292 self.add_blastxml(data, key, **kwargs)
293 elif format == 'vcf':
294 self.add_vcf(data, key, **kwargs)
295
296 def clone_jbrowse(self, jbrowse_dir, destination):
297 # JBrowse seems to have included some bad symlinks, cp ignores bad symlinks
298 # unlike copytree
299 cmd = ['mkdir', '-p', destination]
300 subprocess.check_call(cmd)
301 cmd = ['cp', '-r', jbrowse_dir, destination]
302 subprocess.check_call(cmd)
303 cmd = ['mkdir', '-p', os.path.join(destination, 'JBrowse-1.11.6',
304 'data', 'raw')]
305 subprocess.check_call(cmd)
306
307 # http://unix.stackexchange.com/a/38691/22785
308 # JBrowse releases come with some broken symlinks
309 cmd = ['find', destination, '-type', 'l', '-xtype', 'l', '-exec', 'rm', "'{}'", '+']
310 subprocess.check_call(cmd)
27 311
28 312
29 if __name__ == '__main__': 313 if __name__ == '__main__':
30 parser = argparse.ArgumentParser(description="", epilog="") 314 parser = argparse.ArgumentParser(description="", epilog="")
31 parser.add_argument('genome', type=file, help='Input genome file') 315 parser.add_argument('genome', type=file, help='Input genome file')
32 316 parser.add_argument('yaml', type=file, help='Track Configuration')
33 parser.add_argument('--gff3', type=file, nargs='*', help='GFF3/BED/GBK File')
34 parser.add_argument('--gff3_format', choices=['gff3', 'gff', 'bed', 'gbk'], nargs='*', help='GFF3/BED/GBK Format')
35 parser.add_argument('--gff3_label', type=str, nargs='*', help='GFF3/BED/GBK Label')
36 317
37 parser.add_argument('--jbrowse', help='Folder containing a jbrowse release') 318 parser.add_argument('--jbrowse', help='Folder containing a jbrowse release')
38 parser.add_argument('--outdir', help='Output directory', default='out') 319 parser.add_argument('--outdir', help='Output directory', default='out')
39 args = parser.parse_args() 320 args = parser.parse_args()
40 321
41 jbrowse_dir = os.path.join(args.outdir, 'JBrowse-1.11.6') 322 jc = JbrowseConnector(
42 shutil.copytree(args.jbrowse, jbrowse_dir) 323 jbrowse=args.jbrowse,
43 324 jbrowse_dir=os.path.join(args.outdir, 'JBrowse-1.11.6'),
44 process_genome(jbrowse_dir, os.path.realpath(args.genome.name)) 325 outdir=args.outdir,
45 326 genome=os.path.realpath(args.genome.name),
46 for gff3, gff3_format, gff3_label in zip(args.gff3, args.gff3_format, args.gff3_label): 327 )
47 gff3_path = os.path.realpath(gff3.name) 328
48 process_annotations(jbrowse_dir, gff3_path, gff3_label, gff3_format) 329 track_data = yaml.load(args.yaml)
330 for track in track_data:
331 path = os.path.realpath(track['file'])
332 extra = track.get('options', {})
333 if '__unused__' in extra:
334 del extra['__unused__']
335
336 for possible_partial_path in ('bam_index', 'parent'):
337 if possible_partial_path in extra:
338 extra[possible_partial_path] = os.path.realpath(
339 extra[possible_partial_path])
340 extra['category'] = track.get('category', 'Default')
341
342 jc.process_annotations(path, track['label'], track['ext'], **extra)
49 343
50 print """ 344 print """
51 <html> 345 <html>
52 <body> 346 <body>
53 <script type="text/javascript"> 347 <script type="text/javascript">
54 window.location=JBrowse-1.11.6/index.html 348 window.location=JBrowse-1.11.6/index.html
55 </script> 349 </script>
56 <a href="JBrowse-1.11.6/index.html">Go to JBrowse</a> 350 <a href="JBrowse-1.11.6/index.html">Go to JBrowse</a>
351 <p>Please note that JBrowse functions best on production Galaxy
352 instances. The paste server used in development instances has issues
353 serving the volumes of data regularly involved in JBrowse</p>
57 </body> 354 </body>
58 </html> 355 </html>
59 """ 356 """