comparison jbrowse2.py @ 136:93fdd696c281 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 4fa86613193c985e0cb9a8fc795c56b8bc7b8532
author iuc
date Thu, 02 Oct 2025 10:20:29 +0000
parents 21bb464c1d53
children
comparison
equal deleted inserted replaced
135:21bb464c1d53 136:93fdd696c281
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2
3 import argparse 2 import argparse
4 import binascii 3 import csv
5 import copy
6 import datetime 4 import datetime
5 import hashlib
7 import json 6 import json
8 import logging 7 import logging
9 import os 8 import os
10 import re 9 import re
11 import shutil 10 import shutil
12 import ssl
13 import string
14 import struct
15 import subprocess 11 import subprocess
16 import urllib.request
17 import xml.etree.ElementTree as ET 12 import xml.etree.ElementTree as ET
18 from collections import defaultdict 13 from collections import defaultdict
19 14
15 import requests
16
17
20 logging.basicConfig(level=logging.DEBUG) 18 logging.basicConfig(level=logging.DEBUG)
21 log = logging.getLogger("jbrowse") 19 log = logging.getLogger("jbrowse")
22
23 JB2VER = "v2.15.4"
24 # version pinned if cloning - but not cloning now
25 logCommands = True
26 # useful for seeing what's being written but not for production setups
27 TODAY = datetime.datetime.now().strftime("%Y-%m-%d") 20 TODAY = datetime.datetime.now().strftime("%Y-%m-%d")
28 SELF_LOCATION = os.path.dirname(os.path.realpath(__file__)) 21 SELF_LOCATION = os.path.dirname(os.path.realpath(__file__))
29 GALAXY_INFRASTRUCTURE_URL = None 22 GALAXY_INFRASTRUCTURE_URL = None
30 mapped_chars = { 23 mapped_chars = {
31 ">": "__gt__", 24 ">": "__gt__",
38 "}": "__cc__", 31 "}": "__cc__",
39 "@": "__at__", 32 "@": "__at__",
40 "#": "__pd__", 33 "#": "__pd__",
41 "": "__cn__", 34 "": "__cn__",
42 } 35 }
43
44
45 INDEX_TEMPLATE = """<!doctype html>
46 <html lang="en" style="height:100%">
47 <head>
48 <meta charset="utf-8"/>
49 <link rel="shortcut icon" href="./favicon.ico"/>
50 <meta name="viewport" content="width=device-width,initial-scale=1"/>
51 <meta name="theme-color" content="#000000"/>
52 <meta name="description" content="A fast and flexible genome browser"/>
53 <link rel="manifest" href="./manifest.json"/>
54 <title>JBrowse</title>
55 </script>
56 </head>
57 <body style="overscroll-behavior:none; height:100%; margin: 0;">
58 <iframe
59 id="jbframe"
60 title="JBrowse2"
61 frameborder="0"
62 width="100%"
63 height="100%"
64 src='index_noview.html?config=config.json__SESSION_SPEC__'>
65 </iframe>
66 </body>
67 </html>
68 """
69
70
71 class ColorScaling(object):
72
73 COLOR_FUNCTION_TEMPLATE = """
74 function(feature, variableName, glyphObject, track) {{
75 var score = {score};
76 {opacity}
77 return 'rgba({red}, {green}, {blue}, ' + opacity + ')';
78 }}
79 """
80
81 COLOR_FUNCTION_TEMPLATE_QUAL = r"""
82 function(feature, variableName, glyphObject, track) {{
83 var search_up = function self(sf, attr){{
84 if(sf.get(attr) !== undefined){{
85 return sf.get(attr);
86 }}
87 if(sf.parent() === undefined) {{
88 return;
89 }}else{{
90 return self(sf.parent(), attr);
91 }}
92 }};
93
94 var search_down = function self(sf, attr){{
95 if(sf.get(attr) !== undefined){{
96 return sf.get(attr);
97 }}
98 if(sf.children() === undefined) {{
99 return;
100 }}else{{
101 var kids = sf.children();
102 for(var child_idx in kids){{
103 var x = self(kids[child_idx], attr);
104 if(x !== undefined){{
105 return x;
106 }}
107 }}
108 return;
109 }}
110 }};
111
112 var color = ({user_spec_color} || search_up(feature, 'color') || search_down(feature, 'color') || {auto_gen_color});
113 var score = (search_up(feature, 'score') || search_down(feature, 'score'));
114 {opacity}
115 if(score === undefined){{ opacity = 1; }}
116 var result = /^#?([a-f\d]{{2}})([a-f\d]{{2}})([a-f\d]{{2}})$/i.exec(color);
117 var red = parseInt(result[1], 16);
118 var green = parseInt(result[2], 16);
119 var blue = parseInt(result[3], 16);
120 if(isNaN(opacity) || opacity < 0){{ opacity = 0; }}
121 return 'rgba(' + red + ',' + green + ',' + blue + ',' + opacity + ')';
122 }}
123 """
124
125 OPACITY_MATH = {
126 "linear": """
127 var opacity = (score - ({min})) / (({max}) - ({min}));
128 """,
129 "logarithmic": """
130 var opacity = Math.log10(score - ({min})) / Math.log10(({max}) - ({min}));
131 """,
132 "blast": """
133 var opacity = 0;
134 if(score == 0.0) {{
135 opacity = 1;
136 }} else {{
137 opacity = (20 - Math.log10(score)) / 180;
138 }}
139 """,
140 }
141
142 BREWER_COLOUR_IDX = 0
143 BREWER_COLOUR_SCHEMES = [
144 (166, 206, 227),
145 (31, 120, 180),
146 (178, 223, 138),
147 (51, 160, 44),
148 (251, 154, 153),
149 (227, 26, 28),
150 (253, 191, 111),
151 (255, 127, 0),
152 (202, 178, 214),
153 (106, 61, 154),
154 (255, 255, 153),
155 (177, 89, 40),
156 (228, 26, 28),
157 (55, 126, 184),
158 (77, 175, 74),
159 (152, 78, 163),
160 (255, 127, 0),
161 ]
162
163 BREWER_DIVERGING_PALLETES = {
164 "BrBg": ("#543005", "#003c30"),
165 "PiYg": ("#8e0152", "#276419"),
166 "PRGn": ("#40004b", "#00441b"),
167 "PuOr": ("#7f3b08", "#2d004b"),
168 "RdBu": ("#67001f", "#053061"),
169 "RdGy": ("#67001f", "#1a1a1a"),
170 "RdYlBu": ("#a50026", "#313695"),
171 "RdYlGn": ("#a50026", "#006837"),
172 "Spectral": ("#9e0142", "#5e4fa2"),
173 }
174
175 def __init__(self):
176 self.brewer_colour_idx = 0
177
178 def rgb_from_hex(self, hexstr):
179 # http://stackoverflow.com/questions/4296249/how-do-i-convert-a-hex-triplet-to-an-rgb-tuple-and-back
180 return struct.unpack("BBB", binascii.unhexlify(hexstr))
181
182 def min_max_gff(self, gff_file):
183 min_val = None
184 max_val = None
185 with open(gff_file, "r") as handle:
186 for line in handle:
187 try:
188 value = float(line.split("\t")[5])
189 min_val = min(value, (min_val or value))
190 max_val = max(value, (max_val or value))
191
192 if value < min_val:
193 min_val = value
194
195 if value > max_val:
196 max_val = value
197 except Exception:
198 pass
199 return min_val, max_val
200
201 def hex_from_rgb(self, r, g, b):
202 return "#%02x%02x%02x" % (r, g, b)
203
204 def _get_colours(self):
205 r, g, b = self.BREWER_COLOUR_SCHEMES[
206 self.brewer_colour_idx % len(self.BREWER_COLOUR_SCHEMES)
207 ]
208 self.brewer_colour_idx += 1
209 return r, g, b
210
211 def parse_menus(self, track):
212 trackConfig = {"menuTemplate": [{}, {}, {}, {}]}
213
214 if "menu" in track["menus"]:
215 menu_list = [track["menus"]["menu"]]
216 if isinstance(track["menus"]["menu"], list):
217 menu_list = track["menus"]["menu"]
218
219 for m in menu_list:
220 tpl = {
221 "action": m["action"],
222 "label": m.get("label", "{name}"),
223 "iconClass": m.get("iconClass", "dijitIconBookmark"),
224 }
225 if "url" in m:
226 tpl["url"] = m["url"]
227 if "content" in m:
228 tpl["content"] = m["content"]
229 if "title" in m:
230 tpl["title"] = m["title"]
231
232 trackConfig["menuTemplate"].append(tpl)
233
234 return trackConfig
235
236 def parse_colours(self, track, trackFormat, gff3=None):
237 # Wiggle tracks have a bicolor pallete
238 trackConfig = {"style": {}}
239 if trackFormat == "wiggle":
240
241 trackConfig["style"]["pos_color"] = track["wiggle"]["color_pos"]
242 trackConfig["style"]["neg_color"] = track["wiggle"]["color_neg"]
243
244 if trackConfig["style"]["pos_color"] == "__auto__":
245 trackConfig["style"]["neg_color"] = self.hex_from_rgb(
246 *self._get_colours()
247 )
248 trackConfig["style"]["pos_color"] = self.hex_from_rgb(
249 *self._get_colours()
250 )
251
252 # Wiggle tracks can change colour at a specified place
253 bc_pivot = track["wiggle"]["bicolor_pivot"]
254 if bc_pivot not in ("mean", "zero"):
255 # The values are either one of those two strings
256 # or a number
257 bc_pivot = float(bc_pivot)
258 trackConfig["bicolor_pivot"] = bc_pivot
259 elif "scaling" in track:
260 if track["scaling"]["method"] == "ignore":
261 if track["scaling"]["scheme"]["color"] != "__auto__":
262 trackConfig["style"]["color"] = track["scaling"]["scheme"]["color"]
263 else:
264 trackConfig["style"]["color"] = self.hex_from_rgb(
265 *self._get_colours()
266 )
267 else:
268 # Scored method
269 algo = track["scaling"]["algo"]
270 # linear, logarithmic, blast
271 scales = track["scaling"]["scales"]
272 # type __auto__, manual (min, max)
273 scheme = track["scaling"]["scheme"]
274 # scheme -> (type (opacity), color)
275 # ==================================
276 # GENE CALLS OR BLAST
277 # ==================================
278 if trackFormat == "blast":
279 red, green, blue = self._get_colours()
280 color_function = self.COLOR_FUNCTION_TEMPLATE.format(
281 **{
282 "score": "feature._parent.get('score')",
283 "opacity": self.OPACITY_MATH["blast"],
284 "red": red,
285 "green": green,
286 "blue": blue,
287 }
288 )
289 trackConfig["style"]["color"] = color_function.replace("\n", "")
290 elif trackFormat == "gene_calls":
291 # Default values, based on GFF3 spec
292 min_val = 0
293 max_val = 1000
294 # Get min/max and build a scoring function since JBrowse doesn't
295 if scales["type"] == "automatic" or scales["type"] == "__auto__":
296 min_val, max_val = self.min_max_gff(gff3)
297 else:
298 min_val = scales.get("min", 0)
299 max_val = scales.get("max", 1000)
300
301 if scheme["color"] == "__auto__":
302 user_color = "undefined"
303 auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())
304 elif scheme["color"].startswith("#"):
305 user_color = "'%s'" % self.hex_from_rgb(
306 *self.rgb_from_hex(scheme["color"][1:])
307 )
308 auto_color = "undefined"
309 else:
310 user_color = "undefined"
311 auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())
312
313 color_function = self.COLOR_FUNCTION_TEMPLATE_QUAL.format(
314 **{
315 "opacity": self.OPACITY_MATH[algo].format(
316 **{"max": max_val, "min": min_val}
317 ),
318 "user_spec_color": user_color,
319 "auto_gen_color": auto_color,
320 }
321 )
322
323 trackConfig["style"]["color"] = color_function.replace("\n", "")
324 return trackConfig
325 36
326 37
327 def etree_to_dict(t): 38 def etree_to_dict(t):
328 if t is None: 39 if t is None:
329 return {} 40 return {}
351 INSTALLED_TO = os.path.dirname(os.path.realpath(__file__)) 62 INSTALLED_TO = os.path.dirname(os.path.realpath(__file__))
352 63
353 64
354 def metadata_from_node(node): 65 def metadata_from_node(node):
355 metadata = {} 66 metadata = {}
356 try: 67
357 if len(node.findall("dataset")) != 1: 68 if len(node.findall("dataset")) == 1:
358 # exit early 69
359 return metadata 70 for key, value in node.findall("dataset")[0].attrib.items():
360 except Exception: 71 metadata[f"dataset_{key}"] = value
361 return {} 72
362
363 for key, value in node.findall("dataset")[0].attrib.items():
364 metadata["dataset_%s" % key] = value
365
366 if node.findall("history"):
367 for key, value in node.findall("history")[0].attrib.items(): 73 for key, value in node.findall("history")[0].attrib.items():
368 metadata["history_%s" % key] = value 74 metadata[f"history_{key}"] = value
369 75
370 if node.findall("metadata"):
371 for key, value in node.findall("metadata")[0].attrib.items(): 76 for key, value in node.findall("metadata")[0].attrib.items():
372 metadata["metadata_%s" % key] = value 77 metadata[f"metadata_{key}"] = value
78
79 for key, value in node.findall("tool")[0].attrib.items():
80 metadata[f"tool_{key}"] = value
81
373 # Additional Mappings applied: 82 # Additional Mappings applied:
374 metadata["dataset_edam_format"] = ( 83 metadata[
375 '<a target="_blank" href="http://edamontology.org/{0}">{1}</a>'.format( 84 "dataset_edam_format"
376 metadata["dataset_edam_format"], metadata["dataset_file_ext"] 85 ] = '<a target="_blank" href="http://edamontology.org/{0}">{1}</a>'.format(
377 ) 86 metadata["dataset_edam_format"], metadata["dataset_file_ext"]
378 ) 87 )
379 metadata["history_user_email"] = '<a href="mailto:{0}">{0}</a>'.format( 88 metadata["history_user_email"] = '<a href="mailto:{0}">{0}</a>'.format(
380 metadata["history_user_email"] 89 metadata["history_user_email"]
381 ) 90 )
382 metadata["hist_name"] = metadata["history_display_name"] 91 metadata[
383 metadata["history_display_name"] = ( 92 "history_display_name"
384 '<a target="_blank" href="{galaxy}/history/view/{encoded_hist_id}">{hist_name}</a>'.format( 93 ] = '<a target="_blank" href="{galaxy}/history/view/{encoded_hist_id}">{hist_name}</a>'.format(
385 galaxy=GALAXY_INFRASTRUCTURE_URL, 94 galaxy=GALAXY_INFRASTRUCTURE_URL,
386 encoded_hist_id=metadata.get("history_id", "not available"), 95 encoded_hist_id=metadata["history_id"],
387 hist_name=metadata.get("history_display_name", "not available"), 96 hist_name=metadata["history_display_name"],
388 )
389 ) 97 )
390 if node.findall("tool"): 98 metadata[
391 for key, value in node.findall("tool")[0].attrib.items(): 99 "tool_tool"
392 metadata["tool_%s" % key] = value 100 ] = '<a target="_blank" href="{galaxy}/datasets/{encoded_id}/show_params">{tool_id}</a>'.format(
393 metadata["tool_tool"] = ( 101 galaxy=GALAXY_INFRASTRUCTURE_URL,
394 '<a target="_blank" href="{galaxy}/datasets/{encoded_id}/show_params">{tool_id}{tool_version}</a>'.format( 102 encoded_id=metadata["dataset_id"],
395 galaxy=GALAXY_INFRASTRUCTURE_URL, 103 tool_id=metadata["tool_tool_id"],
396 encoded_id=metadata.get("dataset_id", ""), 104 # tool_version=metadata['tool_tool_version'],
397 tool_id=metadata.get("tool_tool_id", ""),
398 tool_version=metadata.get("tool_tool_version", ""),
399 )
400 ) 105 )
106
107 # Load additional metadata from a TSV file if any given by user
108 bonus = node.findall("bonus")
109 if bonus and "src" in bonus[0].attrib and bonus[0].attrib["src"]:
110 with open(bonus[0].attrib["src"], "r") as bonus_tsv:
111 bonus_content = csv.reader(bonus_tsv, delimiter="\t", quotechar='"')
112 for row in bonus_content:
113 if len(row) == 2:
114 if row[0] in metadata:
115 log.warning(f"Overwriting existing metadata {row[0]} with value from bonus file {row[1]}")
116 metadata[row[0]] = row[1]
117 else:
118 log.warning(f"Skipping invalid bonus metadata line: {row}")
119
401 return metadata 120 return metadata
402 121
403 122
404 class JbrowseConnector(object): 123 class JbrowseConnector(object):
405 def __init__(self, outdir, jbrowse2path): 124 def __init__(self, jbrowse, outdir, update):
406 self.bpPerPx = 50 125 self.jbrowse = jbrowse
407 self.trackCounter = 0 # to avoid name clashes 126 self.outdir = outdir
408 self.assemblies = [] # these require more than a few line diff. 127 self.update = update
409 self.assmeta = {} 128
410 self.ass_first_contigs = ( 129 self.tracksToIndex = {}
411 [] 130
412 ) # for default session - these are read as first line of the assembly .fai 131 # This is the id of the current assembly
413 self.giURL = GALAXY_INFRASTRUCTURE_URL 132 self.assembly_ids = {}
414 self.outdir = os.path.abspath(outdir) 133
415 self.jbrowse2path = jbrowse2path 134 self.default_views = {}
416 os.makedirs(self.outdir, exist_ok=True) 135
417 self.genome_names = [] 136 self.plugins = []
418 self.trackIdlist = [] 137
419 self.tracksToAdd = {} 138 self.use_synteny_viewer = False
420 self.config_json = {} 139
421 self.config_json_file = os.path.join(outdir, "config.json") 140 self.synteny_tracks = []
422 self.clone_jbrowse(realclone=False) 141
142 self.clone_jbrowse(self.jbrowse, self.outdir)
143
144 # If upgrading, look at the existing data
145 self.check_existing(self.outdir)
423 146
424 def get_cwd(self, cwd): 147 def get_cwd(self, cwd):
425 if cwd: 148 if cwd:
426 return self.outdir 149 return self.outdir
427 else: 150 else:
428 return subprocess.check_output(["pwd"]).decode("utf-8").strip() 151 return subprocess.check_output(['pwd']).decode('utf-8').strip()
152 # return None
429 153
430 def subprocess_check_call(self, command, output=None, cwd=True): 154 def subprocess_check_call(self, command, output=None, cwd=True):
431 if output: 155 if output:
432 if logCommands: 156 log.debug(f"cd {self.get_cwd(cwd)} && {' '.join(command)} > {output.name}")
433 log.debug(
434 "cd %s && %s > %s", self.get_cwd(cwd), " ".join(command), output
435 )
436 subprocess.check_call(command, cwd=self.get_cwd(cwd), stdout=output) 157 subprocess.check_call(command, cwd=self.get_cwd(cwd), stdout=output)
437 else: 158 else:
438 if logCommands: 159 log.debug(f"cd {self.get_cwd(cwd)} && {' '.join(command)}")
439 log.debug("cd %s && %s", self.get_cwd(cwd), " ".join(command))
440 subprocess.check_call(command, cwd=self.get_cwd(cwd)) 160 subprocess.check_call(command, cwd=self.get_cwd(cwd))
441 161
442 def subprocess_popen(self, command, cwd=True): 162 def subprocess_popen(self, command, cwd=True):
443 if logCommands: 163 log.debug(f"cd {self.get_cwd(cwd)} && {command}")
444 log.debug(command)
445 p = subprocess.Popen( 164 p = subprocess.Popen(
446 command, 165 command,
447 cwd=self.outdir, 166 cwd=self.get_cwd(cwd),
448 shell=True, 167 shell=True,
449 stdin=subprocess.PIPE, 168 stdin=subprocess.PIPE,
450 stdout=subprocess.PIPE, 169 stdout=subprocess.PIPE,
451 stderr=subprocess.PIPE, 170 stderr=subprocess.PIPE,
452 ) 171 )
453 output, err = p.communicate() 172 output, err = p.communicate()
454 retcode = p.returncode 173 retcode = p.returncode
455 if retcode != 0: 174 if retcode != 0:
456 log.error(command) 175 log.error(f"cd {self.get_cwd(cwd)} && {command}")
457 log.error(output) 176 log.error(output)
458 log.error(err) 177 log.error(err)
459 raise RuntimeError("Command failed with exit code %s" % (retcode)) 178 raise RuntimeError(f"Command failed with exit code {retcode}")
460 179
461 def subprocess_check_output(self, command): 180 def subprocess_check_output(self, command, cwd=True):
462 if logCommands: 181 log.debug(f"cd {self.get_cwd(cwd)} && {' '.join(command)}")
463 log.debug(" ".join(command)) 182 return subprocess.check_output(command, cwd=self.get_cwd(cwd))
464 return subprocess.check_output(command, cwd=self.outdir)
465 183
466 def symlink_or_copy(self, src, dest): 184 def symlink_or_copy(self, src, dest):
467 if "GALAXY_JBROWSE_SYMLINKS" in os.environ and bool( 185 # Use to support symlinking in jbrowse1, in jbrowse2 prefer to use remote uri
468 os.environ["GALAXY_JBROWSE_SYMLINKS"] 186 cmd = ["cp", src, dest]
469 ): 187
470 cmd = ["ln", "-s", src, dest] 188 return self.subprocess_check_call(cmd)
189
190 def _prepare_track_style(self, xml_conf):
191 style_data = {
192 "type": "LinearBasicDisplay", # No ideal default, but should be overwritten anyway
193 }
194
195 if "display" in xml_conf["style"]:
196 style_data["type"] = xml_conf["style"]["display"]
197
198 style_data["displayId"] = f"{xml_conf['label']}_{style_data['type']}"
199
200 style_data.update(self._prepare_renderer_config(style_data["type"], xml_conf["style"]))
201
202 return {"displays": [style_data]}
203
204 def _prepare_renderer_config(self, display_type, xml_conf):
205
206 style_data = {}
207
208 # if display_type in ("LinearBasicDisplay", "LinearVariantDisplay"):
209 # TODO LinearVariantDisplay does not understand these options when written in config.json
210 if display_type in ("LinearBasicDisplay"):
211
212 # Doc: https://jbrowse.org/jb2/docs/config/svgfeaturerenderer/
213 style_data["renderer"] = {
214 "type": "SvgFeatureRenderer",
215 "showLabels": xml_conf.get("show_labels", True),
216 "showDescriptions": xml_conf.get("show_descriptions", True),
217 "labels": {
218 "name": xml_conf.get("labels_name", "jexl:get(feature,'name') || get(feature,'id')"),
219 "description": xml_conf.get("descriptions_name", "jexl:get(feature,'note') || get(feature,'description')")
220 },
221 "displayMode": xml_conf.get("display_mode", "normal"),
222 "maxHeight": xml_conf.get("max_height", 1200),
223 }
224
225 elif display_type == "LinearArcDisplay":
226
227 # Doc: https://jbrowse.org/jb2/docs/config/arcrenderer/
228 style_data["renderer"] = {
229 "type": "ArcRenderer",
230 "label": xml_conf.get("labels_name", "jexl:get(feature,'score')"),
231 "displayMode": xml_conf.get("display_mode", "arcs"),
232 }
233
234 elif display_type == "LinearWiggleDisplay":
235
236 wig_renderer = xml_conf.get("renderer", "xyplot")
237 style_data["defaultRendering"] = wig_renderer
238
239 elif display_type == "MultiLinearWiggleDisplay":
240
241 wig_renderer = xml_conf.get("renderer", "multirowxy")
242 style_data["defaultRendering"] = wig_renderer
243
244 elif display_type == "LinearSNPCoverageDisplay":
245
246 # Does not work
247 # style_data["renderer"] = {
248 # "type": "SNPCoverageRenderer",
249 # "displayCrossHatches": xml_conf.get("display_cross_hatches", True),
250 # }
251
252 style_data["scaleType"] = xml_conf.get("scale_type", "linear")
253 if "min_score" in xml_conf:
254 style_data["minScore"] = xml_conf["min_score"]
255
256 if "max_score" in xml_conf:
257 style_data["maxScore"] = xml_conf["max_score"]
258
259 # Doc: https://jbrowse.org/jb2/docs/config/snpcoveragerenderer/
260
261 return style_data
262
263 def _prepare_format_details(self, xml_conf):
264 formatDetails = {
265 }
266
267 if "feature" in xml_conf["formatdetails"]:
268 feat_jexl = xml_conf["formatdetails"]["feature"]
269 for key, value in mapped_chars.items():
270 feat_jexl = feat_jexl.replace(value, key)
271 formatDetails["feature"] = feat_jexl
272
273 if "subfeature" in xml_conf["formatdetails"]:
274 sfeat_jexl = xml_conf["formatdetails"]["subfeature"]
275 for key, value in mapped_chars.items():
276 sfeat_jexl = sfeat_jexl.replace(value, key)
277 formatDetails["subfeatures"] = sfeat_jexl
278
279 if "depth" in xml_conf["formatdetails"]:
280 formatDetails["depth"] = int(xml_conf["formatdetails"]["depth"])
281
282 return {"formatDetails": formatDetails}
283
284 def _prepare_track_metadata(self, xml_conf):
285 metadata = {
286 }
287
288 metadata = xml_conf["metadata"]
289
290 return {"metadata": metadata}
291
292 def check_existing(self, destination):
293 existing = os.path.join(destination, "config.json")
294 if os.path.exists(existing):
295 with open(existing, "r") as existing_conf:
296 conf = json.load(existing_conf)
297 if "assemblies" in conf:
298 for assembly in conf["assemblies"]:
299 if "name" in assembly:
300
301 # Look for a default scaffold
302 default_seq = None
303 if 'defaultSession' in conf and 'views' in conf['defaultSession']:
304 for view in conf['defaultSession']['views']:
305 if 'init' in view and 'assembly' in view['init'] and 'loc' in view['init']:
306 if view['init']['assembly'] == assembly["name"]:
307 default_seq = view['init']['loc'].split(":")[0]
308 if "views" in view:
309 subviews = view["views"]
310 for subview in subviews:
311 if 'init' in subview and 'assembly' in subview['init'] and 'loc' in subview['init']:
312 if subview['init']['assembly'] == assembly["name"]:
313 default_seq = subview['init']['loc'].split(":")[0]
314
315 self.assembly_ids[assembly["name"]] = default_seq
316
317 def _load_old_genome_views(self):
318
319 views = {}
320
321 config_path = os.path.join(self.outdir, "config.json")
322 with open(config_path, "r") as config_file:
323 config_json = json.load(config_file)
324
325 # Find default synteny views existing from a previous jbrowse dataset
326 if 'defaultSession' in config_json and 'views' in config_json['defaultSession']:
327 for view in config_json['defaultSession']['views']:
328 if view['type'] != "LinearSyntenyView":
329 if 'init' in view and 'assembly' in view['init']:
330 views[view['init']['assembly']] = view
331
332 return views
333
334 def _load_old_synteny_views(self):
335
336 views = []
337
338 config_path = os.path.join(self.outdir, "config.json")
339 with open(config_path, "r") as config_file:
340 config_json = json.load(config_file)
341
342 # Find default synteny views existing from a previous jbrowse dataset
343 if 'defaultSession' in config_json and 'views' in config_json['defaultSession']:
344 for view in config_json['defaultSession']['views']:
345 if view['type'] == "LinearSyntenyView":
346 views.append(view)
347
348 return views
349
350 def add_assembly(self, path, label, is_remote=False, cytobands=None, ref_name_aliases=None):
351
352 if not is_remote:
353 # Find a non-existing filename for the new genome
354 # (to avoid colision when upgrading an existing instance)
355 rel_seq_path = os.path.join("data", label)
356 seq_path = os.path.join(self.outdir, rel_seq_path)
357 fn_try = 1
358 while (
359 os.path.exists(seq_path + ".fasta")
360 or os.path.exists(seq_path + ".fasta.gz")
361 or os.path.exists(seq_path + ".fasta.gz.fai")
362 or os.path.exists(seq_path + ".fasta.gz.gzi")
363 ):
364 rel_seq_path = os.path.join("data", f"{label}{fn_try}")
365 seq_path = os.path.join(self.outdir, rel_seq_path)
366 fn_try += 1
367
368 # Check if the assembly already exists from a previous run (--update mode)
369 if self.update:
370
371 config_path = os.path.join(self.outdir, "config.json")
372 with open(config_path, "r") as config_file:
373 config_json = json.load(config_file)
374
375 for asby in config_json['assemblies']:
376 if asby['name'] == label:
377
378 # Find default views existing for this assembly
379 if 'defaultSession' in config_json and 'views' in config_json['defaultSession']:
380 for view in config_json['defaultSession']['views']:
381 if 'init' in view and 'assembly' in view['init']:
382 if view['init']['assembly'] == label:
383
384 log.info("Found existing assembly from existing JBrowse2 instance, preserving it")
385
386 self.default_views[view['init']['assembly']] = view
387
388 return label
389
390 # Copy ref alias file if any
391 if ref_name_aliases:
392 copied_ref_name_aliases = seq_path + ".aliases"
393 shutil.copy(ref_name_aliases, copied_ref_name_aliases)
394 copied_ref_name_aliases = rel_seq_path + ".aliases"
395
396 # Copy cytobands file if any
397 if cytobands:
398 copied_cytobands = seq_path + ".cytobands"
399 shutil.copy(cytobands, copied_cytobands)
400 copied_cytobands = rel_seq_path + ".cytobands"
401
402 # Find a non-existing label for the new genome
403 # (to avoid colision when upgrading an existing instance)
404 lab_try = 1
405 uniq_label = label
406 while uniq_label in self.assembly_ids:
407 uniq_label = label + str(lab_try)
408 lab_try += 1
409
410 if is_remote:
411
412 # Find a default scaffold to display
413 with requests.get(path + ".fai", stream=True) as response:
414 response.raise_for_status()
415 first_seq = next(response.iter_lines())
416 first_seq = first_seq.decode("utf-8").split('\t')[0]
417
418 self.assembly_ids[uniq_label] = first_seq
419
420 # We assume we just need to suffix url with .fai and .gzi for indexes.
421 cmd_jb = [
422 "jbrowse",
423 "add-assembly",
424 "--name",
425 uniq_label,
426 "--type",
427 "bgzipFasta",
428 "--out",
429 self.outdir,
430 "--skipCheck",
431 ]
432
433 if ref_name_aliases:
434 cmd_jb.extend([
435 "--refNameAliases",
436 copied_ref_name_aliases,
437 ])
438
439 cmd_jb.append(path) # Path is an url in remote mode
440
441 self.subprocess_check_call(cmd_jb)
471 else: 442 else:
472 cmd = ["cp", src, dest] 443 # Find a default scaffold to display
473 444 with open(path, "r") as fa_handle:
474 return self.subprocess_check_call(cmd) 445 fa_header = fa_handle.readline()[1:].strip().split(" ")[0]
475 446
476 def _prepare_track_style(self, trackDict): 447 self.assembly_ids[uniq_label] = fa_header
477 448
478 style_data = { 449 copied_genome = seq_path + ".fasta"
479 "type": "LinearBasicDisplay", 450 shutil.copy(path, copied_genome)
480 "displayId": "%s-LinearBasicDisplay" % trackDict["trackId"], 451
452 # Compress with bgzip
453 cmd = ["bgzip", copied_genome]
454 self.subprocess_check_call(cmd)
455
456 # FAI Index
457 cmd = ["samtools", "faidx", copied_genome + ".gz"]
458 self.subprocess_check_call(cmd)
459
460 cmd_jb = [
461 "jbrowse",
462 "add-assembly",
463 "--load",
464 "inPlace",
465 "--name",
466 uniq_label,
467 "--type",
468 "bgzipFasta",
469 "--out",
470 self.outdir,
471 "--skipCheck",
472 ]
473
474 if ref_name_aliases:
475 cmd_jb.extend([
476 "--refNameAliases",
477 copied_ref_name_aliases,
478 ])
479
480 cmd_jb.append(rel_seq_path + ".fasta.gz")
481
482 self.subprocess_check_call(cmd_jb)
483
484 if cytobands:
485 self.add_cytobands(uniq_label, copied_cytobands)
486
487 return uniq_label
488
489 def add_cytobands(self, assembly_name, cytobands_path):
490
491 config_path = os.path.join(self.outdir, "config.json")
492 with open(config_path, "r") as config_file:
493 config_json = json.load(config_file)
494
495 config_data = {}
496
497 config_data["cytobands"] = {
498 "adapter": {
499 "type": "CytobandAdapter",
500 "cytobandLocation": {
501 "uri": cytobands_path
502 }
503 }
481 } 504 }
482 505
483 if trackDict.get("displays", None): # use first if multiple like bed 506 filled_assemblies = []
484 style_data["type"] = trackDict["displays"][0]["type"] 507 for assembly in config_json["assemblies"]:
485 style_data["displayId"] = trackDict["displays"][0]["displayId"] 508 if assembly["name"] == assembly_name:
486 return style_data 509 assembly.update(config_data)
487 510 filled_assemblies.append(assembly)
488 def getNrow(self, url): 511 config_json["assemblies"] = filled_assemblies
489 useuri = url.startswith("https://") or url.startswith("http://") 512
490 if not useuri: 513 with open(config_path, "w") as config_file:
491 fl = open(url, "r").readlines() 514 json.dump(config_json, config_file, indent=2)
492 nrow = len(fl) 515
516 def text_index(self):
517
518 for ass in self.tracksToIndex:
519 tracks = self.tracksToIndex[ass]
520 args = [
521 "jbrowse",
522 "text-index",
523 "--target",
524 self.outdir,
525 "--assemblies",
526 ass,
527 ]
528
529 tracks = ",".join(tracks)
530 if tracks:
531 args += ["--tracks", tracks]
532
533 log.info(f"-----> Running text-index on assembly {ass} and tracks {tracks}")
534
535 # Only run index if we want to index at least one
536 # If --tracks is not specified, it will index everything
537 self.subprocess_check_call(args)
538
539 def add_gc_content(self, parent, trackData, **kwargs):
540
541 adapter = {}
542 existing = os.path.join(self.outdir, "config.json")
543 if os.path.exists(existing):
544 with open(existing, "r") as existing_conf:
545 conf = json.load(existing_conf)
546 if "assemblies" in conf:
547 for assembly in conf["assemblies"]:
548 if assembly.get('name', "") == parent['uniq_id']:
549 adapter = assembly.get('sequence', {}).get('adapter', {})
550
551 json_track_data = {
552 "type": "GCContentTrack",
553 "trackId": trackData["label"],
554 "name": trackData["key"],
555 "adapter": adapter,
556 "category": [trackData["category"]],
557 "assemblyNames": [parent['uniq_id']],
558 }
559
560 style_json = self._prepare_track_style(trackData)
561
562 json_track_data.update(style_json)
563
564 self.subprocess_check_call(
565 [
566 "jbrowse",
567 "add-track-json",
568 "--target",
569 self.outdir,
570 json.dumps(json_track_data),
571 ]
572 )
573
574 def add_bigwig(self, parent, data, trackData, wiggleOpts, **kwargs):
575
576 if trackData['remote']:
577 rel_dest = data
493 else: 578 else:
494 try: 579 rel_dest = os.path.join("data", trackData["label"] + ".bw")
495 scontext = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) 580 dest = os.path.join(self.outdir, rel_dest)
496 scontext.check_hostname = False 581 self.symlink_or_copy(os.path.realpath(data), dest)
497 scontext.verify_mode = ssl.VerifyMode.CERT_NONE 582
498 with urllib.request.urlopen(url, context=scontext) as f: 583 style_json = self._prepare_track_style(trackData)
499 fl = f.readlines() 584
500 nrow = len(fl) 585 track_metadata = self._prepare_track_metadata(trackData)
501 except Exception: 586
502 nrow = 0 587 style_json.update(track_metadata)
503 logging.debug("getNrow %s returning %d" % (url, nrow)) 588
504 return nrow 589 self._add_track(
505 590 trackData["label"],
506 def process_genomes(self, genomes): 591 trackData["key"],
507 assembly = [] 592 trackData["category"],
508 assmeta = [] 593 rel_dest,
509 useuri = False 594 parent,
510 primaryGenome = None 595 config=style_json,
511 for i, genome_node in enumerate(genomes): 596 remote=trackData['remote']
512 this_genome = {} 597 )
513 if genome_node["useuri"] == "yes": 598
514 useuri = True 599 def add_bigwig_multi(self, parent, data_files, trackData, wiggleOpts, **kwargs):
515 genome_name = genome_node["label"].strip() 600
516 if len(genome_name) == 0: 601 subadapters = []
517 genome_name = os.path.splitext(os.path.basename(genome_node["path"]))[0] 602
518 if len(genome_name.split()) > 1: 603 sub_num = 0
519 genome_name = genome_name.split()[0] 604 for data in data_files:
520 # spaces and cruft break scripts when substituted 605 if trackData['remote']:
521 if not primaryGenome: 606 rel_dest = data[1]
522 primaryGenome = genome_name 607 else:
523 if genome_name not in self.genome_names: 608 rel_dest = os.path.join("data", f"{trackData['label']}_sub{sub_num}.bw")
524 self.genome_names.append(genome_name) 609 dest = os.path.join(self.outdir, rel_dest)
525 fapath = genome_node["path"] 610 self.symlink_or_copy(os.path.realpath(data[1]), dest)
526 if not useuri: 611
527 fapath = os.path.realpath(fapath) 612 subadapters.append({
528 assem, first_contig = self.make_assembly(fapath, genome_name, useuri) 613 "type": "BigWigAdapter",
529 assembly.append(assem) 614 "name": data[0],
530 self.ass_first_contigs.append(first_contig) 615 "bigWigLocation": {
531 if genome_name == primaryGenome: # first one 616 "uri": rel_dest,
532 this_genome["genome_name"] = genome_name # first one for all tracks 617 "locationType": "UriLocation"
533 this_genome["genome_sequence_adapter"] = assem["sequence"][ 618 }
534 "adapter" 619 })
535 ] 620 sub_num += 1
536 this_genome["genome_firstcontig"] = first_contig 621
537 assmeta.append(this_genome) 622 json_track_data = {
538 self.assemblies += assembly 623 "type": "MultiQuantitativeTrack",
539 self.assmeta[primaryGenome] = assmeta 624 "trackId": trackData["label"],
540 self.tracksToAdd[primaryGenome] = [] 625 "name": trackData["key"],
541 return primaryGenome 626 "adapter": {
542 627 "type": "MultiWiggleAdapter",
543 def make_assembly(self, fapath, gname, useuri): 628 "subadapters": subadapters
544 if useuri: 629 },
545 faname = fapath 630 "category": [trackData["category"]],
546 scontext = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) 631 "assemblyNames": [parent['uniq_id']],
547 scontext.check_hostname = False 632 }
548 scontext.verify_mode = ssl.VerifyMode.CERT_NONE 633
549 with urllib.request.urlopen(url=faname + ".fai", context=scontext) as f: 634 style_json = self._prepare_track_style(trackData)
550 fl = f.readline() 635
551 contig = fl.decode("utf8").strip() 636 json_track_data.update(style_json)
552 # Merlin 172788 8 60 61 637
638 track_metadata = self._prepare_track_metadata(trackData)
639
640 json_track_data.update(track_metadata)
641
642 self.subprocess_check_call(
643 [
644 "jbrowse",
645 "add-track-json",
646 "--target",
647 self.outdir,
648 json.dumps(json_track_data),
649 ]
650 )
651
652 # Anything ending in "am" (Bam or Cram)
653 def add_xam(self, parent, data, trackData, xamOpts, index=None, ext="bam", **kwargs):
654 index_ext = "bai"
655 if ext == "cram":
656 index_ext = "crai"
657
658 if trackData['remote']:
659 rel_dest = data
660 # Index will be set automatically as xam url + xai .suffix by add-track cmd
553 else: 661 else:
554 faname = gname + ".fa.gz" 662 rel_dest = os.path.join("data", trackData["label"] + f".{ext}")
555 fadest = os.path.realpath(os.path.join(self.outdir, faname)) 663 dest = os.path.join(self.outdir, rel_dest)
556 cmd = "bgzip -k -i -c -I '%s.gzi' '%s' > '%s'" % (fadest, fapath, fadest) 664 self.symlink_or_copy(os.path.realpath(data), dest)
557 subprocess.run(cmd, shell=True) 665
558 cmd = ["samtools", "faidx", fadest] 666 if index is not None and os.path.exists(os.path.realpath(index)):
667 # xai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest
668 self.subprocess_check_call(
669 ["cp", os.path.realpath(index), dest + f".{index_ext}"]
670 )
671 else:
672 # Can happen in exotic condition
673 # e.g. if bam imported as symlink with datatype=unsorted.bam, then datatype changed to bam
674 # => no index generated by galaxy, but there might be one next to the symlink target
675 # this trick allows to skip the bam sorting made by galaxy if already done outside
676 if os.path.exists(os.path.realpath(data) + f".{index_ext}"):
677 self.symlink_or_copy(
678 os.path.realpath(data) + f".{index_ext}", dest + f".{index_ext}"
679 )
680 else:
681 log.warn(
682 f"Could not find a bam index (.{index_ext} file) for {data}"
683 )
684
685 style_json = self._prepare_track_style(trackData)
686
687 track_metadata = self._prepare_track_metadata(trackData)
688
689 style_json.update(track_metadata)
690
691 self._add_track(
692 trackData["label"],
693 trackData["key"],
694 trackData["category"],
695 rel_dest,
696 parent,
697 config=style_json,
698 remote=trackData['remote']
699 )
700
701 def add_vcf(self, parent, data, trackData, vcfOpts={}, zipped=False, **kwargs):
702 if trackData['remote']:
703 rel_dest = data
704 else:
705 if zipped:
706 rel_dest = os.path.join("data", trackData["label"] + ".vcf.gz")
707 dest = os.path.join(self.outdir, rel_dest)
708 shutil.copy(os.path.realpath(data), dest)
709 else:
710 rel_dest = os.path.join("data", trackData["label"] + ".vcf")
711 dest = os.path.join(self.outdir, rel_dest)
712 shutil.copy(os.path.realpath(data), dest)
713
714 cmd = ["bgzip", dest]
715 self.subprocess_check_call(cmd)
716 cmd = ["tabix", dest + ".gz"]
717 self.subprocess_check_call(cmd)
718
719 rel_dest = os.path.join("data", trackData["label"] + ".vcf.gz")
720
721 style_json = self._prepare_track_style(trackData)
722
723 formatdetails = self._prepare_format_details(trackData)
724
725 style_json.update(formatdetails)
726
727 track_metadata = self._prepare_track_metadata(trackData)
728
729 style_json.update(track_metadata)
730
731 self._add_track(
732 trackData["label"],
733 trackData["key"],
734 trackData["category"],
735 rel_dest,
736 parent,
737 config=style_json,
738 remote=trackData['remote']
739 )
740
741 def add_gff(self, parent, data, format, trackData, gffOpts, **kwargs):
742 if trackData['remote']:
743 rel_dest = data
744 else:
745 rel_dest = os.path.join("data", trackData["label"] + ".gff")
746 dest = os.path.join(self.outdir, rel_dest)
747 rel_dest = rel_dest + ".gz"
748
749 self._sort_gff(data, dest)
750
751 style_json = self._prepare_track_style(trackData)
752
753 formatdetails = self._prepare_format_details(trackData)
754
755 style_json.update(formatdetails)
756
757 track_metadata = self._prepare_track_metadata(trackData)
758
759 style_json.update(track_metadata)
760
761 if gffOpts.get('index', 'false') in ("yes", "true", "True"):
762 if parent['uniq_id'] not in self.tracksToIndex:
763 self.tracksToIndex[parent['uniq_id']] = []
764 self.tracksToIndex[parent['uniq_id']].append(trackData["label"])
765
766 self._add_track(
767 trackData["label"],
768 trackData["key"],
769 trackData["category"],
770 rel_dest,
771 parent,
772 config=style_json,
773 remote=trackData['remote']
774 )
775
776 def add_bed(self, parent, data, format, trackData, gffOpts, **kwargs):
777 if trackData['remote']:
778 rel_dest = data
779 else:
780 rel_dest = os.path.join("data", trackData["label"] + ".bed")
781 dest = os.path.join(self.outdir, rel_dest)
782 rel_dest = rel_dest + ".gz"
783
784 self._sort_bed(data, dest)
785
786 style_json = self._prepare_track_style(trackData)
787
788 formatdetails = self._prepare_format_details(trackData)
789
790 style_json.update(formatdetails)
791
792 track_metadata = self._prepare_track_metadata(trackData)
793
794 style_json.update(track_metadata)
795
796 if gffOpts.get('index', 'false') in ("yes", "true", "True"):
797 if parent['uniq_id'] not in self.tracksToIndex:
798 self.tracksToIndex[parent['uniq_id']] = []
799 self.tracksToIndex[parent['uniq_id']].append(trackData["label"])
800
801 self._add_track(
802 trackData["label"],
803 trackData["key"],
804 trackData["category"],
805 rel_dest,
806 parent,
807 config=style_json,
808 remote=trackData['remote']
809 )
810
811 def add_paf(self, parent, data, trackData, pafOpts, **kwargs):
812
813 if trackData['remote']:
814 rel_dest = data
815
816 if rel_dest.endswith('pif') or rel_dest.endswith('pif.gz'):
817 adapter = "pif"
818 else:
819 adapter = "paf"
820 else:
821 rel_dest = os.path.join("data", trackData["label"] + ".pif.gz")
822 dest = os.path.join(self.outdir, rel_dest)
823
824 cmd = ["jbrowse", "make-pif", "--out", dest, os.path.realpath(data)]
559 self.subprocess_check_call(cmd) 825 self.subprocess_check_call(cmd)
560 contig = open(fadest + ".fai", "r").readline().strip() 826
561 adapter = { 827 adapter = "pif"
562 "type": "BgzipFastaAdapter", 828
563 "fastaLocation": { 829 if trackData["style"]["display"] == "LinearBasicDisplay":
564 "uri": faname, 830 # Normal style track
565 }, 831
566 "faiLocation": { 832 json_track_data = {
567 "uri": faname + ".fai", 833 "type": "SyntenyTrack",
568 }, 834 "trackId": trackData["label"],
569 "gziLocation": { 835 "name": trackData["key"],
570 "uri": faname + ".gzi", 836 "adapter": {
571 }, 837 "type": "PairwiseIndexedPAFAdapter",
838 "pifGzLocation": {
839 "uri": rel_dest,
840 },
841 "index": {
842 "location": {
843 "uri": rel_dest + ".tbi",
844 }
845 },
846 },
847 "category": [trackData["category"]],
848 "assemblyNames": [parent['uniq_id']],
849 }
850 else:
851 # Synteny viewer
852
853 json_track_data = {
854 "type": "SyntenyTrack",
855 "trackId": trackData["label"],
856 "name": trackData["key"],
857 "adapter": {
858 "assemblyNames": [
859 parent['uniq_id'],
860 "", # Placeholder until we know the next genome id
861 ],
862 },
863 "category": [trackData["category"]],
864 "assemblyNames": [
865 parent['uniq_id'],
866 "", # Placeholder until we know the next genome id
867 ]
868 }
869
870 if adapter == "pif":
871 json_track_data["adapter"].update({
872 "type": "PairwiseIndexedPAFAdapter",
873 "pifGzLocation": {
874 "uri": rel_dest,
875 },
876 "index": {
877 "location": {
878 "uri": rel_dest + ".tbi",
879 }
880 },
881 })
882 else:
883 json_track_data["adapter"].update({
884 "type": "PAFAdapter",
885 "pafLocation": {
886 "uri": rel_dest,
887 },
888 })
889
890 style_json = self._prepare_track_style(trackData)
891
892 json_track_data.update(style_json)
893
894 track_metadata = self._prepare_track_metadata(trackData)
895
896 json_track_data.update(track_metadata)
897
898 if trackData["style"]["display"] == "LinearBasicDisplay":
899 self.subprocess_check_call(
900 [
901 "jbrowse",
902 "add-track-json",
903 "--target",
904 self.outdir,
905 json.dumps(json_track_data),
906 ]
907 )
908 else:
909 self.synteny_tracks.append(json_track_data)
910
911 def add_hic(self, parent, data, trackData, hicOpts, **kwargs):
912 if trackData['remote']:
913 rel_dest = data
914 else:
915 rel_dest = os.path.join("data", trackData["label"] + ".hic")
916 dest = os.path.join(self.outdir, rel_dest)
917 self.symlink_or_copy(os.path.realpath(data), dest)
918
919 style_json = self._prepare_track_style(trackData)
920
921 track_metadata = self._prepare_track_metadata(trackData)
922
923 style_json.update(track_metadata)
924
925 self._add_track(
926 trackData["label"],
927 trackData["key"],
928 trackData["category"],
929 rel_dest,
930 parent,
931 config=style_json,
932 remote=trackData['remote']
933 )
934
935 def add_maf(self, parent, data, trackData, mafOpts, **kwargs):
936
937 # Add needed plugin
938 plugin_def = {
939 "name": "MafViewer",
940 "url": "https://unpkg.com/jbrowse-plugin-mafviewer/dist/jbrowse-plugin-mafviewer.umd.production.min.js"
572 } 941 }
573 first_contig = contig.split()[:2] 942 self.plugins.append(plugin_def)
574 first_contig.insert(0, gname) 943
575 trackDict = { 944 rel_dest = os.path.join("data", trackData["label"] + ".maf")
576 "name": gname, 945 dest = os.path.join(self.outdir, rel_dest)
577 "sequence": { 946
578 "type": "ReferenceSequenceTrack", 947 assembly_name = mafOpts.get("assembly_name", "")
579 "trackId": gname, 948 if not assembly_name:
580 "adapter": adapter, 949 # Guess from assembly
581 }, 950 assembly_name = parent['uniq_id']
582 "displays": [ 951
583 { 952 self._convert_maf(data, dest, assembly_name)
584 "type": "LinearReferenceSequenceDisplay", 953
585 "displayId": "%s-LinearReferenceSequenceDisplay" % gname, 954 # Extract samples list
586 },
587 {
588 "type": "LinearGCContentDisplay",
589 "displayId": "%s-LinearGCContentDisplay" % gname,
590 },
591 ],
592 }
593 return (trackDict, first_contig)
594
595 def add_default_view(self):
596 cmd = [
597 "jbrowse",
598 "set-default-session",
599 "-s",
600 self.config_json_file,
601 "-t",
602 ",".join(self.trackIdlist),
603 "-n",
604 "JBrowse2 in Galaxy",
605 "--target",
606 self.config_json_file,
607 "-v",
608 " LinearGenomeView",
609 ]
610 self.subprocess_check_call(cmd)
611
612 def write_config(self):
613 with open(self.config_json_file, "w") as fp:
614 json.dump(self.config_json, fp, indent=2)
615
616 def text_index(self):
617 # Index tracks
618 e = os.environ
619 e["SHELL"] = "/bin/sh"
620 cmd = ["jbrowse", "text-index"]
621 subprocess.run(cmd, env=e, shell=True)
622
623 def add_hic(self, data, trackData):
624 """
625 HiC adapter.
626 https://github.com/aidenlab/hic-format/blob/master/HiCFormatV9.md
627 for testing locally, these work:
628 HiC data is from https://s3.amazonaws.com/igv.broadinstitute.org/data/hic/intra_nofrag_30.hic
629 using hg19 reference track as a
630 'BgzipFastaAdapter'
631 fastaLocation:
632 uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz',
633 faiLocation:
634 uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.fai',
635 gziLocation:
636 uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.gzi',
637 Cool will not be likely to be a good fit - see discussion at https://github.com/GMOD/jbrowse-components/issues/2438
638
639 """
640 tId = trackData["label"]
641 wasCool = trackData["wasCool"]
642 # can be served - if public.
643 # dsId = trackData["metadata"]["dataset_id"]
644 # url = "%s/api/datasets/%s/display?to_ext=hic " % (self.giURL, dsId)
645 useuri = trackData["useuri"].lower() == "yes"
646 logging.debug("wasCool=%s, data=%s, tId=%s" % (wasCool, data, tId))
647 if useuri:
648 uri = data
649 else:
650 uri = tId + ".hic"
651 if not wasCool:
652 dest = os.path.join(self.outdir, uri)
653 if not os.path.exists(dest):
654 cmd = ["cp", data, dest]
655 self.subprocess_check_call(cmd)
656 else:
657 logging.error("not wasCool but %s exists" % dest)
658 categ = trackData["category"]
659 trackDict = {
660 "type": "HicTrack",
661 "trackId": tId,
662 "name": trackData["name"],
663 "assemblyNames": [trackData["assemblyNames"]],
664 "displays": [
665 {
666 "type": "LinearHicDisplay",
667 "displayId": "%s-LinearHicDisplay" % tId,
668 }
669 ],
670 "category": [
671 categ,
672 ],
673 "adapter": {"type": "HicAdapter", "hicLocation": {"uri": uri}},
674 }
675 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict))
676 self.trackIdlist.append(tId)
677
678 def add_maf(self, data, trackData):
679 """
680 from https://github.com/cmdcolin/maf2bed
681 Note: Both formats start with a MAF as input, and note that your MAF file should contain the species name and chromosome name
682 e.g. hg38.chr1 in the sequence identifiers.
683 need the reference id - eg hg18, for maf2bed.pl as the first parameter
684 """
685 tId = trackData["label"]
686 mafPlugin = {
687 "plugins": [
688 {
689 "name": "MafViewer",
690 "url": "https://unpkg.com/jbrowse-plugin-mafviewer/dist/jbrowse-plugin-mafviewer.umd.production.min.js",
691 }
692 ]
693 }
694 categ = trackData["category"]
695 fname = tId
696 dest = os.path.join(self.outdir, fname)
697 gname = trackData["assemblyNames"]
698
699 cmd = [
700 "bash",
701 os.path.join(INSTALLED_TO, "convertMAF.sh"),
702 data,
703 gname,
704 INSTALLED_TO,
705 dest,
706 ]
707 self.subprocess_check_call(cmd)
708 mafs = open(data, "r").readlines() 955 mafs = open(data, "r").readlines()
709 mafss = [x for x in mafs if (x.startswith("s\t") or x.startswith("s "))] 956 mafss = [x for x in mafs if (x.startswith("s\t") or x.startswith("s "))]
710 samp = [x.split()[1] for x in mafss if len(x.split()) > 0] 957 samp = [x.split()[1] for x in mafss if len(x.split()) > 0]
711 sampu = list(dict.fromkeys(samp)) 958 sampu = list(dict.fromkeys(samp))
712 samples = [x.split(".")[0] for x in sampu] 959 samples = [x.split(".")[0] for x in sampu]
713 samples.sort() 960 samples.sort()
714 if logCommands: 961
715 logging.debug( 962 json_track_data = {
716 "$$$$ cmd=%s, mafss=%s samp=%s samples=%s"
717 % (" ".join(cmd), mafss, samp, samples)
718 )
719 trackDict = {
720 "type": "MafTrack", 963 "type": "MafTrack",
721 "trackId": tId, 964 "trackId": trackData["label"],
722 "name": trackData["name"], 965 "name": trackData["key"],
723 "category": [
724 categ,
725 ],
726 "adapter": { 966 "adapter": {
727 "type": "MafTabixAdapter", 967 "type": "MafTabixAdapter",
728 "samples": samples, 968 "samples": samples,
729 "bedGzLocation": { 969 "bedGzLocation": {
730 "uri": fname + ".sorted.bed.gz", 970 "uri": rel_dest + ".gz",
731 }, 971 },
732 "index": { 972 "index": {
733 "location": { 973 "location": {
734 "uri": fname + ".sorted.bed.gz.tbi", 974 "uri": rel_dest + ".gz.tbi",
735 }, 975 },
736 }, 976 },
737 }, 977 },
738 "assemblyNames": [trackData["assemblyNames"]], 978 "category": [trackData["category"]],
739 "displays": [ 979 "assemblyNames": [parent['uniq_id']],
740 {
741 "type": "LinearBasicDisplay",
742 "displayId": "%s-LinearBasicDisplay" % tId,
743 },
744 {"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId},
745 ],
746 } 980 }
747 style_json = self._prepare_track_style(trackDict) 981
748 trackDict["style"] = style_json 982 style_json = self._prepare_track_style(trackData)
749 self.tracksToAdd[gname].append(copy.copy(trackDict)) 983
750 self.trackIdlist.append(tId) 984 json_track_data.update(style_json)
751 if self.config_json.get("plugins", None): 985
752 self.config_json["plugins"].append(mafPlugin["plugins"][0]) 986 track_metadata = self._prepare_track_metadata(trackData)
753 else: 987
754 self.config_json.update(mafPlugin) 988 json_track_data.update(track_metadata)
989
990 self.subprocess_check_call(
991 [
992 "jbrowse",
993 "add-track-json",
994 "--target",
995 self.outdir,
996 json.dumps(json_track_data),
997 ]
998 )
999
1000 def add_sparql(self, parent, url, query, query_refnames, trackData):
1001 json_track_data = {
1002 "type": "FeatureTrack",
1003 "trackId": trackData["label"],
1004 "name": trackData["key"],
1005 "adapter": {
1006 "type": "SPARQLAdapter",
1007 "endpoint": {"uri": url, "locationType": "UriLocation"},
1008 "queryTemplate": query,
1009 },
1010 "category": [trackData["category"]],
1011 "assemblyNames": [parent['uniq_id']],
1012 }
1013
1014 if query_refnames:
1015 json_track_data["adapter"]["refNamesQueryTemplate"]: query_refnames
1016
1017 # TODO handle metadata somehow for sparql too
1018
1019 self.subprocess_check_call(
1020 [
1021 "jbrowse",
1022 "add-track-json",
1023 "--target",
1024 self.outdir,
1025 json.dumps(json_track_data),
1026 ]
1027 )
1028
1029 def _add_track(self, track_id, label, category, path, assembly, config=None, trackType=None, load_action="inPlace", assemblies=None, remote=False):
1030 """
1031 Adds a track to config.json using Jbrowse add-track cli
1032
1033 By default, using `--load inPlace`: the file is supposed to be already placed at the `path` relative to
1034 the outdir, `jbrowse add-track` will not touch it and trust us that the file is there and ready to use.
1035
1036 With `load_action` parameter, you can ask `jbrowse add-track` to copy/move/symlink the file for you.
1037 Not done by default because we often need more control on file copying/symlink for specific cases (indexes, symlinks of symlinks, ...)
1038 """
1039
1040 cmd = [
1041 "jbrowse",
1042 "add-track",
1043 "--name",
1044 label,
1045 "--category",
1046 category,
1047 "--target",
1048 self.outdir,
1049 "--trackId",
1050 track_id,
1051 "--assemblyNames",
1052 assemblies if assemblies else assembly['uniq_id'],
1053 ]
1054
1055 if not remote:
1056 cmd.append("--load")
1057 cmd.append(load_action)
1058
1059 if config:
1060 cmd.append("--config")
1061 cmd.append(json.dumps(config))
1062
1063 if trackType:
1064 cmd.append("--trackType")
1065 cmd.append(trackType)
1066
1067 cmd.append(path)
1068
1069 self.subprocess_check_call(cmd)
755 1070
756 def _sort_gff(self, data, dest): 1071 def _sort_gff(self, data, dest):
757 # Only index if not already done 1072 # Only index if not already done
758 if not os.path.exists(dest): 1073 if not os.path.exists(dest):
759 e = os.environ 1074 # Not using jbrowse sort-gff because it uses sort and has the problem exposed on https://github.com/tao-bioinfo/gff3sort
760 e["SHELL"] = "/bin/sh" 1075 cmd = f"gff3sort.pl --precise '{data}' | grep -v \"^$\" > '{dest}'"
761 cmd = "jbrowse sort-gff %s | bgzip -c > %s" % (data, dest) 1076 self.subprocess_popen(cmd, cwd=False)
762 subprocess.run(cmd, env=e, shell=True) 1077
763 self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest]) 1078 self.subprocess_check_call(["bgzip", "-f", dest], cwd=False)
764 1079 self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest + ".gz"], cwd=False)
765 def add_gff(self, data, trackData):
766 tId = trackData["label"]
767 useuri = trackData["useuri"].lower() == "yes"
768 if useuri:
769 url = trackData["path"]
770 else:
771 url = tId + ".gz"
772 dest = os.path.join(self.outdir, url)
773 self._sort_gff(data, dest)
774 categ = trackData["category"]
775 trackDict = {
776 "type": "FeatureTrack",
777 "trackId": tId,
778 "name": trackData["name"],
779 "assemblyNames": [trackData["assemblyNames"]],
780 "category": [
781 categ,
782 ],
783 "adapter": {
784 "type": "Gff3TabixAdapter",
785 "gffGzLocation": {
786 "uri": url,
787 },
788 "index": {
789 "location": {
790 "uri": url + ".tbi",
791 }
792 },
793 },
794 "displays": [
795 {
796 "type": "LinearBasicDisplay",
797 "displayId": "%s-LinearBasicDisplay" % tId,
798 },
799 {
800 "type": "LinearArcDisplay",
801 "displayId": "%s-LinearArcDisplay" % tId,
802 },
803 ],
804 }
805 style_json = self._prepare_track_style(trackDict)
806 trackDict["style"] = style_json
807 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict))
808 self.trackIdlist.append(tId)
809
810 def add_bigwig(self, data, trackData):
811 tId = trackData["label"]
812 useuri = trackData["useuri"].lower() == "yes"
813 if useuri:
814 url = data
815 else:
816 url = tId
817 # slashes in names cause path trouble
818 dest = os.path.join(self.outdir, url)
819 cmd = ["cp", data, dest]
820 self.subprocess_check_call(cmd)
821 bwloc = {"uri": url}
822 categ = trackData["category"]
823 trackDict = {
824 "type": "QuantitativeTrack",
825 "trackId": tId,
826 "name": trackData["name"],
827 "category": [
828 categ,
829 ],
830 "assemblyNames": [trackData["assemblyNames"]],
831 "adapter": {
832 "type": "BigWigAdapter",
833 "bigWigLocation": bwloc,
834 },
835 "displays": [
836 {
837 "type": "LinearWiggleDisplay",
838 "displayId": "%s-LinearWiggleDisplay" % tId,
839 }
840 ],
841 }
842 style_json = self._prepare_track_style(trackDict)
843 trackDict["style"] = style_json
844 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict))
845 self.trackIdlist.append(tId)
846
847 def add_bam(self, data, trackData, bam_indexes=None, **kwargs):
848 tId = trackData["label"]
849 realFName = trackData["path"]
850 useuri = trackData["useuri"].lower() == "yes"
851 categ = trackData["category"]
852 if useuri:
853 url = data
854 else:
855 fname = tId
856 dest = "%s/%s" % (self.outdir, fname)
857 self.subprocess_check_call(["cp", data, dest])
858 url = fname
859 bindex = fname + ".bai"
860 bi = bam_indexes.split(",")
861 bam_index = [
862 x.split("~~~")[1].strip()
863 for x in bi
864 if "~~~" in x and x.split("~~~")[0].strip() == realFName
865 ]
866 logging.debug(
867 "===realFName=%s got %s as bam_indexes %s as bi, %s for bam_index"
868 % (realFName, bam_indexes, bi, bam_index)
869 )
870 if len(bam_index) > 0 and os.path.exists(os.path.realpath(bam_index[0])):
871 self.subprocess_check_call(["cp", bam_index[0], bindex])
872 else:
873 cmd = ["samtools", "index", "-b", "-o", bindex, data]
874 self.subprocess_check_call(cmd)
875 trackDict = {
876 "type": "AlignmentsTrack",
877 "trackId": tId,
878 "name": trackData["name"],
879 "category": [
880 categ,
881 ],
882 "assemblyNames": [trackData["assemblyNames"]],
883 "adapter": {
884 "type": "BamAdapter",
885 "bamLocation": {"uri": url},
886 "index": {
887 "location": {
888 "uri": bindex,
889 }
890 },
891 },
892 "displays": [
893 {
894 "type": "LinearAlignmentsDisplay",
895 "displayId": "%s-LinearAlignmentsDisplay" % tId,
896 },
897 ],
898 }
899 style_json = self._prepare_track_style(trackDict)
900 trackDict["style"] = style_json
901 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict))
902 self.trackIdlist.append(tId)
903
904 def add_cram(self, data, trackData, cram_indexes=None, **kwargs):
905 tId = trackData["label"]
906 realFName = trackData["path"]
907 categ = trackData["category"]
908 useuri = trackData["useuri"].lower() == "yes"
909 gsa = self.assmeta.get(trackData["assemblyNames"], None)
910 if gsa:
911 genseqad = gsa[0]["genome_sequence_adapter"]
912 else:
913 genseqad = "Not found"
914 logging.warning("No adapter found for cram %s in gsa=%s" % (tId, gsa))
915 if useuri:
916 url = data
917 else:
918 fname = tId
919 dest = os.path.join(self.outdir, fname)
920 url = fname
921 self.subprocess_check_call(["cp", data, dest])
922 ci = cram_indexes.split(",")
923 cram_index = [
924 x.split("~~~")[1].strip()
925 for x in ci
926 if "~~~" in x and x.split("~~~")[0].strip() == realFName
927 ]
928 logging.debug(
929 "===realFName=%s got %s as cram_indexes %s as ci, %s for cram_index"
930 % (realFName, cram_indexes, ci, cram_index)
931 )
932 if len(cram_index) > 0 and os.path.exists(cram_index[0]):
933 if not os.path.exists(dest + ".crai"):
934 # most probably made by galaxy and stored in galaxy dirs, need to copy it to dest
935 self.subprocess_check_call(
936 ["cp", os.path.realpath(cram_index[0]), dest + ".crai"]
937 )
938 else:
939 cpath = os.path.realpath(dest) + ".crai"
940 cmd = ["samtools", "index", "-c", "-o", cpath, os.path.realpath(dest)]
941 self.subprocess_check_call(cmd)
942 trackDict = {
943 "type": "AlignmentsTrack",
944 "trackId": tId,
945 "name": trackData["name"],
946 "category": [
947 categ,
948 ],
949 "assemblyNames": [trackData["assemblyNames"]],
950 "adapter": {
951 "type": "CramAdapter",
952 "cramLocation": {"uri": url},
953 "craiLocation": {
954 "uri": url + ".crai",
955 },
956 "sequenceAdapter": genseqad,
957 },
958 "displays": [
959 {
960 "type": "LinearAlignmentsDisplay",
961 "displayId": "%s-LinearAlignmentsDisplay" % tId,
962 },
963 ],
964 }
965 style_json = self._prepare_track_style(trackDict)
966 trackDict["style"] = style_json
967 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict))
968 self.trackIdlist.append(tId)
969
970 def add_vcf(self, data, trackData):
971 tId = trackData["label"]
972 categ = trackData["category"]
973 useuri = trackData["useuri"].lower() == "yes"
974 if useuri:
975 url = data
976 else:
977 url = tId
978 dest = os.path.join(self.outdir, url)
979 cmd = ["bgzip", "-c", data]
980 with open(dest, "wb") as fout:
981 subprocess.run(cmd, stdout=fout)
982 cmd = ["tabix", "-f", "-p", "vcf", dest]
983 self.subprocess_check_call(cmd)
984 trackDict = {
985 "type": "VariantTrack",
986 "trackId": tId,
987 "name": trackData["name"],
988 "assemblyNames": [trackData["assemblyNames"]],
989 "category": [
990 categ,
991 ],
992 "adapter": {
993 "type": "VcfTabixAdapter",
994 "vcfGzLocation": {"uri": url},
995 "index": {
996 "location": {
997 "uri": url + ".tbi",
998 }
999 },
1000 },
1001 "displays": [
1002 {
1003 "type": "LinearVariantDisplay",
1004 "displayId": "%s-LinearVariantDisplay" % tId,
1005 },
1006 {
1007 "type": "ChordVariantDisplay",
1008 "displayId": "%s-ChordVariantDisplay" % tId,
1009 },
1010 {
1011 "type": "LinearPairedArcDisplay",
1012 "displayId": "%s-LinearPairedArcDisplay" % tId,
1013 },
1014 ],
1015 }
1016 style_json = self._prepare_track_style(trackDict)
1017 trackDict["style"] = style_json
1018 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict))
1019 self.trackIdlist.append(tId)
1020 1080
1021 def _sort_bed(self, data, dest): 1081 def _sort_bed(self, data, dest):
1022 # Only index if not already done 1082 # Only index if not already done
1023 if not os.path.exists(dest): 1083 if not os.path.exists(dest):
1024 cmd = ["sort", "-k1,1", "-k2,2n", data] 1084 cmd = ["sort", "-k1,1", "-k2,2n", data]
1025 ps = subprocess.run(cmd, check=True, capture_output=True) 1085 with open(dest, "w") as handle:
1026 cmd = ["bgzip", "-c"] 1086 self.subprocess_check_call(cmd, output=handle)
1027 with open(dest, "wb") as fout: 1087
1028 subprocess.run(cmd, input=ps.stdout, stdout=fout) 1088 self.subprocess_check_call(["bgzip", "-f", dest])
1029 cmd = ["tabix", "-f", "-p", "bed", dest] 1089 self.subprocess_check_call(["tabix", "-f", "-p", "bed", dest + ".gz"])
1030 self.subprocess_check_call(cmd) 1090
1031 1091 def _convert_maf(self, data, dest, assembly_name):
1032 def add_bed(self, data, ext, trackData): 1092 # Only convert if not already done
1033 bedPlugin = {"name": "BedScorePlugin", "umdLoc": {"uri": "bedscoreplugin.js"}} 1093 if not os.path.exists(dest):
1034 tId = trackData["label"] 1094
1035 categ = trackData["category"] 1095 dest_bed = dest + ".bed"
1036 useuri = trackData["useuri"].lower() == "yes" 1096 cmd = ["python", os.path.join(SELF_LOCATION, "maf2bed.py"), assembly_name, data, dest_bed]
1037 if useuri: 1097 self.subprocess_check_call(cmd, cwd=False)
1038 url = data 1098
1039 else: 1099 cmd = ["sort", "-k1,1", "-k2,2n", dest_bed]
1040 url = tId + ".gz" 1100 with open(dest, "w") as handle:
1041 dest = os.path.join(self.outdir, url) 1101 self.subprocess_check_call(cmd, output=handle)
1042 self._sort_bed(data, dest) 1102
1043 if True or trackData.get("usebedscore", None): 1103 self.subprocess_check_call(["bgzip", "-f", dest], cwd=False)
1044 bedgzlocation = { 1104 self.subprocess_check_call(["tabix", "-f", "-p", "bed", dest + ".gz"], cwd=False)
1045 "uri": url, 1105
1046 "columnNames": ["chr", "start", "end", "name", "score"], 1106 def process_annotations(self, track, parent):
1047 "scoreColumn": "score",
1048 }
1049 else:
1050 bedgzlocation = {
1051 "uri": url,
1052 }
1053 trackDict = {
1054 "type": "FeatureTrack",
1055 "trackId": tId,
1056 "name": trackData["name"],
1057 "assemblyNames": [trackData["assemblyNames"]],
1058 "adapter": {
1059 "category": [
1060 categ,
1061 ],
1062 "type": "BedTabixAdapter",
1063 "bedGzLocation": bedgzlocation,
1064 "index": {
1065 "location": {
1066 "uri": url + ".tbi",
1067 },
1068 },
1069 },
1070 "displays": [
1071 {
1072 "type": "LinearBasicDisplay",
1073 "displayId": "%s-LinearBasicDisplay" % tId,
1074 "renderer": {
1075 "type": "SvgFeatureRenderer",
1076 "color1": "jexl:customColor(feature)",
1077 },
1078 },
1079 {
1080 "type": "LinearPileupDisplay",
1081 "displayId": "%s-LinearPileupDisplay" % tId,
1082 },
1083 {
1084 "type": "LinearArcDisplay",
1085 "displayId": "%s-LinearArcDisplay" % tId,
1086 },
1087 ],
1088 }
1089 style_json = self._prepare_track_style(trackDict)
1090 trackDict["style"] = style_json
1091 if self.config_json.get("plugins", None):
1092 self.config_json["plugins"].append(bedPlugin)
1093 else:
1094 self.config_json["plugins"] = [
1095 bedPlugin,
1096 ]
1097 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict))
1098 self.trackIdlist.append(tId)
1099
1100 def add_paf(self, data, trackData, pafOpts, **kwargs):
1101 canPIF = True
1102 tname = trackData["name"]
1103 tId = trackData["label"]
1104 url = tId
1105 usePIF = False # much faster if indexed remotely or locally but broken in biocontainer.
1106 useuri = data.startswith("http://") or data.startswith("https://")
1107 if not useuri:
1108 if canPIF:
1109 fakeName = os.path.join(self.outdir, "%s.paf" % tId)
1110 url = "%s.pif.gz" % tId
1111 cmd = ["cp", data, fakeName]
1112 self.subprocess_check_call(cmd)
1113 e = os.environ
1114 e["SHELL"] = "/bin/sh"
1115 cmd = [
1116 "jbrowse",
1117 "make-pif",
1118 fakeName,
1119 ]
1120 subprocess.run(cmd, env=e, shell=True)
1121 usePIF = True
1122 else:
1123 dest = os.path.join(self.outdir, url)
1124 self.symlink_or_copy(os.path.realpath(data), dest)
1125 else:
1126 url = data
1127 if data.endswith(".pif.gz") or data.endswith(".paf.gz"): # is tabix
1128 usePIF = True
1129 categ = trackData["category"]
1130 pg = pafOpts["genome"].split(",")
1131 pgc = [x.strip() for x in pg if x.strip() > ""]
1132 gnomes = [x.split("~~~") for x in pgc]
1133 logging.debug("pg=%s, gnomes=%s" % (pg, gnomes))
1134 passnames = [trackData["assemblyNames"]] # always first
1135 for i, (gpath, gname) in enumerate(gnomes):
1136 # may have been forgotten by user for uri
1137 if len(gname) == 0:
1138 gn = os.path.basename(gpath)
1139 gname = os.path.splitext(gn)[0]
1140 # trouble from spacey names in command lines avoidance
1141 if len(gname.split()) > 1:
1142 gname = gname.split()[0]
1143 if gname not in passnames:
1144 passnames.append(gname)
1145 useuri = pafOpts["useuri"] == "true"
1146 if gname not in self.genome_names:
1147 # ignore if already there - eg for duplicates among pafs.
1148 asstrack, first_contig = self.make_assembly(gpath, gname, useuri)
1149 self.genome_names.append(gname)
1150 self.tracksToAdd[gname] = []
1151 self.assemblies.append(copy.copy(asstrack))
1152 self.ass_first_contigs.append(copy.copy(first_contig))
1153 trackDict = {
1154 "type": "SyntenyTrack",
1155 "trackId": tId,
1156 "assemblyNames": passnames,
1157 "category": [
1158 categ,
1159 ],
1160 "name": tname,
1161 "displays": [
1162 {
1163 "type": "LGVSyntenyDisplay",
1164 "displayId": "%s-LGVSyntenyDisplay" % tId,
1165 },
1166 {
1167 "type": "DotplotDisplay",
1168 "displayId": "%s-DotplotDisplay" % tId,
1169 },
1170 {
1171 "type": "LinearComparativeDisplay",
1172 "displayId": "%s-LinearComparativeDisplay" % tId,
1173 },
1174 {
1175 "type": "LinearBasicDisplay",
1176 "displayId": "%s-LinearSyntenyDisplay" % tId,
1177 },
1178 ],
1179 }
1180 if usePIF:
1181 trackDict["adapter"] = {
1182 "type": "PairwiseIndexedPAFAdapter",
1183 "pifGzLocation": {"uri": url},
1184 "assemblyNames": passnames,
1185 "index": {
1186 "location": {
1187 "uri": url + ".tbi",
1188 }
1189 },
1190 }
1191 else:
1192 trackDict["adapter"] = {
1193 "type": "PAFAdapter",
1194 "pafLocation": {"uri": url},
1195 "assemblyNames": passnames,
1196 }
1197 if not usePIF:
1198 style_json = {
1199 "type": "LGVSyntenyDisplay",
1200 "displayId": "%s-LGVSyntenyDisplay" % tId,
1201 }
1202 else:
1203 style_json = {
1204 "type": "LinearBasicDisplay",
1205 "displayId": "%s-LinearBasicDisplay" % tId,
1206 }
1207 trackDict["style"] = style_json
1208 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict))
1209 self.trackIdlist.append(tId)
1210
1211 def process_annotations(self, track):
1212 category = track["category"].replace("__pd__date__pd__", TODAY) 1107 category = track["category"].replace("__pd__date__pd__", TODAY)
1213 tt1 = ",/ :;\\" 1108
1214 tt2 = "______" 1109 track_labels = []
1215 labttab = str.maketrans(tt1, tt2) 1110
1216 for trackIndex, ( 1111 for i, (
1217 dataset_path, 1112 dataset_path,
1218 dataset_ext, 1113 dataset_ext,
1219 useuri,
1220 track_human_label, 1114 track_human_label,
1221 extra_metadata, 1115 extra_metadata,
1222 ) in enumerate(track["trackfiles"]): 1116 ) in enumerate(track["trackfiles"]):
1223 if not dataset_path.strip().startswith("http"): 1117 # Unsanitize labels (element_identifiers are always sanitized by Galaxy)
1224 # Unsanitize labels (element_identifiers are always sanitized by Galaxy) 1118 for key, value in mapped_chars.items():
1225 for key, value in mapped_chars.items(): 1119 track_human_label = track_human_label.replace(value, key)
1226 track_human_label = track_human_label.replace(value, key) 1120
1227 track_human_label = track_human_label.translate(labttab) 1121 is_multi = type(dataset_path) is list
1122
1123 log.info(
1124 f"-----> Processing track {category} / {track_human_label} ({dataset_ext}, {len(dataset_path) if is_multi else 1} files)"
1125 )
1126
1228 outputTrackConfig = { 1127 outputTrackConfig = {
1229 "category": category, 1128 "category": category,
1230 "style": {},
1231 } 1129 }
1232 1130
1233 outputTrackConfig["assemblyNames"] = track["assemblyNames"]
1234 outputTrackConfig["key"] = track_human_label 1131 outputTrackConfig["key"] = track_human_label
1235 outputTrackConfig["useuri"] = useuri 1132 # We add extra data to hash for the case of non-file tracks
1236 outputTrackConfig["path"] = dataset_path 1133 if (
1237 outputTrackConfig["ext"] = dataset_ext 1134 "conf" in track
1238 outputTrackConfig["trackset"] = track.get("trackset", {}) 1135 and "options" in track["conf"]
1239 outputTrackConfig["label"] = track["label"] 1136 and "url" in track["conf"]["options"]
1137 ):
1138 non_file_info = track["conf"]["options"]["url"]
1139 else:
1140 non_file_info = ""
1141
1142 # I chose to use track['category'] instead of 'category' here. This
1143 # is intentional. This way re-running the tool on a different date
1144 # will not generate different hashes and make comparison of outputs
1145 # much simpler.
1146 hashData = [
1147 str(dataset_path),
1148 track_human_label,
1149 track["category"],
1150 non_file_info,
1151 parent["uniq_id"],
1152 ]
1153 hashData = "|".join(hashData).encode("utf-8")
1154 outputTrackConfig["label"] = hashlib.md5(hashData).hexdigest() + f"_{track['track_num']}_{i}"
1240 outputTrackConfig["metadata"] = extra_metadata 1155 outputTrackConfig["metadata"] = extra_metadata
1241 outputTrackConfig["name"] = track_human_label 1156
1242 if track["label"] in self.trackIdlist: 1157 outputTrackConfig["style"] = track["style"]
1243 logging.error( 1158
1244 "### not adding %s already in %s" 1159 outputTrackConfig["formatdetails"] = track["formatdetails"]
1245 % (track["label"], self.trackIdlist) 1160
1246 ) 1161 outputTrackConfig["remote"] = track["remote"]
1247 yield None 1162
1163 # Guess extension for remote data
1164 if dataset_ext == "gff,gff3,bed":
1165 if dataset_path.endswith(".bed") or dataset_path.endswith(".bed.gz"):
1166 dataset_ext = "bed"
1167 else:
1168 dataset_ext = "gff"
1169 elif dataset_ext == "vcf,vcf_bgzip":
1170 if dataset_path.endswith(".vcf.gz"):
1171 dataset_ext = "vcf_bgzip"
1172 else:
1173 dataset_ext = "vcf"
1174
1248 if dataset_ext in ("gff", "gff3"): 1175 if dataset_ext in ("gff", "gff3"):
1249 self.add_gff( 1176 self.add_gff(
1250 dataset_path, 1177 parent,
1251 outputTrackConfig,
1252 )
1253 elif dataset_ext in ("hic", "juicebox_hic"):
1254 outputTrackConfig["wasCool"] = False
1255 self.add_hic(
1256 dataset_path,
1257 outputTrackConfig,
1258 )
1259 elif dataset_ext in ("cool", "mcool", "scool"):
1260 hic_url = outputTrackConfig["label"]
1261 hic_path = os.path.join(self.outdir, hic_url) + ".hic"
1262 outputTrackConfig["wasCool"] = True
1263 self.subprocess_check_call(
1264 [
1265 "hictk",
1266 "convert",
1267 "-f",
1268 "--output-fmt",
1269 "hic",
1270 dataset_path,
1271 hic_path,
1272 ]
1273 )
1274 self.add_hic(
1275 hic_path,
1276 outputTrackConfig,
1277 )
1278 elif dataset_ext in ("bed",):
1279 self.add_bed(
1280 dataset_path, 1178 dataset_path,
1281 dataset_ext, 1179 dataset_ext,
1282 outputTrackConfig, 1180 outputTrackConfig,
1181 track["conf"]["options"]["gff"],
1283 ) 1182 )
1284 elif dataset_ext in ("maf",): 1183 elif dataset_ext == "bed":
1184 self.add_bed(
1185 parent,
1186 dataset_path,
1187 dataset_ext,
1188 outputTrackConfig,
1189 track["conf"]["options"]["gff"],
1190 )
1191 elif dataset_ext == "bigwig":
1192 if is_multi:
1193 self.add_bigwig_multi(
1194 parent,
1195 dataset_path, outputTrackConfig, track["conf"]["options"]["wiggle"]
1196 )
1197 else:
1198 self.add_bigwig(
1199 parent,
1200 dataset_path, outputTrackConfig, track["conf"]["options"]["wiggle"]
1201 )
1202 elif dataset_ext == "maf":
1285 self.add_maf( 1203 self.add_maf(
1204 parent,
1205 dataset_path, outputTrackConfig, track["conf"]["options"]["maf"]
1206 )
1207 elif dataset_ext == "bam":
1208
1209 if track["remote"]:
1210 bam_index = dataset_path + '.bai'
1211 else:
1212 real_indexes = track["conf"]["options"]["pileup"]["bam_indices"][
1213 "bam_index"
1214 ]
1215 if not isinstance(real_indexes, list):
1216 # <bam_indices>
1217 # <bam_index>/path/to/a.bam.bai</bam_index>
1218 # </bam_indices>
1219 #
1220 # The above will result in the 'bam_index' key containing a
1221 # string. If there are two or more indices, the container
1222 # becomes a list. Fun!
1223 real_indexes = [real_indexes]
1224
1225 bam_index = real_indexes[i]
1226
1227 self.add_xam(
1228 parent,
1286 dataset_path, 1229 dataset_path,
1287 outputTrackConfig, 1230 outputTrackConfig,
1231 track["conf"]["options"]["pileup"],
1232 index=bam_index,
1233 ext="bam",
1288 ) 1234 )
1289 elif dataset_ext == "bigwig": 1235 elif dataset_ext == "cram":
1290 self.add_bigwig( 1236
1237 if track["remote"]:
1238 cram_index = dataset_path + '.crai'
1239 else:
1240 real_indexes = track["conf"]["options"]["cram"]["cram_indices"][
1241 "cram_index"
1242 ]
1243 if not isinstance(real_indexes, list):
1244 # <bam_indices>
1245 # <bam_index>/path/to/a.bam.bai</bam_index>
1246 # </bam_indices>
1247 #
1248 # The above will result in the 'bam_index' key containing a
1249 # string. If there are two or more indices, the container
1250 # becomes a list. Fun!
1251 real_indexes = [real_indexes]
1252
1253 cram_index = real_indexes[i]
1254
1255 self.add_xam(
1256 parent,
1291 dataset_path, 1257 dataset_path,
1292 outputTrackConfig, 1258 outputTrackConfig,
1259 track["conf"]["options"]["cram"],
1260 index=cram_index,
1261 ext="cram",
1293 ) 1262 )
1294 elif dataset_ext == "bam": 1263 elif dataset_ext == "vcf":
1295 real_indexes = track["conf"]["options"]["bam"]["bam_index"] 1264 self.add_vcf(
1296 self.add_bam( 1265 parent,
1266 dataset_path,
1267 outputTrackConfig
1268 )
1269 elif dataset_ext == "vcf_bgzip":
1270 self.add_vcf(
1271 parent,
1297 dataset_path, 1272 dataset_path,
1298 outputTrackConfig, 1273 outputTrackConfig,
1299 bam_indexes=real_indexes, 1274 zipped=True
1300 ) 1275 )
1301 elif dataset_ext == "cram": 1276 elif dataset_ext == "paf": # https://fr.wikipedia.org/wiki/Paf_le_chien
1302 real_indexes = track["conf"]["options"]["cram"]["cram_index"] 1277 self.add_paf(
1303 self.add_cram( 1278 parent,
1304 dataset_path, 1279 dataset_path,
1305 outputTrackConfig, 1280 outputTrackConfig,
1306 cram_indexes=real_indexes, 1281 track["conf"]["options"]["synteny"]
1307 ) 1282 )
1308 elif dataset_ext == "vcf": 1283 elif dataset_ext in ("hic"):
1309 self.add_vcf(dataset_path, outputTrackConfig) 1284 self.add_hic(
1310 elif dataset_ext == "paf": 1285 parent,
1311 self.add_paf(
1312 dataset_path, 1286 dataset_path,
1313 outputTrackConfig, 1287 outputTrackConfig,
1314 track["conf"]["options"]["paf"], 1288 track["conf"]["options"]["hic"]
1289 )
1290 elif dataset_ext == "sparql":
1291 sparql_query = track["conf"]["options"]["sparql"]["query"]
1292 for key, value in mapped_chars.items():
1293 sparql_query = sparql_query.replace(value, key)
1294 sparql_query_refnames = track["conf"]["options"]["sparql"].get("query_refnames", "")
1295 if sparql_query_refnames:
1296 for key, value in mapped_chars.items():
1297 sparql_query_refnames = sparql_query_refnames.replace(value, key)
1298 self.add_sparql(
1299 parent,
1300 track["conf"]["options"]["sparql"]["url"],
1301 sparql_query,
1302 sparql_query_refnames,
1303 outputTrackConfig,
1304 )
1305 elif dataset_ext == "gc":
1306 self.add_gc_content(
1307 parent,
1308 outputTrackConfig,
1315 ) 1309 )
1316 else: 1310 else:
1317 logging.warning("Do not know how to handle %s", dataset_ext) 1311 log.error(f"Do not know how to handle {dataset_ext}")
1318 # Return non-human label for use in other fields 1312
1319 yield outputTrackConfig["label"] 1313 track_labels.append(outputTrackConfig["label"])
1320 1314
1321 def add_default_session(self, default_data): 1315 # Return non-human label for use in other fields
1316 return track_labels
1317
1318 def add_default_view_genome(self, genome, default_loc, tracks_on):
1319
1320 refName = ""
1321 start = end = None
1322 if default_loc:
1323 loc_match = re.search(r"^(\w+):(\d+)\.+(\d+)$", default_loc)
1324 if loc_match:
1325 refName = loc_match.group(1)
1326 start = int(loc_match.group(2))
1327 end = int(loc_match.group(3))
1328
1329 if not refName and self.assembly_ids[genome['uniq_id']]:
1330 refName = self.assembly_ids[genome['uniq_id']]
1331
1332 if start and end:
1333 loc_str = f"{refName}:{start}-{end}"
1334 else:
1335 loc_str = refName
1336
1337 # Updating an existing jbrowse instance, merge with pre-existing view
1338 view_specs = None
1339 if self.update:
1340 for existing in self.default_views.values():
1341 if len(existing) and existing["type"] == "LinearGenomeView":
1342 if existing['init']['assembly'] == genome['uniq_id']:
1343 view_specs = existing
1344 if loc_str:
1345 view_specs['init']['loc'] = loc_str
1346 view_specs['init']['tracks'].extend(tracks_on)
1347
1348 if view_specs is None: # Not updating, or updating from synteny
1349 view_specs = {
1350 "type": "LinearGenomeView",
1351 "init": {
1352 "assembly": genome['uniq_id'],
1353 "loc": loc_str,
1354 "tracks": tracks_on
1355 }
1356 }
1357
1358 return view_specs
1359
1360 def add_default_view_synteny(self, genome_views, synteny_tracks):
1361
1362 # Add json for cached synteny tracks
1363 # We cache them because we need to know the target genome uniq_id
1364 for strack in synteny_tracks:
1365
1366 # Target assembly is the next genome, find its uniq_id
1367 query_assembly = strack["assemblyNames"][0]
1368 ass_uniq_ids = list(self.assembly_ids.keys())
1369 query_index = ass_uniq_ids.index(query_assembly)
1370 target_assembly = ass_uniq_ids[query_index + 1]
1371
1372 strack["assemblyNames"][1] = target_assembly
1373 strack["adapter"]["assemblyNames"][1] = target_assembly
1374
1375 self.subprocess_check_call(
1376 [
1377 "jbrowse",
1378 "add-track-json",
1379 "--target",
1380 self.outdir,
1381 json.dumps(strack),
1382 ]
1383 )
1384
1385 # Configure the synteny view
1386 levels = []
1387
1388 for strack in synteny_tracks:
1389 lev = {
1390 "type": "LinearSyntenyViewHelper",
1391 "tracks": [
1392 {
1393 "type": "SyntenyTrack",
1394 "configuration": strack["trackId"],
1395 "displays": [
1396 {
1397 "type": "LinearSyntenyDisplay",
1398 "configuration": strack["trackId"] + "_LinearSyntenyDisplay"
1399 }
1400 ]
1401 }
1402 ],
1403 "height": 100,
1404 "level": len(levels)
1405 }
1406 levels.append(lev)
1407
1408 view_specs = {
1409 "type": "LinearSyntenyView",
1410 "views": genome_views,
1411 "levels": levels
1412 }
1413
1414 return view_specs
1415
1416 def add_default_session(self, default_views):
1322 """ 1417 """
1323 default session settings are hard and fragile.
1324 .add_default_view() and other configuration code adapted from
1325 https://github.com/abretaud/tools-iuc/blob/jbrowse2/tools/jbrowse2/jbrowse2.py
1326 """
1327 # TODO using the default session for now, but check out session specs in the future https://github.com/GMOD/jbrowse-components/issues/2708
1328 bpPerPx = (
1329 self.bpPerPx
1330 ) # Browser window width is unknown and default session cannot be used to figure it out in JB2 code so could be 200-2000+ pixels.
1331 track_types = {}
1332 with open(self.config_json_file, "r") as config_file:
1333 config_json = json.load(config_file)
1334 if self.config_json:
1335 config_json.update(self.config_json)
1336 if "defaultSession" in config_json:
1337 session_json = config_json["defaultSession"]
1338 session_views = []
1339 else:
1340 session_json = {}
1341 session_views = []
1342 for gnome in self.assmeta.keys(): # assemblies have their own tracks
1343 tracks_data = []
1344 for track_conf in self.tracksToAdd[gnome]:
1345 tId = track_conf["trackId"]
1346 if tId in default_data[gnome]["visibility"]["default_on"]:
1347 track_types[tId] = track_conf["type"]
1348 style_data = default_data[gnome]["style"].get(tId, {})
1349 if not style_data:
1350 logging.debug(
1351 "No style data for %s in available default data %s"
1352 % (tId, default_data)
1353 )
1354 else:
1355 logging.debug("style data for %s = %s" % (tId, style_data))
1356 if style_data.get("type", None) is None:
1357 style_data["type"] = "LinearBasicDisplay"
1358 if "displays" in track_conf:
1359 disp = track_conf["displays"][0]["type"]
1360 style_data["type"] = disp
1361 if track_conf.get("displays", None):
1362 style_data["configuration"] = track_conf["displays"][0][
1363 "displayId"
1364 ]
1365 else:
1366 logging.debug("no display in track_conf for %s" % tId)
1367 if track_conf.get("style_labels", None):
1368 # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work
1369 # TODO move this to per track displays?
1370 style_data["labels"] = track_conf["style_labels"]
1371 tracks_data.append(
1372 {
1373 "type": track_types[tId],
1374 "configuration": tId,
1375 "displays": [style_data],
1376 }
1377 )
1378 first = [x for x in self.ass_first_contigs if x[0] == gnome]
1379 drdict = {
1380 "reversed": False,
1381 "assemblyName": gnome,
1382 }
1383 if len(first) > 0:
1384 [gnome, refName, end] = first[0]
1385 drdict["refName"] = refName
1386 drdict["start"] = 0
1387 end = int(end)
1388 drdict["end"] = end
1389 else:
1390 ddl = default_data.get("defaultLocation", None)
1391 if ddl:
1392 loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", ddl)
1393 # allow commas like 100,000 but ignore as integer
1394 if loc_match:
1395 refName = loc_match.group(1)
1396 drdict["refName"] = refName
1397 if loc_match.group(2) > "":
1398 drdict["start"] = int(loc_match.group(2).replace(",", ""))
1399 if loc_match.group(3) > "":
1400 drdict["end"] = int(loc_match.group(3).replace(",", ""))
1401 else:
1402 logging.info(
1403 "@@@ regexp could not match contig:start..end in the supplied location %s - please fix"
1404 % ddl
1405 )
1406 view_json = {
1407 "type": "LinearGenomeView",
1408 "offsetPx": 0,
1409 "bpPerPx": bpPerPx,
1410 "minimized": False,
1411 "tracks": tracks_data,
1412 }
1413 if drdict.get("refName", None):
1414 # TODO displayedRegions is not just zooming to the region, it hides the rest of the chromosome
1415 view_json["displayedRegions"] = [
1416 drdict,
1417 ]
1418 logging.info("@@@ defaultlocation %s for default session" % drdict)
1419 else:
1420 logging.info(
1421 "@@@ no track location for default session - please add one!"
1422 )
1423 session_views.append(view_json)
1424 session_name = default_data.get("session_name", "New session")
1425 session_json["name"] = session_name
1426
1427 if "views" not in session_json:
1428 session_json["views"] = session_views
1429 else:
1430 session_json["views"] += session_views
1431
1432 pp = json.dumps(session_views, indent=2)
1433 config_json["defaultSession"] = session_json
1434 self.config_json.update(config_json)
1435 logging.debug("defaultSession=%s" % (pp))
1436 with open(self.config_json_file, "w") as config_file:
1437 json.dump(self.config_json, config_file, indent=2)
1438
1439 def add_defsess_to_index(self, data):
1440 """
1441 ----------------------------------------------------------
1442 Add some default session settings: set some assemblies/tracks on/off 1418 Add some default session settings: set some assemblies/tracks on/off
1443 1419
1444 This allows to select a default view: 1420 This allows to select a default view:
1445 - jb type (Linear, Circular, etc) 1421 - jb type (Linear, Circular, etc)
1446 - default location on an assembly 1422 - default location on an assembly
1447 - default tracks 1423 - default tracks
1448 - ... 1424 - ...
1449 1425
1450 Different methods to do that were tested/discussed: 1426 Now using this method:
1451 - using a defaultSession item in config.json: this proved to be difficult: 1427 https://github.com/GMOD/jbrowse-components/pull/4907
1428
1429 Different methods that were tested/discussed earlier:
1430 - using a defaultSession item in config.json before PR 4970: this proved to be difficult:
1452 forced to write a full session block, including hard-coded/hard-to-guess items, 1431 forced to write a full session block, including hard-coded/hard-to-guess items,
1453 no good way to let Jbrowse2 display a scaffold without knowing its size 1432 no good way to let Jbrowse2 display a scaffold without knowing its size
1454 - using JBrowse2 as an embedded React component in a tool-generated html file: 1433 - using JBrowse2 as an embedded React component in a tool-generated html file:
1455 it works but it requires generating js code to actually do what we want = chosing default view, assembly, tracks, ... 1434 it works but it requires generating js code to actually do what we want = chosing default view, assembly, tracks, ...
1456 - writing a session-spec inside the config.json file: this is not yet supported as of 2.10.2 (see PR 4148 below) 1435 - writing a session-spec inside the config.json file: this is not yet supported as of 2.10.2 (see PR 4148 below)
1457 a session-spec is a kind of simplified defaultSession where you don't need to specify every aspect of the session 1436 a session-spec is a kind of simplified defaultSession where you don't need to specify every aspect of the session
1458 - passing a session-spec through URL params by embedding the JBrowse2 index.html inside an iframe 1437 - passing a session-spec through URL params by embedding the JBrowse2 index.html inside an iframe
1459 we selected this option
1460 1438
1461 Xrefs to understand the choices: 1439 Xrefs to understand the choices:
1462 https://github.com/GMOD/jbrowse-components/issues/2708 1440 https://github.com/GMOD/jbrowse-components/issues/2708
1463 https://github.com/GMOD/jbrowse-components/discussions/3568 1441 https://github.com/GMOD/jbrowse-components/discussions/3568
1464 https://github.com/GMOD/jbrowse-components/pull/4148 1442 https://github.com/GMOD/jbrowse-components/pull/4148
1465 """ 1443 """
1466 new_index = "Nothing written" 1444
1467 session_spec = {"views": []} 1445 if self.use_synteny_viewer:
1468 logging.debug("def ass_first=%s\ndata=%s" % (self.ass_first_contigs, data)) 1446 session_name = "Synteny"
1469 for first_contig in self.ass_first_contigs: 1447 else:
1470 logging.debug("first contig=%s" % self.ass_first_contigs) 1448 session_name = ', '.join(x['init']['assembly'] for x in default_views)
1471 [gnome, refName, end] = first_contig 1449
1472 start = 0 1450 session_spec = {
1473 aview = { 1451 "name": session_name,
1474 "assembly": gnome, 1452 "views": default_views
1475 "loc": "{}:{}..{}".format(refName, start, end), 1453 }
1476 "type": "LinearGenomeView", 1454
1477 "tracks": data[gnome]["tracks"], 1455 config_path = os.path.join(self.outdir, "config.json")
1478 } 1456 with open(config_path, "r") as config_file:
1479 session_spec["views"].append(aview) 1457 config_json = json.load(config_file)
1480 sess = json.dumps(session_spec, sort_keys=True, indent=2) 1458
1481 new_index = INDEX_TEMPLATE.replace( 1459 config_json["defaultSession"].update(session_spec)
1482 "__SESSION_SPEC__", "&session=spec-{}".format(sess) 1460
1483 ) 1461 with open(config_path, "w") as config_file:
1484 1462 json.dump(config_json, config_file, indent=2)
1485 os.rename(
1486 os.path.join(self.outdir, "index.html"),
1487 os.path.join(self.outdir, "index_noview.html"),
1488 )
1489
1490 with open(os.path.join(self.outdir, "index.html"), "w") as nind:
1491 nind.write(new_index)
1492 logging.debug(
1493 "#### add_defsession gnome=%s refname=%s\nsession_spec=%s\nnew_index=%s"
1494 % (gnome, refName, sess, new_index)
1495 )
1496 1463
1497 def add_general_configuration(self, data): 1464 def add_general_configuration(self, data):
1498 """ 1465 """
1499 Add some general configuration to the config.json file 1466 Add some general configuration to the config.json file
1500 """ 1467 """
1501 1468
1502 config_path = self.config_json_file 1469 config_path = os.path.join(self.outdir, "config.json")
1503 if os.path.exists(config_path): 1470 with open(config_path, "r") as config_file:
1504 with open(config_path, "r") as config_file: 1471 config_json = json.load(config_file)
1505 config_json = json.load(config_file) 1472
1506 else:
1507 config_json = {}
1508 if self.config_json:
1509 config_json.update(self.config_json)
1510 config_data = {} 1473 config_data = {}
1511 1474
1512 config_data["disableAnalytics"] = data.get("analytics", "false") == "true" 1475 config_data["disableAnalytics"] = data.get("analytics", "false") == "true"
1513 1476
1514 config_data["theme"] = { 1477 config_data["theme"] = {
1518 "tertiary": {"main": data.get("tertiary_color", "#135560")}, 1481 "tertiary": {"main": data.get("tertiary_color", "#135560")},
1519 "quaternary": {"main": data.get("quaternary_color", "#FFB11D")}, 1482 "quaternary": {"main": data.get("quaternary_color", "#FFB11D")},
1520 }, 1483 },
1521 "typography": {"fontSize": int(data.get("font_size", 10))}, 1484 "typography": {"fontSize": int(data.get("font_size", 10))},
1522 } 1485 }
1523 if not config_json.get("configuration", None): 1486
1524 config_json["configuration"] = {}
1525 config_json["configuration"].update(config_data) 1487 config_json["configuration"].update(config_data)
1526 self.config_json.update(config_json) 1488
1527 with open(config_path, "w") as config_file: 1489 with open(config_path, "w") as config_file:
1528 json.dump(self.config_json, config_file, indent=2) 1490 json.dump(config_json, config_file, indent=2)
1529 1491
1530 def clone_jbrowse(self, realclone=False): 1492 def add_plugins(self, data):
1531 """ 1493 """
1532 Clone a JBrowse directory into a destination directory. 1494 Add plugins to the config.json file
1533
1534 `realclone=true` will use the `jbrowse create` command.
1535 To allow running on internet-less compute and for reproducibility
1536 use frozen code with `realclone=false
1537
1538 """ 1495 """
1539 dest = self.outdir 1496
1540 if (not os.path.exists(self.jbrowse2path)) or realclone: 1497 config_path = os.path.join(self.outdir, "config.json")
1541 e = os.environ 1498 with open(config_path, "r") as config_file:
1542 e["SHELL"] = "/bin/sh" 1499 config_json = json.load(config_file)
1543 cmd = ["jbrowse", "create", dest, "-f", "--tag", f"{JB2VER}"] 1500
1544 subprocess.run(cmd, env=e, shell=True) 1501 if "plugins" not in config_json:
1502 config_json["plugins"] = []
1503
1504 config_json["plugins"].extend(data)
1505
1506 with open(config_path, "w") as config_file:
1507 json.dump(config_json, config_file, indent=2)
1508
1509 def clone_jbrowse(self, jbrowse_dir, destination):
1510 """
1511 Clone a JBrowse directory into a destination directory.
1512
1513 Not using `jbrowse create` command to allow running on internet-less compute + to make sure code is frozen
1514 """
1515
1516 copytree(jbrowse_dir, destination)
1517 try:
1518 shutil.rmtree(os.path.join(destination, "test_data"))
1519 except OSError as e:
1520 log.error(f"Error: {e.filename} - {e.strerror}.")
1521
1522 if not os.path.exists(os.path.join(destination, "data")):
1523 # It can already exist if upgrading an instance
1524 os.makedirs(os.path.join(destination, "data"))
1525 log.info(f"makedir {os.path.join(destination, 'data')}")
1526
1527 os.symlink("./data/config.json", os.path.join(destination, "config.json"))
1528
1529
1530 def copytree(src, dst, symlinks=False, ignore=None):
1531 for item in os.listdir(src):
1532 s = os.path.join(src, item)
1533 d = os.path.join(dst, item)
1534 if os.path.isdir(s):
1535 shutil.copytree(s, d, symlinks, ignore)
1545 else: 1536 else:
1546 shutil.copytree(self.jbrowse2path, dest, dirs_exist_ok=True) 1537 shutil.copy2(s, d)
1547 for fn in [
1548 "asset-manifest.json",
1549 "favicon.ico",
1550 "robots.txt",
1551 "umd_plugin.js",
1552 "version.txt",
1553 "test_data",
1554 ]:
1555 try:
1556 path = os.path.join(dest, fn)
1557 if os.path.isdir(path):
1558 shutil.rmtree(path)
1559 else:
1560 os.remove(path)
1561 except OSError as e:
1562 log.error("Error: %s - %s." % (e.filename, e.strerror))
1563 for neededfile in ["jb2_webserver.py", "bedscoreplugin.js"]:
1564 shutil.copyfile(
1565 os.path.join(INSTALLED_TO, neededfile), os.path.join(dest, neededfile)
1566 )
1567 1538
1568 1539
1569 def parse_style_conf(item): 1540 def parse_style_conf(item):
1570 if item.text.lower() in ["false", "true", "yes", "no"]: 1541 if "type" in item.attrib and item.attrib["type"] in ["boolean", "integer"]:
1571 return item.text.lower in ("yes", "true") 1542 if item.attrib["type"] == "boolean":
1572 elif item.text.isdigit(): 1543 return item.text in ("yes", "true", "True")
1573 return int(item.text) 1544 elif item.attrib["type"] == "integer":
1574 return item.text 1545 return int(item.text)
1546 else:
1547 return item.text
1548
1549
1550 def validate_synteny(real_root):
1551
1552 if len(real_root.findall('assembly/tracks/track[@format="synteny"]')) == 0:
1553 # No synteny data, all good
1554 return False
1555
1556 assemblies = real_root.findall("assembly")
1557
1558 if len(assemblies[-1].findall('tracks/track[@format="synteny"]')) > 0 and \
1559 assemblies[-1].find('tracks/track[@format="synteny"]/options/style/display').text == "LinearSyntenyDisplay":
1560 raise RuntimeError("You should not set a synteny track on the last genome.")
1561
1562 for assembly in assemblies[1:0]:
1563 if len(assembly.findall('tracks/track[@format="synteny"]')) != 1 and \
1564 assembly.find('tracks/track[@format="synteny"]/options/style/display').text == "LinearSyntenyDisplay":
1565 raise RuntimeError("To use the synteny viewer, you should add a synteny track to each assembly, except the last one.")
1566
1567 return True
1575 1568
1576 1569
1577 if __name__ == "__main__": 1570 if __name__ == "__main__":
1578 parser = argparse.ArgumentParser(description="", epilog="") 1571 parser = argparse.ArgumentParser(description="", epilog="")
1579 parser.add_argument("--xml", help="Track Configuration") 1572 parser.add_argument("xml", type=argparse.FileType("r"), help="Track Configuration")
1580 parser.add_argument( 1573
1581 "--jbrowse2path", help="Path to JBrowse2 directory in BioContainer or Conda" 1574 parser.add_argument('--jbrowse', help='Folder containing a jbrowse release')
1582 ) 1575 parser.add_argument("--update", help="Update an existing JBrowse2 instance", action="store_true")
1583 parser.add_argument("--outdir", help="Output directory", default="out") 1576 parser.add_argument("--outdir", help="Output directory", default="out")
1584 parser.add_argument("--version", "-V", action="version", version=JB2VER)
1585 args = parser.parse_args() 1577 args = parser.parse_args()
1586 tree = ET.parse(args.xml) 1578
1587 root = tree.getroot() 1579 tree = ET.parse(args.xml.name)
1588 removeMe = string.punctuation.replace(".", " ").replace("/", "").replace("-", "") 1580 real_root = tree.getroot()
1589 # first is a space because space needs to be added here for removal from labels as paths. 1581
1590 nopunct = str.maketrans(dict.fromkeys(removeMe))
1591 # This should be done ASAP 1582 # This should be done ASAP
1592 GALAXY_INFRASTRUCTURE_URL = root.find("metadata/galaxyUrl").text
1593 # Sometimes this comes as `localhost` without a protocol 1583 # Sometimes this comes as `localhost` without a protocol
1584 GALAXY_INFRASTRUCTURE_URL = real_root.find("metadata/galaxyUrl").text
1594 if not GALAXY_INFRASTRUCTURE_URL.startswith("http"): 1585 if not GALAXY_INFRASTRUCTURE_URL.startswith("http"):
1595 # so we'll prepend `http://` and hope for the best. Requests *should* 1586 # so we'll prepend `http://` and hope for the best. Requests *should*
1596 # be GET and not POST so it should redirect OK 1587 # be GET and not POST so it should redirect OK
1597 GALAXY_INFRASTRUCTURE_URL = "http://" + GALAXY_INFRASTRUCTURE_URL 1588 GALAXY_INFRASTRUCTURE_URL = "http://" + GALAXY_INFRASTRUCTURE_URL
1598 1589
1599 jc = JbrowseConnector(outdir=args.outdir, jbrowse2path=args.jbrowse2path) 1590 jc = JbrowseConnector(
1600 1591 jbrowse=args.jbrowse,
1601 default_session_data = {} 1592 outdir=args.outdir,
1602 trackI = 0 1593 update=args.update,
1603 for ass in root.findall("assembly"): 1594 )
1604 genomes = [ 1595
1605 { 1596 # Synteny options are special, check them first
1606 "path": x.attrib["path"], 1597 jc.use_synteny_viewer = validate_synteny(real_root)
1607 "label": x.attrib["label"].split(" ")[0].translate(nopunct), 1598
1608 "useuri": x.attrib["useuri"], 1599 for assembly in real_root.findall("assembly"):
1609 "meta": metadata_from_node(x.find("metadata")), 1600 genome_el = assembly.find('genome')
1610 } 1601
1611 for x in ass.findall("metadata/genomes/genome") 1602 is_remote = genome_el.attrib.get("remote", "false") == "true"
1612 ] 1603
1613 primaryGenome = jc.process_genomes(genomes) 1604 genome = {
1614 if not default_session_data.get(primaryGenome, None): 1605 "path": genome_el.attrib["path"] if is_remote else os.path.realpath(genome_el.attrib["path"]),
1615 default_session_data[primaryGenome] = { 1606 "meta": metadata_from_node(genome_el.find("metadata")),
1616 "tracks": [], 1607 "label": genome_el.attrib["label"],
1617 "style": {}, 1608 }
1618 "style_labels": {}, 1609
1619 "visibility": { 1610 cytobands = None
1620 "default_on": [], 1611 cytobands_el = genome_el.find("cytobands")
1621 "default_off": [], 1612 if cytobands_el is not None and "path" in cytobands_el.attrib:
1622 }, 1613 cytobands = cytobands_el.attrib["path"]
1623 } 1614
1624 for track in ass.find("tracks"): 1615 ref_name_aliases = None
1616 ref_name_aliases_el = genome_el.find("ref_name_aliases")
1617 if ref_name_aliases_el is not None and "path" in ref_name_aliases_el.attrib:
1618 ref_name_aliases = ref_name_aliases_el.attrib["path"]
1619
1620 log.debug("Processing genome", genome)
1621 genome["uniq_id"] = jc.add_assembly(genome["path"], genome["label"], is_remote, cytobands, ref_name_aliases)
1622
1623 default_tracks_on = []
1624
1625 track_num = 0
1626 for track in assembly.findall("tracks/track"):
1625 track_conf = {} 1627 track_conf = {}
1626 track_conf["trackfiles"] = [] 1628 track_conf["trackfiles"] = []
1627 track_conf["assemblyNames"] = primaryGenome 1629 track_conf["track_num"] = track_num
1628 is_multi_bigwig = False 1630
1631 trackfiles = track.findall("files/trackFile") or []
1632
1633 is_multi = False
1634 multi_paths = []
1635 multi_type = None
1636 multi_metadata = {}
1629 try: 1637 try:
1630 if track.find("options/wiggle/multibigwig") and ( 1638 multi_in_xml = track.find("options/multitrack")
1631 track.find("options/wiggle/multibigwig").text == "True" 1639 if multi_in_xml is not None and parse_style_conf(multi_in_xml):
1632 ): 1640 is_multi = True
1633 is_multi_bigwig = True 1641 multi_paths = []
1634 multi_bigwig_paths = [] 1642 multi_type = trackfiles[0].attrib["ext"]
1635 except KeyError: 1643 except KeyError:
1636 pass 1644 pass
1637 1645
1638 trackfiles = track.findall("files/trackFile") 1646 is_remote = False
1639 if trackfiles: 1647 if trackfiles:
1640 for x in trackfiles: 1648 for x in trackfiles:
1641 isBed = False 1649 if is_multi:
1642 if x.attrib["ext"] == "bed": 1650 is_remote = x.attrib.get("remote", "false") == "true"
1643 isBed = True 1651 multi_paths.append(
1644 track_conf["label"] = "%s_%d" % ( 1652 (x.attrib["label"], x.attrib["path"] if is_remote else os.path.realpath(x.attrib["path"]))
1645 x.attrib["label"].translate(nopunct), 1653 )
1646 trackI, 1654 multi_metadata.update(metadata_from_node(x.find("metadata")))
1647 ) 1655 else:
1648 trackI += 1 1656 metadata = metadata_from_node(x.find("metadata"))
1649 track_conf["useuri"] = x.attrib["useuri"] 1657 is_remote = x.attrib.get("remote", "false") == "true"
1650 if is_multi_bigwig: 1658 track_conf["trackfiles"].append(
1651 multi_bigwig_paths.append(
1652 ( 1659 (
1653 track_conf["label"].translate(nopunct), 1660 x.attrib["path"] if is_remote else os.path.realpath(x.attrib["path"]),
1654 track_conf["useuri"], 1661 x.attrib["ext"],
1655 os.path.realpath(x.attrib["path"]), 1662 x.attrib["label"],
1663 metadata,
1656 ) 1664 )
1657 ) 1665 )
1658 else: 1666 else:
1659 metadata = metadata_from_node(x.find("metadata")) 1667 # For tracks without files (sparql, gc)
1660 track_conf["dataset_id"] = metadata.get("dataset_id", "None") 1668 track_conf["trackfiles"].append(
1661 if x.attrib["useuri"].lower() == "yes": 1669 (
1662 tfa = ( 1670 "", # N/A, no path for sparql or gc
1663 x.attrib["path"], 1671 track.attrib["format"],
1664 x.attrib["ext"], 1672 track.find("options/label").text,
1665 x.attrib["useuri"], 1673 {},
1666 track_conf["label"],
1667 metadata,
1668 )
1669 else:
1670 tfa = (
1671 os.path.realpath(x.attrib["path"]),
1672 x.attrib["ext"],
1673 x.attrib["useuri"],
1674 track_conf["label"],
1675 metadata,
1676 )
1677 track_conf["trackfiles"].append(tfa)
1678
1679 if is_multi_bigwig:
1680 metadata = metadata_from_node(x.find("metadata"))
1681
1682 track_conf["trackfiles"].append(
1683 (
1684 multi_bigwig_paths, # Passing an array of paths to represent as one track
1685 "bigwig_multiple",
1686 "MultiBigWig", # Giving an hardcoded name for now
1687 {}, # No metadata for multiple bigwig
1688 )
1689 ) 1674 )
1675 )
1676
1677 if is_multi:
1678 etal_tracks_nb = len(multi_paths[1:])
1679 multi_label = f"{multi_paths[0][0]} + {etal_tracks_nb} other track{'s' if etal_tracks_nb > 1 else ''}"
1680
1681 track_conf["trackfiles"].append(
1682 (
1683 multi_paths, # Passing an array of paths to represent as one track
1684 multi_type, # First file type
1685 multi_label, # First file label
1686 multi_metadata, # Mix of all metadata for multiple bigwig => only last file metadata coming from galaxy + custom oness
1687 )
1688 )
1690 track_conf["category"] = track.attrib["cat"] 1689 track_conf["category"] = track.attrib["cat"]
1691 track_conf["format"] = track.attrib["format"] 1690 track_conf["format"] = track.attrib["format"]
1691 track_conf["style"] = {
1692 item.tag: parse_style_conf(item) for item in (track.find("options/style") or [])
1693 }
1694
1695 track_conf["style"] = {
1696 item.tag: parse_style_conf(item) for item in (track.find("options/style") or [])
1697 }
1698
1699 track_conf["style_labels"] = {
1700 item.tag: parse_style_conf(item)
1701 for item in (track.find("options/style_labels") or [])
1702 }
1703 track_conf["formatdetails"] = {
1704 item.tag: parse_style_conf(item) for item in (track.find("options/formatdetails") or [])
1705 }
1706
1692 track_conf["conf"] = etree_to_dict(track.find("options")) 1707 track_conf["conf"] = etree_to_dict(track.find("options"))
1693 keys = jc.process_annotations(track_conf) 1708
1694 if keys: 1709 track_conf["remote"] = is_remote
1695 for key in keys: 1710
1696 vis = track.attrib.get("visibility", "default_off") 1711 track_labels = jc.process_annotations(track_conf, genome)
1697 if not vis: 1712
1698 vis = "default_off" 1713 if track.attrib["visibility"] == "default_on":
1699 default_session_data[primaryGenome]["visibility"][vis].append(key) 1714 for tlabel in track_labels:
1700 trakdat = jc.tracksToAdd[primaryGenome] 1715 default_tracks_on.append(tlabel)
1701 stile = {} 1716
1702 for trak in trakdat: 1717 track_num += 1
1703 if trak["trackId"] == key: 1718
1704 stile = trak.get("style", {}) 1719 default_loc = assembly.find("defaultLocation").text
1705 if len(track.find("options/style")) > 0: 1720
1706 for item in track.find("options/style"): 1721 jc.default_views[genome['uniq_id']] = jc.add_default_view_genome(genome, default_loc, default_tracks_on)
1707 if item.text: 1722
1708 stile[item.tag] = parse_style_conf(item) 1723 if jc.use_synteny_viewer:
1709 logging.debug("stile=%s" % stile) 1724 synteny_view = jc.add_default_view_synteny(list(jc.default_views.values()), jc.synteny_tracks)
1710 default_session_data[primaryGenome]["style"][key] = stile 1725
1711 default_session_data[primaryGenome]["tracks"].append(key) 1726 views_for_session = jc._load_old_synteny_views()
1712 default_session_data["defaultLocation"] = root.find( 1727
1713 "metadata/general/defaultLocation" 1728 views_for_session.append(synteny_view)
1714 ).text 1729 else:
1715 default_session_data["session_name"] = root.find( 1730 old_views = jc._load_old_genome_views()
1716 "metadata/general/session_name" 1731
1717 ).text 1732 for old_view in old_views:
1718 logging.debug("default_session=%s" % (json.dumps(default_session_data, indent=2))) 1733 if old_view not in jc.default_views:
1719 jc.zipOut = root.find("metadata/general/zipOut").text == "true" 1734 jc.default_views[old_view] = old_views[old_view]
1720 jc.bpPerPx = int(root.find("metadata/general/bpPerPx").text) 1735
1736 views_for_session = list(jc.default_views.values())
1737
1721 general_data = { 1738 general_data = {
1722 "analytics": root.find("metadata/general/analytics").text, 1739 "analytics": real_root.find("metadata/general/analytics").text,
1723 "primary_color": root.find("metadata/general/primary_color").text, 1740 "primary_color": real_root.find("metadata/general/primary_color").text,
1724 "secondary_color": root.find("metadata/general/secondary_color").text, 1741 "secondary_color": real_root.find("metadata/general/secondary_color").text,
1725 "tertiary_color": root.find("metadata/general/tertiary_color").text, 1742 "tertiary_color": real_root.find("metadata/general/tertiary_color").text,
1726 "quaternary_color": root.find("metadata/general/quaternary_color").text, 1743 "quaternary_color": real_root.find("metadata/general/quaternary_color").text,
1727 "font_size": root.find("metadata/general/font_size").text, 1744 "font_size": real_root.find("metadata/general/font_size").text,
1728 } 1745 }
1746
1747 jc.add_default_session(views_for_session)
1729 jc.add_general_configuration(general_data) 1748 jc.add_general_configuration(general_data)
1730 jc.add_default_session(default_session_data) 1749 jc.add_plugins(jc.plugins)
1731 trackconf = jc.config_json.get("tracks", []) 1750 jc.text_index()
1732 for gnome in jc.genome_names:
1733 gtracks = jc.tracksToAdd[gnome]
1734 if len(gtracks) > 0:
1735 logging.debug(
1736 "for genome %s adding gtracks %s"
1737 % (gnome, json.dumps(gtracks, indent=2))
1738 )
1739 trackconf += gtracks
1740 jc.config_json["tracks"] = trackconf
1741 assconf = jc.config_json.get("assemblies", [])
1742 assconf += jc.assemblies
1743 jc.config_json["assemblies"] = assconf
1744 logging.debug(
1745 "assmeta=%s, first_contigs=%s, assemblies=%s, gnames=%s, trackidlist=%s, tracks=%s"
1746 % (
1747 jc.assmeta,
1748 jc.ass_first_contigs,
1749 json.dumps(assconf, indent=2),
1750 jc.genome_names,
1751 jc.trackIdlist,
1752 json.dumps(trackconf, indent=2),
1753 )
1754 )
1755 jc.write_config()
1756 # note that this can be left in the config.json but has NO EFFECT if add_defsess_to_index is called.
1757 # jc.add_defsess_to_index(default_session_data)
1758 # this command line tool appears currently broken - or at least not working here.
1759 # jc.text_index()