Mercurial > repos > fubar > jbrowse2
comparison jbrowse2.py @ 136:93fdd696c281 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 4fa86613193c985e0cb9a8fc795c56b8bc7b8532
author | iuc |
---|---|
date | Thu, 02 Oct 2025 10:20:29 +0000 |
parents | 21bb464c1d53 |
children |
comparison
equal
deleted
inserted
replaced
135:21bb464c1d53 | 136:93fdd696c281 |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 | |
3 import argparse | 2 import argparse |
4 import binascii | 3 import csv |
5 import copy | |
6 import datetime | 4 import datetime |
5 import hashlib | |
7 import json | 6 import json |
8 import logging | 7 import logging |
9 import os | 8 import os |
10 import re | 9 import re |
11 import shutil | 10 import shutil |
12 import ssl | |
13 import string | |
14 import struct | |
15 import subprocess | 11 import subprocess |
16 import urllib.request | |
17 import xml.etree.ElementTree as ET | 12 import xml.etree.ElementTree as ET |
18 from collections import defaultdict | 13 from collections import defaultdict |
19 | 14 |
15 import requests | |
16 | |
17 | |
20 logging.basicConfig(level=logging.DEBUG) | 18 logging.basicConfig(level=logging.DEBUG) |
21 log = logging.getLogger("jbrowse") | 19 log = logging.getLogger("jbrowse") |
22 | |
23 JB2VER = "v2.15.4" | |
24 # version pinned if cloning - but not cloning now | |
25 logCommands = True | |
26 # useful for seeing what's being written but not for production setups | |
27 TODAY = datetime.datetime.now().strftime("%Y-%m-%d") | 20 TODAY = datetime.datetime.now().strftime("%Y-%m-%d") |
28 SELF_LOCATION = os.path.dirname(os.path.realpath(__file__)) | 21 SELF_LOCATION = os.path.dirname(os.path.realpath(__file__)) |
29 GALAXY_INFRASTRUCTURE_URL = None | 22 GALAXY_INFRASTRUCTURE_URL = None |
30 mapped_chars = { | 23 mapped_chars = { |
31 ">": "__gt__", | 24 ">": "__gt__", |
38 "}": "__cc__", | 31 "}": "__cc__", |
39 "@": "__at__", | 32 "@": "__at__", |
40 "#": "__pd__", | 33 "#": "__pd__", |
41 "": "__cn__", | 34 "": "__cn__", |
42 } | 35 } |
43 | |
44 | |
45 INDEX_TEMPLATE = """<!doctype html> | |
46 <html lang="en" style="height:100%"> | |
47 <head> | |
48 <meta charset="utf-8"/> | |
49 <link rel="shortcut icon" href="./favicon.ico"/> | |
50 <meta name="viewport" content="width=device-width,initial-scale=1"/> | |
51 <meta name="theme-color" content="#000000"/> | |
52 <meta name="description" content="A fast and flexible genome browser"/> | |
53 <link rel="manifest" href="./manifest.json"/> | |
54 <title>JBrowse</title> | |
55 </script> | |
56 </head> | |
57 <body style="overscroll-behavior:none; height:100%; margin: 0;"> | |
58 <iframe | |
59 id="jbframe" | |
60 title="JBrowse2" | |
61 frameborder="0" | |
62 width="100%" | |
63 height="100%" | |
64 src='index_noview.html?config=config.json__SESSION_SPEC__'> | |
65 </iframe> | |
66 </body> | |
67 </html> | |
68 """ | |
69 | |
70 | |
71 class ColorScaling(object): | |
72 | |
73 COLOR_FUNCTION_TEMPLATE = """ | |
74 function(feature, variableName, glyphObject, track) {{ | |
75 var score = {score}; | |
76 {opacity} | |
77 return 'rgba({red}, {green}, {blue}, ' + opacity + ')'; | |
78 }} | |
79 """ | |
80 | |
81 COLOR_FUNCTION_TEMPLATE_QUAL = r""" | |
82 function(feature, variableName, glyphObject, track) {{ | |
83 var search_up = function self(sf, attr){{ | |
84 if(sf.get(attr) !== undefined){{ | |
85 return sf.get(attr); | |
86 }} | |
87 if(sf.parent() === undefined) {{ | |
88 return; | |
89 }}else{{ | |
90 return self(sf.parent(), attr); | |
91 }} | |
92 }}; | |
93 | |
94 var search_down = function self(sf, attr){{ | |
95 if(sf.get(attr) !== undefined){{ | |
96 return sf.get(attr); | |
97 }} | |
98 if(sf.children() === undefined) {{ | |
99 return; | |
100 }}else{{ | |
101 var kids = sf.children(); | |
102 for(var child_idx in kids){{ | |
103 var x = self(kids[child_idx], attr); | |
104 if(x !== undefined){{ | |
105 return x; | |
106 }} | |
107 }} | |
108 return; | |
109 }} | |
110 }}; | |
111 | |
112 var color = ({user_spec_color} || search_up(feature, 'color') || search_down(feature, 'color') || {auto_gen_color}); | |
113 var score = (search_up(feature, 'score') || search_down(feature, 'score')); | |
114 {opacity} | |
115 if(score === undefined){{ opacity = 1; }} | |
116 var result = /^#?([a-f\d]{{2}})([a-f\d]{{2}})([a-f\d]{{2}})$/i.exec(color); | |
117 var red = parseInt(result[1], 16); | |
118 var green = parseInt(result[2], 16); | |
119 var blue = parseInt(result[3], 16); | |
120 if(isNaN(opacity) || opacity < 0){{ opacity = 0; }} | |
121 return 'rgba(' + red + ',' + green + ',' + blue + ',' + opacity + ')'; | |
122 }} | |
123 """ | |
124 | |
125 OPACITY_MATH = { | |
126 "linear": """ | |
127 var opacity = (score - ({min})) / (({max}) - ({min})); | |
128 """, | |
129 "logarithmic": """ | |
130 var opacity = Math.log10(score - ({min})) / Math.log10(({max}) - ({min})); | |
131 """, | |
132 "blast": """ | |
133 var opacity = 0; | |
134 if(score == 0.0) {{ | |
135 opacity = 1; | |
136 }} else {{ | |
137 opacity = (20 - Math.log10(score)) / 180; | |
138 }} | |
139 """, | |
140 } | |
141 | |
142 BREWER_COLOUR_IDX = 0 | |
143 BREWER_COLOUR_SCHEMES = [ | |
144 (166, 206, 227), | |
145 (31, 120, 180), | |
146 (178, 223, 138), | |
147 (51, 160, 44), | |
148 (251, 154, 153), | |
149 (227, 26, 28), | |
150 (253, 191, 111), | |
151 (255, 127, 0), | |
152 (202, 178, 214), | |
153 (106, 61, 154), | |
154 (255, 255, 153), | |
155 (177, 89, 40), | |
156 (228, 26, 28), | |
157 (55, 126, 184), | |
158 (77, 175, 74), | |
159 (152, 78, 163), | |
160 (255, 127, 0), | |
161 ] | |
162 | |
163 BREWER_DIVERGING_PALLETES = { | |
164 "BrBg": ("#543005", "#003c30"), | |
165 "PiYg": ("#8e0152", "#276419"), | |
166 "PRGn": ("#40004b", "#00441b"), | |
167 "PuOr": ("#7f3b08", "#2d004b"), | |
168 "RdBu": ("#67001f", "#053061"), | |
169 "RdGy": ("#67001f", "#1a1a1a"), | |
170 "RdYlBu": ("#a50026", "#313695"), | |
171 "RdYlGn": ("#a50026", "#006837"), | |
172 "Spectral": ("#9e0142", "#5e4fa2"), | |
173 } | |
174 | |
175 def __init__(self): | |
176 self.brewer_colour_idx = 0 | |
177 | |
178 def rgb_from_hex(self, hexstr): | |
179 # http://stackoverflow.com/questions/4296249/how-do-i-convert-a-hex-triplet-to-an-rgb-tuple-and-back | |
180 return struct.unpack("BBB", binascii.unhexlify(hexstr)) | |
181 | |
182 def min_max_gff(self, gff_file): | |
183 min_val = None | |
184 max_val = None | |
185 with open(gff_file, "r") as handle: | |
186 for line in handle: | |
187 try: | |
188 value = float(line.split("\t")[5]) | |
189 min_val = min(value, (min_val or value)) | |
190 max_val = max(value, (max_val or value)) | |
191 | |
192 if value < min_val: | |
193 min_val = value | |
194 | |
195 if value > max_val: | |
196 max_val = value | |
197 except Exception: | |
198 pass | |
199 return min_val, max_val | |
200 | |
201 def hex_from_rgb(self, r, g, b): | |
202 return "#%02x%02x%02x" % (r, g, b) | |
203 | |
204 def _get_colours(self): | |
205 r, g, b = self.BREWER_COLOUR_SCHEMES[ | |
206 self.brewer_colour_idx % len(self.BREWER_COLOUR_SCHEMES) | |
207 ] | |
208 self.brewer_colour_idx += 1 | |
209 return r, g, b | |
210 | |
211 def parse_menus(self, track): | |
212 trackConfig = {"menuTemplate": [{}, {}, {}, {}]} | |
213 | |
214 if "menu" in track["menus"]: | |
215 menu_list = [track["menus"]["menu"]] | |
216 if isinstance(track["menus"]["menu"], list): | |
217 menu_list = track["menus"]["menu"] | |
218 | |
219 for m in menu_list: | |
220 tpl = { | |
221 "action": m["action"], | |
222 "label": m.get("label", "{name}"), | |
223 "iconClass": m.get("iconClass", "dijitIconBookmark"), | |
224 } | |
225 if "url" in m: | |
226 tpl["url"] = m["url"] | |
227 if "content" in m: | |
228 tpl["content"] = m["content"] | |
229 if "title" in m: | |
230 tpl["title"] = m["title"] | |
231 | |
232 trackConfig["menuTemplate"].append(tpl) | |
233 | |
234 return trackConfig | |
235 | |
236 def parse_colours(self, track, trackFormat, gff3=None): | |
237 # Wiggle tracks have a bicolor pallete | |
238 trackConfig = {"style": {}} | |
239 if trackFormat == "wiggle": | |
240 | |
241 trackConfig["style"]["pos_color"] = track["wiggle"]["color_pos"] | |
242 trackConfig["style"]["neg_color"] = track["wiggle"]["color_neg"] | |
243 | |
244 if trackConfig["style"]["pos_color"] == "__auto__": | |
245 trackConfig["style"]["neg_color"] = self.hex_from_rgb( | |
246 *self._get_colours() | |
247 ) | |
248 trackConfig["style"]["pos_color"] = self.hex_from_rgb( | |
249 *self._get_colours() | |
250 ) | |
251 | |
252 # Wiggle tracks can change colour at a specified place | |
253 bc_pivot = track["wiggle"]["bicolor_pivot"] | |
254 if bc_pivot not in ("mean", "zero"): | |
255 # The values are either one of those two strings | |
256 # or a number | |
257 bc_pivot = float(bc_pivot) | |
258 trackConfig["bicolor_pivot"] = bc_pivot | |
259 elif "scaling" in track: | |
260 if track["scaling"]["method"] == "ignore": | |
261 if track["scaling"]["scheme"]["color"] != "__auto__": | |
262 trackConfig["style"]["color"] = track["scaling"]["scheme"]["color"] | |
263 else: | |
264 trackConfig["style"]["color"] = self.hex_from_rgb( | |
265 *self._get_colours() | |
266 ) | |
267 else: | |
268 # Scored method | |
269 algo = track["scaling"]["algo"] | |
270 # linear, logarithmic, blast | |
271 scales = track["scaling"]["scales"] | |
272 # type __auto__, manual (min, max) | |
273 scheme = track["scaling"]["scheme"] | |
274 # scheme -> (type (opacity), color) | |
275 # ================================== | |
276 # GENE CALLS OR BLAST | |
277 # ================================== | |
278 if trackFormat == "blast": | |
279 red, green, blue = self._get_colours() | |
280 color_function = self.COLOR_FUNCTION_TEMPLATE.format( | |
281 **{ | |
282 "score": "feature._parent.get('score')", | |
283 "opacity": self.OPACITY_MATH["blast"], | |
284 "red": red, | |
285 "green": green, | |
286 "blue": blue, | |
287 } | |
288 ) | |
289 trackConfig["style"]["color"] = color_function.replace("\n", "") | |
290 elif trackFormat == "gene_calls": | |
291 # Default values, based on GFF3 spec | |
292 min_val = 0 | |
293 max_val = 1000 | |
294 # Get min/max and build a scoring function since JBrowse doesn't | |
295 if scales["type"] == "automatic" or scales["type"] == "__auto__": | |
296 min_val, max_val = self.min_max_gff(gff3) | |
297 else: | |
298 min_val = scales.get("min", 0) | |
299 max_val = scales.get("max", 1000) | |
300 | |
301 if scheme["color"] == "__auto__": | |
302 user_color = "undefined" | |
303 auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours()) | |
304 elif scheme["color"].startswith("#"): | |
305 user_color = "'%s'" % self.hex_from_rgb( | |
306 *self.rgb_from_hex(scheme["color"][1:]) | |
307 ) | |
308 auto_color = "undefined" | |
309 else: | |
310 user_color = "undefined" | |
311 auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours()) | |
312 | |
313 color_function = self.COLOR_FUNCTION_TEMPLATE_QUAL.format( | |
314 **{ | |
315 "opacity": self.OPACITY_MATH[algo].format( | |
316 **{"max": max_val, "min": min_val} | |
317 ), | |
318 "user_spec_color": user_color, | |
319 "auto_gen_color": auto_color, | |
320 } | |
321 ) | |
322 | |
323 trackConfig["style"]["color"] = color_function.replace("\n", "") | |
324 return trackConfig | |
325 | 36 |
326 | 37 |
327 def etree_to_dict(t): | 38 def etree_to_dict(t): |
328 if t is None: | 39 if t is None: |
329 return {} | 40 return {} |
351 INSTALLED_TO = os.path.dirname(os.path.realpath(__file__)) | 62 INSTALLED_TO = os.path.dirname(os.path.realpath(__file__)) |
352 | 63 |
353 | 64 |
354 def metadata_from_node(node): | 65 def metadata_from_node(node): |
355 metadata = {} | 66 metadata = {} |
356 try: | 67 |
357 if len(node.findall("dataset")) != 1: | 68 if len(node.findall("dataset")) == 1: |
358 # exit early | 69 |
359 return metadata | 70 for key, value in node.findall("dataset")[0].attrib.items(): |
360 except Exception: | 71 metadata[f"dataset_{key}"] = value |
361 return {} | 72 |
362 | |
363 for key, value in node.findall("dataset")[0].attrib.items(): | |
364 metadata["dataset_%s" % key] = value | |
365 | |
366 if node.findall("history"): | |
367 for key, value in node.findall("history")[0].attrib.items(): | 73 for key, value in node.findall("history")[0].attrib.items(): |
368 metadata["history_%s" % key] = value | 74 metadata[f"history_{key}"] = value |
369 | 75 |
370 if node.findall("metadata"): | |
371 for key, value in node.findall("metadata")[0].attrib.items(): | 76 for key, value in node.findall("metadata")[0].attrib.items(): |
372 metadata["metadata_%s" % key] = value | 77 metadata[f"metadata_{key}"] = value |
78 | |
79 for key, value in node.findall("tool")[0].attrib.items(): | |
80 metadata[f"tool_{key}"] = value | |
81 | |
373 # Additional Mappings applied: | 82 # Additional Mappings applied: |
374 metadata["dataset_edam_format"] = ( | 83 metadata[ |
375 '<a target="_blank" href="http://edamontology.org/{0}">{1}</a>'.format( | 84 "dataset_edam_format" |
376 metadata["dataset_edam_format"], metadata["dataset_file_ext"] | 85 ] = '<a target="_blank" href="http://edamontology.org/{0}">{1}</a>'.format( |
377 ) | 86 metadata["dataset_edam_format"], metadata["dataset_file_ext"] |
378 ) | 87 ) |
379 metadata["history_user_email"] = '<a href="mailto:{0}">{0}</a>'.format( | 88 metadata["history_user_email"] = '<a href="mailto:{0}">{0}</a>'.format( |
380 metadata["history_user_email"] | 89 metadata["history_user_email"] |
381 ) | 90 ) |
382 metadata["hist_name"] = metadata["history_display_name"] | 91 metadata[ |
383 metadata["history_display_name"] = ( | 92 "history_display_name" |
384 '<a target="_blank" href="{galaxy}/history/view/{encoded_hist_id}">{hist_name}</a>'.format( | 93 ] = '<a target="_blank" href="{galaxy}/history/view/{encoded_hist_id}">{hist_name}</a>'.format( |
385 galaxy=GALAXY_INFRASTRUCTURE_URL, | 94 galaxy=GALAXY_INFRASTRUCTURE_URL, |
386 encoded_hist_id=metadata.get("history_id", "not available"), | 95 encoded_hist_id=metadata["history_id"], |
387 hist_name=metadata.get("history_display_name", "not available"), | 96 hist_name=metadata["history_display_name"], |
388 ) | |
389 ) | 97 ) |
390 if node.findall("tool"): | 98 metadata[ |
391 for key, value in node.findall("tool")[0].attrib.items(): | 99 "tool_tool" |
392 metadata["tool_%s" % key] = value | 100 ] = '<a target="_blank" href="{galaxy}/datasets/{encoded_id}/show_params">{tool_id}</a>'.format( |
393 metadata["tool_tool"] = ( | 101 galaxy=GALAXY_INFRASTRUCTURE_URL, |
394 '<a target="_blank" href="{galaxy}/datasets/{encoded_id}/show_params">{tool_id}{tool_version}</a>'.format( | 102 encoded_id=metadata["dataset_id"], |
395 galaxy=GALAXY_INFRASTRUCTURE_URL, | 103 tool_id=metadata["tool_tool_id"], |
396 encoded_id=metadata.get("dataset_id", ""), | 104 # tool_version=metadata['tool_tool_version'], |
397 tool_id=metadata.get("tool_tool_id", ""), | |
398 tool_version=metadata.get("tool_tool_version", ""), | |
399 ) | |
400 ) | 105 ) |
106 | |
107 # Load additional metadata from a TSV file if any given by user | |
108 bonus = node.findall("bonus") | |
109 if bonus and "src" in bonus[0].attrib and bonus[0].attrib["src"]: | |
110 with open(bonus[0].attrib["src"], "r") as bonus_tsv: | |
111 bonus_content = csv.reader(bonus_tsv, delimiter="\t", quotechar='"') | |
112 for row in bonus_content: | |
113 if len(row) == 2: | |
114 if row[0] in metadata: | |
115 log.warning(f"Overwriting existing metadata {row[0]} with value from bonus file {row[1]}") | |
116 metadata[row[0]] = row[1] | |
117 else: | |
118 log.warning(f"Skipping invalid bonus metadata line: {row}") | |
119 | |
401 return metadata | 120 return metadata |
402 | 121 |
403 | 122 |
404 class JbrowseConnector(object): | 123 class JbrowseConnector(object): |
405 def __init__(self, outdir, jbrowse2path): | 124 def __init__(self, jbrowse, outdir, update): |
406 self.bpPerPx = 50 | 125 self.jbrowse = jbrowse |
407 self.trackCounter = 0 # to avoid name clashes | 126 self.outdir = outdir |
408 self.assemblies = [] # these require more than a few line diff. | 127 self.update = update |
409 self.assmeta = {} | 128 |
410 self.ass_first_contigs = ( | 129 self.tracksToIndex = {} |
411 [] | 130 |
412 ) # for default session - these are read as first line of the assembly .fai | 131 # This is the id of the current assembly |
413 self.giURL = GALAXY_INFRASTRUCTURE_URL | 132 self.assembly_ids = {} |
414 self.outdir = os.path.abspath(outdir) | 133 |
415 self.jbrowse2path = jbrowse2path | 134 self.default_views = {} |
416 os.makedirs(self.outdir, exist_ok=True) | 135 |
417 self.genome_names = [] | 136 self.plugins = [] |
418 self.trackIdlist = [] | 137 |
419 self.tracksToAdd = {} | 138 self.use_synteny_viewer = False |
420 self.config_json = {} | 139 |
421 self.config_json_file = os.path.join(outdir, "config.json") | 140 self.synteny_tracks = [] |
422 self.clone_jbrowse(realclone=False) | 141 |
142 self.clone_jbrowse(self.jbrowse, self.outdir) | |
143 | |
144 # If upgrading, look at the existing data | |
145 self.check_existing(self.outdir) | |
423 | 146 |
424 def get_cwd(self, cwd): | 147 def get_cwd(self, cwd): |
425 if cwd: | 148 if cwd: |
426 return self.outdir | 149 return self.outdir |
427 else: | 150 else: |
428 return subprocess.check_output(["pwd"]).decode("utf-8").strip() | 151 return subprocess.check_output(['pwd']).decode('utf-8').strip() |
152 # return None | |
429 | 153 |
430 def subprocess_check_call(self, command, output=None, cwd=True): | 154 def subprocess_check_call(self, command, output=None, cwd=True): |
431 if output: | 155 if output: |
432 if logCommands: | 156 log.debug(f"cd {self.get_cwd(cwd)} && {' '.join(command)} > {output.name}") |
433 log.debug( | |
434 "cd %s && %s > %s", self.get_cwd(cwd), " ".join(command), output | |
435 ) | |
436 subprocess.check_call(command, cwd=self.get_cwd(cwd), stdout=output) | 157 subprocess.check_call(command, cwd=self.get_cwd(cwd), stdout=output) |
437 else: | 158 else: |
438 if logCommands: | 159 log.debug(f"cd {self.get_cwd(cwd)} && {' '.join(command)}") |
439 log.debug("cd %s && %s", self.get_cwd(cwd), " ".join(command)) | |
440 subprocess.check_call(command, cwd=self.get_cwd(cwd)) | 160 subprocess.check_call(command, cwd=self.get_cwd(cwd)) |
441 | 161 |
442 def subprocess_popen(self, command, cwd=True): | 162 def subprocess_popen(self, command, cwd=True): |
443 if logCommands: | 163 log.debug(f"cd {self.get_cwd(cwd)} && {command}") |
444 log.debug(command) | |
445 p = subprocess.Popen( | 164 p = subprocess.Popen( |
446 command, | 165 command, |
447 cwd=self.outdir, | 166 cwd=self.get_cwd(cwd), |
448 shell=True, | 167 shell=True, |
449 stdin=subprocess.PIPE, | 168 stdin=subprocess.PIPE, |
450 stdout=subprocess.PIPE, | 169 stdout=subprocess.PIPE, |
451 stderr=subprocess.PIPE, | 170 stderr=subprocess.PIPE, |
452 ) | 171 ) |
453 output, err = p.communicate() | 172 output, err = p.communicate() |
454 retcode = p.returncode | 173 retcode = p.returncode |
455 if retcode != 0: | 174 if retcode != 0: |
456 log.error(command) | 175 log.error(f"cd {self.get_cwd(cwd)} && {command}") |
457 log.error(output) | 176 log.error(output) |
458 log.error(err) | 177 log.error(err) |
459 raise RuntimeError("Command failed with exit code %s" % (retcode)) | 178 raise RuntimeError(f"Command failed with exit code {retcode}") |
460 | 179 |
461 def subprocess_check_output(self, command): | 180 def subprocess_check_output(self, command, cwd=True): |
462 if logCommands: | 181 log.debug(f"cd {self.get_cwd(cwd)} && {' '.join(command)}") |
463 log.debug(" ".join(command)) | 182 return subprocess.check_output(command, cwd=self.get_cwd(cwd)) |
464 return subprocess.check_output(command, cwd=self.outdir) | |
465 | 183 |
466 def symlink_or_copy(self, src, dest): | 184 def symlink_or_copy(self, src, dest): |
467 if "GALAXY_JBROWSE_SYMLINKS" in os.environ and bool( | 185 # Use to support symlinking in jbrowse1, in jbrowse2 prefer to use remote uri |
468 os.environ["GALAXY_JBROWSE_SYMLINKS"] | 186 cmd = ["cp", src, dest] |
469 ): | 187 |
470 cmd = ["ln", "-s", src, dest] | 188 return self.subprocess_check_call(cmd) |
189 | |
190 def _prepare_track_style(self, xml_conf): | |
191 style_data = { | |
192 "type": "LinearBasicDisplay", # No ideal default, but should be overwritten anyway | |
193 } | |
194 | |
195 if "display" in xml_conf["style"]: | |
196 style_data["type"] = xml_conf["style"]["display"] | |
197 | |
198 style_data["displayId"] = f"{xml_conf['label']}_{style_data['type']}" | |
199 | |
200 style_data.update(self._prepare_renderer_config(style_data["type"], xml_conf["style"])) | |
201 | |
202 return {"displays": [style_data]} | |
203 | |
204 def _prepare_renderer_config(self, display_type, xml_conf): | |
205 | |
206 style_data = {} | |
207 | |
208 # if display_type in ("LinearBasicDisplay", "LinearVariantDisplay"): | |
209 # TODO LinearVariantDisplay does not understand these options when written in config.json | |
210 if display_type in ("LinearBasicDisplay"): | |
211 | |
212 # Doc: https://jbrowse.org/jb2/docs/config/svgfeaturerenderer/ | |
213 style_data["renderer"] = { | |
214 "type": "SvgFeatureRenderer", | |
215 "showLabels": xml_conf.get("show_labels", True), | |
216 "showDescriptions": xml_conf.get("show_descriptions", True), | |
217 "labels": { | |
218 "name": xml_conf.get("labels_name", "jexl:get(feature,'name') || get(feature,'id')"), | |
219 "description": xml_conf.get("descriptions_name", "jexl:get(feature,'note') || get(feature,'description')") | |
220 }, | |
221 "displayMode": xml_conf.get("display_mode", "normal"), | |
222 "maxHeight": xml_conf.get("max_height", 1200), | |
223 } | |
224 | |
225 elif display_type == "LinearArcDisplay": | |
226 | |
227 # Doc: https://jbrowse.org/jb2/docs/config/arcrenderer/ | |
228 style_data["renderer"] = { | |
229 "type": "ArcRenderer", | |
230 "label": xml_conf.get("labels_name", "jexl:get(feature,'score')"), | |
231 "displayMode": xml_conf.get("display_mode", "arcs"), | |
232 } | |
233 | |
234 elif display_type == "LinearWiggleDisplay": | |
235 | |
236 wig_renderer = xml_conf.get("renderer", "xyplot") | |
237 style_data["defaultRendering"] = wig_renderer | |
238 | |
239 elif display_type == "MultiLinearWiggleDisplay": | |
240 | |
241 wig_renderer = xml_conf.get("renderer", "multirowxy") | |
242 style_data["defaultRendering"] = wig_renderer | |
243 | |
244 elif display_type == "LinearSNPCoverageDisplay": | |
245 | |
246 # Does not work | |
247 # style_data["renderer"] = { | |
248 # "type": "SNPCoverageRenderer", | |
249 # "displayCrossHatches": xml_conf.get("display_cross_hatches", True), | |
250 # } | |
251 | |
252 style_data["scaleType"] = xml_conf.get("scale_type", "linear") | |
253 if "min_score" in xml_conf: | |
254 style_data["minScore"] = xml_conf["min_score"] | |
255 | |
256 if "max_score" in xml_conf: | |
257 style_data["maxScore"] = xml_conf["max_score"] | |
258 | |
259 # Doc: https://jbrowse.org/jb2/docs/config/snpcoveragerenderer/ | |
260 | |
261 return style_data | |
262 | |
263 def _prepare_format_details(self, xml_conf): | |
264 formatDetails = { | |
265 } | |
266 | |
267 if "feature" in xml_conf["formatdetails"]: | |
268 feat_jexl = xml_conf["formatdetails"]["feature"] | |
269 for key, value in mapped_chars.items(): | |
270 feat_jexl = feat_jexl.replace(value, key) | |
271 formatDetails["feature"] = feat_jexl | |
272 | |
273 if "subfeature" in xml_conf["formatdetails"]: | |
274 sfeat_jexl = xml_conf["formatdetails"]["subfeature"] | |
275 for key, value in mapped_chars.items(): | |
276 sfeat_jexl = sfeat_jexl.replace(value, key) | |
277 formatDetails["subfeatures"] = sfeat_jexl | |
278 | |
279 if "depth" in xml_conf["formatdetails"]: | |
280 formatDetails["depth"] = int(xml_conf["formatdetails"]["depth"]) | |
281 | |
282 return {"formatDetails": formatDetails} | |
283 | |
284 def _prepare_track_metadata(self, xml_conf): | |
285 metadata = { | |
286 } | |
287 | |
288 metadata = xml_conf["metadata"] | |
289 | |
290 return {"metadata": metadata} | |
291 | |
292 def check_existing(self, destination): | |
293 existing = os.path.join(destination, "config.json") | |
294 if os.path.exists(existing): | |
295 with open(existing, "r") as existing_conf: | |
296 conf = json.load(existing_conf) | |
297 if "assemblies" in conf: | |
298 for assembly in conf["assemblies"]: | |
299 if "name" in assembly: | |
300 | |
301 # Look for a default scaffold | |
302 default_seq = None | |
303 if 'defaultSession' in conf and 'views' in conf['defaultSession']: | |
304 for view in conf['defaultSession']['views']: | |
305 if 'init' in view and 'assembly' in view['init'] and 'loc' in view['init']: | |
306 if view['init']['assembly'] == assembly["name"]: | |
307 default_seq = view['init']['loc'].split(":")[0] | |
308 if "views" in view: | |
309 subviews = view["views"] | |
310 for subview in subviews: | |
311 if 'init' in subview and 'assembly' in subview['init'] and 'loc' in subview['init']: | |
312 if subview['init']['assembly'] == assembly["name"]: | |
313 default_seq = subview['init']['loc'].split(":")[0] | |
314 | |
315 self.assembly_ids[assembly["name"]] = default_seq | |
316 | |
317 def _load_old_genome_views(self): | |
318 | |
319 views = {} | |
320 | |
321 config_path = os.path.join(self.outdir, "config.json") | |
322 with open(config_path, "r") as config_file: | |
323 config_json = json.load(config_file) | |
324 | |
325 # Find default synteny views existing from a previous jbrowse dataset | |
326 if 'defaultSession' in config_json and 'views' in config_json['defaultSession']: | |
327 for view in config_json['defaultSession']['views']: | |
328 if view['type'] != "LinearSyntenyView": | |
329 if 'init' in view and 'assembly' in view['init']: | |
330 views[view['init']['assembly']] = view | |
331 | |
332 return views | |
333 | |
334 def _load_old_synteny_views(self): | |
335 | |
336 views = [] | |
337 | |
338 config_path = os.path.join(self.outdir, "config.json") | |
339 with open(config_path, "r") as config_file: | |
340 config_json = json.load(config_file) | |
341 | |
342 # Find default synteny views existing from a previous jbrowse dataset | |
343 if 'defaultSession' in config_json and 'views' in config_json['defaultSession']: | |
344 for view in config_json['defaultSession']['views']: | |
345 if view['type'] == "LinearSyntenyView": | |
346 views.append(view) | |
347 | |
348 return views | |
349 | |
350 def add_assembly(self, path, label, is_remote=False, cytobands=None, ref_name_aliases=None): | |
351 | |
352 if not is_remote: | |
353 # Find a non-existing filename for the new genome | |
354 # (to avoid colision when upgrading an existing instance) | |
355 rel_seq_path = os.path.join("data", label) | |
356 seq_path = os.path.join(self.outdir, rel_seq_path) | |
357 fn_try = 1 | |
358 while ( | |
359 os.path.exists(seq_path + ".fasta") | |
360 or os.path.exists(seq_path + ".fasta.gz") | |
361 or os.path.exists(seq_path + ".fasta.gz.fai") | |
362 or os.path.exists(seq_path + ".fasta.gz.gzi") | |
363 ): | |
364 rel_seq_path = os.path.join("data", f"{label}{fn_try}") | |
365 seq_path = os.path.join(self.outdir, rel_seq_path) | |
366 fn_try += 1 | |
367 | |
368 # Check if the assembly already exists from a previous run (--update mode) | |
369 if self.update: | |
370 | |
371 config_path = os.path.join(self.outdir, "config.json") | |
372 with open(config_path, "r") as config_file: | |
373 config_json = json.load(config_file) | |
374 | |
375 for asby in config_json['assemblies']: | |
376 if asby['name'] == label: | |
377 | |
378 # Find default views existing for this assembly | |
379 if 'defaultSession' in config_json and 'views' in config_json['defaultSession']: | |
380 for view in config_json['defaultSession']['views']: | |
381 if 'init' in view and 'assembly' in view['init']: | |
382 if view['init']['assembly'] == label: | |
383 | |
384 log.info("Found existing assembly from existing JBrowse2 instance, preserving it") | |
385 | |
386 self.default_views[view['init']['assembly']] = view | |
387 | |
388 return label | |
389 | |
390 # Copy ref alias file if any | |
391 if ref_name_aliases: | |
392 copied_ref_name_aliases = seq_path + ".aliases" | |
393 shutil.copy(ref_name_aliases, copied_ref_name_aliases) | |
394 copied_ref_name_aliases = rel_seq_path + ".aliases" | |
395 | |
396 # Copy cytobands file if any | |
397 if cytobands: | |
398 copied_cytobands = seq_path + ".cytobands" | |
399 shutil.copy(cytobands, copied_cytobands) | |
400 copied_cytobands = rel_seq_path + ".cytobands" | |
401 | |
402 # Find a non-existing label for the new genome | |
403 # (to avoid colision when upgrading an existing instance) | |
404 lab_try = 1 | |
405 uniq_label = label | |
406 while uniq_label in self.assembly_ids: | |
407 uniq_label = label + str(lab_try) | |
408 lab_try += 1 | |
409 | |
410 if is_remote: | |
411 | |
412 # Find a default scaffold to display | |
413 with requests.get(path + ".fai", stream=True) as response: | |
414 response.raise_for_status() | |
415 first_seq = next(response.iter_lines()) | |
416 first_seq = first_seq.decode("utf-8").split('\t')[0] | |
417 | |
418 self.assembly_ids[uniq_label] = first_seq | |
419 | |
420 # We assume we just need to suffix url with .fai and .gzi for indexes. | |
421 cmd_jb = [ | |
422 "jbrowse", | |
423 "add-assembly", | |
424 "--name", | |
425 uniq_label, | |
426 "--type", | |
427 "bgzipFasta", | |
428 "--out", | |
429 self.outdir, | |
430 "--skipCheck", | |
431 ] | |
432 | |
433 if ref_name_aliases: | |
434 cmd_jb.extend([ | |
435 "--refNameAliases", | |
436 copied_ref_name_aliases, | |
437 ]) | |
438 | |
439 cmd_jb.append(path) # Path is an url in remote mode | |
440 | |
441 self.subprocess_check_call(cmd_jb) | |
471 else: | 442 else: |
472 cmd = ["cp", src, dest] | 443 # Find a default scaffold to display |
473 | 444 with open(path, "r") as fa_handle: |
474 return self.subprocess_check_call(cmd) | 445 fa_header = fa_handle.readline()[1:].strip().split(" ")[0] |
475 | 446 |
476 def _prepare_track_style(self, trackDict): | 447 self.assembly_ids[uniq_label] = fa_header |
477 | 448 |
478 style_data = { | 449 copied_genome = seq_path + ".fasta" |
479 "type": "LinearBasicDisplay", | 450 shutil.copy(path, copied_genome) |
480 "displayId": "%s-LinearBasicDisplay" % trackDict["trackId"], | 451 |
452 # Compress with bgzip | |
453 cmd = ["bgzip", copied_genome] | |
454 self.subprocess_check_call(cmd) | |
455 | |
456 # FAI Index | |
457 cmd = ["samtools", "faidx", copied_genome + ".gz"] | |
458 self.subprocess_check_call(cmd) | |
459 | |
460 cmd_jb = [ | |
461 "jbrowse", | |
462 "add-assembly", | |
463 "--load", | |
464 "inPlace", | |
465 "--name", | |
466 uniq_label, | |
467 "--type", | |
468 "bgzipFasta", | |
469 "--out", | |
470 self.outdir, | |
471 "--skipCheck", | |
472 ] | |
473 | |
474 if ref_name_aliases: | |
475 cmd_jb.extend([ | |
476 "--refNameAliases", | |
477 copied_ref_name_aliases, | |
478 ]) | |
479 | |
480 cmd_jb.append(rel_seq_path + ".fasta.gz") | |
481 | |
482 self.subprocess_check_call(cmd_jb) | |
483 | |
484 if cytobands: | |
485 self.add_cytobands(uniq_label, copied_cytobands) | |
486 | |
487 return uniq_label | |
488 | |
489 def add_cytobands(self, assembly_name, cytobands_path): | |
490 | |
491 config_path = os.path.join(self.outdir, "config.json") | |
492 with open(config_path, "r") as config_file: | |
493 config_json = json.load(config_file) | |
494 | |
495 config_data = {} | |
496 | |
497 config_data["cytobands"] = { | |
498 "adapter": { | |
499 "type": "CytobandAdapter", | |
500 "cytobandLocation": { | |
501 "uri": cytobands_path | |
502 } | |
503 } | |
481 } | 504 } |
482 | 505 |
483 if trackDict.get("displays", None): # use first if multiple like bed | 506 filled_assemblies = [] |
484 style_data["type"] = trackDict["displays"][0]["type"] | 507 for assembly in config_json["assemblies"]: |
485 style_data["displayId"] = trackDict["displays"][0]["displayId"] | 508 if assembly["name"] == assembly_name: |
486 return style_data | 509 assembly.update(config_data) |
487 | 510 filled_assemblies.append(assembly) |
488 def getNrow(self, url): | 511 config_json["assemblies"] = filled_assemblies |
489 useuri = url.startswith("https://") or url.startswith("http://") | 512 |
490 if not useuri: | 513 with open(config_path, "w") as config_file: |
491 fl = open(url, "r").readlines() | 514 json.dump(config_json, config_file, indent=2) |
492 nrow = len(fl) | 515 |
516 def text_index(self): | |
517 | |
518 for ass in self.tracksToIndex: | |
519 tracks = self.tracksToIndex[ass] | |
520 args = [ | |
521 "jbrowse", | |
522 "text-index", | |
523 "--target", | |
524 self.outdir, | |
525 "--assemblies", | |
526 ass, | |
527 ] | |
528 | |
529 tracks = ",".join(tracks) | |
530 if tracks: | |
531 args += ["--tracks", tracks] | |
532 | |
533 log.info(f"-----> Running text-index on assembly {ass} and tracks {tracks}") | |
534 | |
535 # Only run index if we want to index at least one | |
536 # If --tracks is not specified, it will index everything | |
537 self.subprocess_check_call(args) | |
538 | |
539 def add_gc_content(self, parent, trackData, **kwargs): | |
540 | |
541 adapter = {} | |
542 existing = os.path.join(self.outdir, "config.json") | |
543 if os.path.exists(existing): | |
544 with open(existing, "r") as existing_conf: | |
545 conf = json.load(existing_conf) | |
546 if "assemblies" in conf: | |
547 for assembly in conf["assemblies"]: | |
548 if assembly.get('name', "") == parent['uniq_id']: | |
549 adapter = assembly.get('sequence', {}).get('adapter', {}) | |
550 | |
551 json_track_data = { | |
552 "type": "GCContentTrack", | |
553 "trackId": trackData["label"], | |
554 "name": trackData["key"], | |
555 "adapter": adapter, | |
556 "category": [trackData["category"]], | |
557 "assemblyNames": [parent['uniq_id']], | |
558 } | |
559 | |
560 style_json = self._prepare_track_style(trackData) | |
561 | |
562 json_track_data.update(style_json) | |
563 | |
564 self.subprocess_check_call( | |
565 [ | |
566 "jbrowse", | |
567 "add-track-json", | |
568 "--target", | |
569 self.outdir, | |
570 json.dumps(json_track_data), | |
571 ] | |
572 ) | |
573 | |
574 def add_bigwig(self, parent, data, trackData, wiggleOpts, **kwargs): | |
575 | |
576 if trackData['remote']: | |
577 rel_dest = data | |
493 else: | 578 else: |
494 try: | 579 rel_dest = os.path.join("data", trackData["label"] + ".bw") |
495 scontext = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) | 580 dest = os.path.join(self.outdir, rel_dest) |
496 scontext.check_hostname = False | 581 self.symlink_or_copy(os.path.realpath(data), dest) |
497 scontext.verify_mode = ssl.VerifyMode.CERT_NONE | 582 |
498 with urllib.request.urlopen(url, context=scontext) as f: | 583 style_json = self._prepare_track_style(trackData) |
499 fl = f.readlines() | 584 |
500 nrow = len(fl) | 585 track_metadata = self._prepare_track_metadata(trackData) |
501 except Exception: | 586 |
502 nrow = 0 | 587 style_json.update(track_metadata) |
503 logging.debug("getNrow %s returning %d" % (url, nrow)) | 588 |
504 return nrow | 589 self._add_track( |
505 | 590 trackData["label"], |
506 def process_genomes(self, genomes): | 591 trackData["key"], |
507 assembly = [] | 592 trackData["category"], |
508 assmeta = [] | 593 rel_dest, |
509 useuri = False | 594 parent, |
510 primaryGenome = None | 595 config=style_json, |
511 for i, genome_node in enumerate(genomes): | 596 remote=trackData['remote'] |
512 this_genome = {} | 597 ) |
513 if genome_node["useuri"] == "yes": | 598 |
514 useuri = True | 599 def add_bigwig_multi(self, parent, data_files, trackData, wiggleOpts, **kwargs): |
515 genome_name = genome_node["label"].strip() | 600 |
516 if len(genome_name) == 0: | 601 subadapters = [] |
517 genome_name = os.path.splitext(os.path.basename(genome_node["path"]))[0] | 602 |
518 if len(genome_name.split()) > 1: | 603 sub_num = 0 |
519 genome_name = genome_name.split()[0] | 604 for data in data_files: |
520 # spaces and cruft break scripts when substituted | 605 if trackData['remote']: |
521 if not primaryGenome: | 606 rel_dest = data[1] |
522 primaryGenome = genome_name | 607 else: |
523 if genome_name not in self.genome_names: | 608 rel_dest = os.path.join("data", f"{trackData['label']}_sub{sub_num}.bw") |
524 self.genome_names.append(genome_name) | 609 dest = os.path.join(self.outdir, rel_dest) |
525 fapath = genome_node["path"] | 610 self.symlink_or_copy(os.path.realpath(data[1]), dest) |
526 if not useuri: | 611 |
527 fapath = os.path.realpath(fapath) | 612 subadapters.append({ |
528 assem, first_contig = self.make_assembly(fapath, genome_name, useuri) | 613 "type": "BigWigAdapter", |
529 assembly.append(assem) | 614 "name": data[0], |
530 self.ass_first_contigs.append(first_contig) | 615 "bigWigLocation": { |
531 if genome_name == primaryGenome: # first one | 616 "uri": rel_dest, |
532 this_genome["genome_name"] = genome_name # first one for all tracks | 617 "locationType": "UriLocation" |
533 this_genome["genome_sequence_adapter"] = assem["sequence"][ | 618 } |
534 "adapter" | 619 }) |
535 ] | 620 sub_num += 1 |
536 this_genome["genome_firstcontig"] = first_contig | 621 |
537 assmeta.append(this_genome) | 622 json_track_data = { |
538 self.assemblies += assembly | 623 "type": "MultiQuantitativeTrack", |
539 self.assmeta[primaryGenome] = assmeta | 624 "trackId": trackData["label"], |
540 self.tracksToAdd[primaryGenome] = [] | 625 "name": trackData["key"], |
541 return primaryGenome | 626 "adapter": { |
542 | 627 "type": "MultiWiggleAdapter", |
543 def make_assembly(self, fapath, gname, useuri): | 628 "subadapters": subadapters |
544 if useuri: | 629 }, |
545 faname = fapath | 630 "category": [trackData["category"]], |
546 scontext = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) | 631 "assemblyNames": [parent['uniq_id']], |
547 scontext.check_hostname = False | 632 } |
548 scontext.verify_mode = ssl.VerifyMode.CERT_NONE | 633 |
549 with urllib.request.urlopen(url=faname + ".fai", context=scontext) as f: | 634 style_json = self._prepare_track_style(trackData) |
550 fl = f.readline() | 635 |
551 contig = fl.decode("utf8").strip() | 636 json_track_data.update(style_json) |
552 # Merlin 172788 8 60 61 | 637 |
638 track_metadata = self._prepare_track_metadata(trackData) | |
639 | |
640 json_track_data.update(track_metadata) | |
641 | |
642 self.subprocess_check_call( | |
643 [ | |
644 "jbrowse", | |
645 "add-track-json", | |
646 "--target", | |
647 self.outdir, | |
648 json.dumps(json_track_data), | |
649 ] | |
650 ) | |
651 | |
652 # Anything ending in "am" (Bam or Cram) | |
653 def add_xam(self, parent, data, trackData, xamOpts, index=None, ext="bam", **kwargs): | |
654 index_ext = "bai" | |
655 if ext == "cram": | |
656 index_ext = "crai" | |
657 | |
658 if trackData['remote']: | |
659 rel_dest = data | |
660 # Index will be set automatically as xam url + xai .suffix by add-track cmd | |
553 else: | 661 else: |
554 faname = gname + ".fa.gz" | 662 rel_dest = os.path.join("data", trackData["label"] + f".{ext}") |
555 fadest = os.path.realpath(os.path.join(self.outdir, faname)) | 663 dest = os.path.join(self.outdir, rel_dest) |
556 cmd = "bgzip -k -i -c -I '%s.gzi' '%s' > '%s'" % (fadest, fapath, fadest) | 664 self.symlink_or_copy(os.path.realpath(data), dest) |
557 subprocess.run(cmd, shell=True) | 665 |
558 cmd = ["samtools", "faidx", fadest] | 666 if index is not None and os.path.exists(os.path.realpath(index)): |
667 # xai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest | |
668 self.subprocess_check_call( | |
669 ["cp", os.path.realpath(index), dest + f".{index_ext}"] | |
670 ) | |
671 else: | |
672 # Can happen in exotic condition | |
673 # e.g. if bam imported as symlink with datatype=unsorted.bam, then datatype changed to bam | |
674 # => no index generated by galaxy, but there might be one next to the symlink target | |
675 # this trick allows to skip the bam sorting made by galaxy if already done outside | |
676 if os.path.exists(os.path.realpath(data) + f".{index_ext}"): | |
677 self.symlink_or_copy( | |
678 os.path.realpath(data) + f".{index_ext}", dest + f".{index_ext}" | |
679 ) | |
680 else: | |
681 log.warn( | |
682 f"Could not find a bam index (.{index_ext} file) for {data}" | |
683 ) | |
684 | |
685 style_json = self._prepare_track_style(trackData) | |
686 | |
687 track_metadata = self._prepare_track_metadata(trackData) | |
688 | |
689 style_json.update(track_metadata) | |
690 | |
691 self._add_track( | |
692 trackData["label"], | |
693 trackData["key"], | |
694 trackData["category"], | |
695 rel_dest, | |
696 parent, | |
697 config=style_json, | |
698 remote=trackData['remote'] | |
699 ) | |
700 | |
701 def add_vcf(self, parent, data, trackData, vcfOpts={}, zipped=False, **kwargs): | |
702 if trackData['remote']: | |
703 rel_dest = data | |
704 else: | |
705 if zipped: | |
706 rel_dest = os.path.join("data", trackData["label"] + ".vcf.gz") | |
707 dest = os.path.join(self.outdir, rel_dest) | |
708 shutil.copy(os.path.realpath(data), dest) | |
709 else: | |
710 rel_dest = os.path.join("data", trackData["label"] + ".vcf") | |
711 dest = os.path.join(self.outdir, rel_dest) | |
712 shutil.copy(os.path.realpath(data), dest) | |
713 | |
714 cmd = ["bgzip", dest] | |
715 self.subprocess_check_call(cmd) | |
716 cmd = ["tabix", dest + ".gz"] | |
717 self.subprocess_check_call(cmd) | |
718 | |
719 rel_dest = os.path.join("data", trackData["label"] + ".vcf.gz") | |
720 | |
721 style_json = self._prepare_track_style(trackData) | |
722 | |
723 formatdetails = self._prepare_format_details(trackData) | |
724 | |
725 style_json.update(formatdetails) | |
726 | |
727 track_metadata = self._prepare_track_metadata(trackData) | |
728 | |
729 style_json.update(track_metadata) | |
730 | |
731 self._add_track( | |
732 trackData["label"], | |
733 trackData["key"], | |
734 trackData["category"], | |
735 rel_dest, | |
736 parent, | |
737 config=style_json, | |
738 remote=trackData['remote'] | |
739 ) | |
740 | |
741 def add_gff(self, parent, data, format, trackData, gffOpts, **kwargs): | |
742 if trackData['remote']: | |
743 rel_dest = data | |
744 else: | |
745 rel_dest = os.path.join("data", trackData["label"] + ".gff") | |
746 dest = os.path.join(self.outdir, rel_dest) | |
747 rel_dest = rel_dest + ".gz" | |
748 | |
749 self._sort_gff(data, dest) | |
750 | |
751 style_json = self._prepare_track_style(trackData) | |
752 | |
753 formatdetails = self._prepare_format_details(trackData) | |
754 | |
755 style_json.update(formatdetails) | |
756 | |
757 track_metadata = self._prepare_track_metadata(trackData) | |
758 | |
759 style_json.update(track_metadata) | |
760 | |
761 if gffOpts.get('index', 'false') in ("yes", "true", "True"): | |
762 if parent['uniq_id'] not in self.tracksToIndex: | |
763 self.tracksToIndex[parent['uniq_id']] = [] | |
764 self.tracksToIndex[parent['uniq_id']].append(trackData["label"]) | |
765 | |
766 self._add_track( | |
767 trackData["label"], | |
768 trackData["key"], | |
769 trackData["category"], | |
770 rel_dest, | |
771 parent, | |
772 config=style_json, | |
773 remote=trackData['remote'] | |
774 ) | |
775 | |
776 def add_bed(self, parent, data, format, trackData, gffOpts, **kwargs): | |
777 if trackData['remote']: | |
778 rel_dest = data | |
779 else: | |
780 rel_dest = os.path.join("data", trackData["label"] + ".bed") | |
781 dest = os.path.join(self.outdir, rel_dest) | |
782 rel_dest = rel_dest + ".gz" | |
783 | |
784 self._sort_bed(data, dest) | |
785 | |
786 style_json = self._prepare_track_style(trackData) | |
787 | |
788 formatdetails = self._prepare_format_details(trackData) | |
789 | |
790 style_json.update(formatdetails) | |
791 | |
792 track_metadata = self._prepare_track_metadata(trackData) | |
793 | |
794 style_json.update(track_metadata) | |
795 | |
796 if gffOpts.get('index', 'false') in ("yes", "true", "True"): | |
797 if parent['uniq_id'] not in self.tracksToIndex: | |
798 self.tracksToIndex[parent['uniq_id']] = [] | |
799 self.tracksToIndex[parent['uniq_id']].append(trackData["label"]) | |
800 | |
801 self._add_track( | |
802 trackData["label"], | |
803 trackData["key"], | |
804 trackData["category"], | |
805 rel_dest, | |
806 parent, | |
807 config=style_json, | |
808 remote=trackData['remote'] | |
809 ) | |
810 | |
811 def add_paf(self, parent, data, trackData, pafOpts, **kwargs): | |
812 | |
813 if trackData['remote']: | |
814 rel_dest = data | |
815 | |
816 if rel_dest.endswith('pif') or rel_dest.endswith('pif.gz'): | |
817 adapter = "pif" | |
818 else: | |
819 adapter = "paf" | |
820 else: | |
821 rel_dest = os.path.join("data", trackData["label"] + ".pif.gz") | |
822 dest = os.path.join(self.outdir, rel_dest) | |
823 | |
824 cmd = ["jbrowse", "make-pif", "--out", dest, os.path.realpath(data)] | |
559 self.subprocess_check_call(cmd) | 825 self.subprocess_check_call(cmd) |
560 contig = open(fadest + ".fai", "r").readline().strip() | 826 |
561 adapter = { | 827 adapter = "pif" |
562 "type": "BgzipFastaAdapter", | 828 |
563 "fastaLocation": { | 829 if trackData["style"]["display"] == "LinearBasicDisplay": |
564 "uri": faname, | 830 # Normal style track |
565 }, | 831 |
566 "faiLocation": { | 832 json_track_data = { |
567 "uri": faname + ".fai", | 833 "type": "SyntenyTrack", |
568 }, | 834 "trackId": trackData["label"], |
569 "gziLocation": { | 835 "name": trackData["key"], |
570 "uri": faname + ".gzi", | 836 "adapter": { |
571 }, | 837 "type": "PairwiseIndexedPAFAdapter", |
838 "pifGzLocation": { | |
839 "uri": rel_dest, | |
840 }, | |
841 "index": { | |
842 "location": { | |
843 "uri": rel_dest + ".tbi", | |
844 } | |
845 }, | |
846 }, | |
847 "category": [trackData["category"]], | |
848 "assemblyNames": [parent['uniq_id']], | |
849 } | |
850 else: | |
851 # Synteny viewer | |
852 | |
853 json_track_data = { | |
854 "type": "SyntenyTrack", | |
855 "trackId": trackData["label"], | |
856 "name": trackData["key"], | |
857 "adapter": { | |
858 "assemblyNames": [ | |
859 parent['uniq_id'], | |
860 "", # Placeholder until we know the next genome id | |
861 ], | |
862 }, | |
863 "category": [trackData["category"]], | |
864 "assemblyNames": [ | |
865 parent['uniq_id'], | |
866 "", # Placeholder until we know the next genome id | |
867 ] | |
868 } | |
869 | |
870 if adapter == "pif": | |
871 json_track_data["adapter"].update({ | |
872 "type": "PairwiseIndexedPAFAdapter", | |
873 "pifGzLocation": { | |
874 "uri": rel_dest, | |
875 }, | |
876 "index": { | |
877 "location": { | |
878 "uri": rel_dest + ".tbi", | |
879 } | |
880 }, | |
881 }) | |
882 else: | |
883 json_track_data["adapter"].update({ | |
884 "type": "PAFAdapter", | |
885 "pafLocation": { | |
886 "uri": rel_dest, | |
887 }, | |
888 }) | |
889 | |
890 style_json = self._prepare_track_style(trackData) | |
891 | |
892 json_track_data.update(style_json) | |
893 | |
894 track_metadata = self._prepare_track_metadata(trackData) | |
895 | |
896 json_track_data.update(track_metadata) | |
897 | |
898 if trackData["style"]["display"] == "LinearBasicDisplay": | |
899 self.subprocess_check_call( | |
900 [ | |
901 "jbrowse", | |
902 "add-track-json", | |
903 "--target", | |
904 self.outdir, | |
905 json.dumps(json_track_data), | |
906 ] | |
907 ) | |
908 else: | |
909 self.synteny_tracks.append(json_track_data) | |
910 | |
911 def add_hic(self, parent, data, trackData, hicOpts, **kwargs): | |
912 if trackData['remote']: | |
913 rel_dest = data | |
914 else: | |
915 rel_dest = os.path.join("data", trackData["label"] + ".hic") | |
916 dest = os.path.join(self.outdir, rel_dest) | |
917 self.symlink_or_copy(os.path.realpath(data), dest) | |
918 | |
919 style_json = self._prepare_track_style(trackData) | |
920 | |
921 track_metadata = self._prepare_track_metadata(trackData) | |
922 | |
923 style_json.update(track_metadata) | |
924 | |
925 self._add_track( | |
926 trackData["label"], | |
927 trackData["key"], | |
928 trackData["category"], | |
929 rel_dest, | |
930 parent, | |
931 config=style_json, | |
932 remote=trackData['remote'] | |
933 ) | |
934 | |
935 def add_maf(self, parent, data, trackData, mafOpts, **kwargs): | |
936 | |
937 # Add needed plugin | |
938 plugin_def = { | |
939 "name": "MafViewer", | |
940 "url": "https://unpkg.com/jbrowse-plugin-mafviewer/dist/jbrowse-plugin-mafviewer.umd.production.min.js" | |
572 } | 941 } |
573 first_contig = contig.split()[:2] | 942 self.plugins.append(plugin_def) |
574 first_contig.insert(0, gname) | 943 |
575 trackDict = { | 944 rel_dest = os.path.join("data", trackData["label"] + ".maf") |
576 "name": gname, | 945 dest = os.path.join(self.outdir, rel_dest) |
577 "sequence": { | 946 |
578 "type": "ReferenceSequenceTrack", | 947 assembly_name = mafOpts.get("assembly_name", "") |
579 "trackId": gname, | 948 if not assembly_name: |
580 "adapter": adapter, | 949 # Guess from assembly |
581 }, | 950 assembly_name = parent['uniq_id'] |
582 "displays": [ | 951 |
583 { | 952 self._convert_maf(data, dest, assembly_name) |
584 "type": "LinearReferenceSequenceDisplay", | 953 |
585 "displayId": "%s-LinearReferenceSequenceDisplay" % gname, | 954 # Extract samples list |
586 }, | |
587 { | |
588 "type": "LinearGCContentDisplay", | |
589 "displayId": "%s-LinearGCContentDisplay" % gname, | |
590 }, | |
591 ], | |
592 } | |
593 return (trackDict, first_contig) | |
594 | |
595 def add_default_view(self): | |
596 cmd = [ | |
597 "jbrowse", | |
598 "set-default-session", | |
599 "-s", | |
600 self.config_json_file, | |
601 "-t", | |
602 ",".join(self.trackIdlist), | |
603 "-n", | |
604 "JBrowse2 in Galaxy", | |
605 "--target", | |
606 self.config_json_file, | |
607 "-v", | |
608 " LinearGenomeView", | |
609 ] | |
610 self.subprocess_check_call(cmd) | |
611 | |
612 def write_config(self): | |
613 with open(self.config_json_file, "w") as fp: | |
614 json.dump(self.config_json, fp, indent=2) | |
615 | |
616 def text_index(self): | |
617 # Index tracks | |
618 e = os.environ | |
619 e["SHELL"] = "/bin/sh" | |
620 cmd = ["jbrowse", "text-index"] | |
621 subprocess.run(cmd, env=e, shell=True) | |
622 | |
623 def add_hic(self, data, trackData): | |
624 """ | |
625 HiC adapter. | |
626 https://github.com/aidenlab/hic-format/blob/master/HiCFormatV9.md | |
627 for testing locally, these work: | |
628 HiC data is from https://s3.amazonaws.com/igv.broadinstitute.org/data/hic/intra_nofrag_30.hic | |
629 using hg19 reference track as a | |
630 'BgzipFastaAdapter' | |
631 fastaLocation: | |
632 uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz', | |
633 faiLocation: | |
634 uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.fai', | |
635 gziLocation: | |
636 uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.gzi', | |
637 Cool will not be likely to be a good fit - see discussion at https://github.com/GMOD/jbrowse-components/issues/2438 | |
638 | |
639 """ | |
640 tId = trackData["label"] | |
641 wasCool = trackData["wasCool"] | |
642 # can be served - if public. | |
643 # dsId = trackData["metadata"]["dataset_id"] | |
644 # url = "%s/api/datasets/%s/display?to_ext=hic " % (self.giURL, dsId) | |
645 useuri = trackData["useuri"].lower() == "yes" | |
646 logging.debug("wasCool=%s, data=%s, tId=%s" % (wasCool, data, tId)) | |
647 if useuri: | |
648 uri = data | |
649 else: | |
650 uri = tId + ".hic" | |
651 if not wasCool: | |
652 dest = os.path.join(self.outdir, uri) | |
653 if not os.path.exists(dest): | |
654 cmd = ["cp", data, dest] | |
655 self.subprocess_check_call(cmd) | |
656 else: | |
657 logging.error("not wasCool but %s exists" % dest) | |
658 categ = trackData["category"] | |
659 trackDict = { | |
660 "type": "HicTrack", | |
661 "trackId": tId, | |
662 "name": trackData["name"], | |
663 "assemblyNames": [trackData["assemblyNames"]], | |
664 "displays": [ | |
665 { | |
666 "type": "LinearHicDisplay", | |
667 "displayId": "%s-LinearHicDisplay" % tId, | |
668 } | |
669 ], | |
670 "category": [ | |
671 categ, | |
672 ], | |
673 "adapter": {"type": "HicAdapter", "hicLocation": {"uri": uri}}, | |
674 } | |
675 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict)) | |
676 self.trackIdlist.append(tId) | |
677 | |
678 def add_maf(self, data, trackData): | |
679 """ | |
680 from https://github.com/cmdcolin/maf2bed | |
681 Note: Both formats start with a MAF as input, and note that your MAF file should contain the species name and chromosome name | |
682 e.g. hg38.chr1 in the sequence identifiers. | |
683 need the reference id - eg hg18, for maf2bed.pl as the first parameter | |
684 """ | |
685 tId = trackData["label"] | |
686 mafPlugin = { | |
687 "plugins": [ | |
688 { | |
689 "name": "MafViewer", | |
690 "url": "https://unpkg.com/jbrowse-plugin-mafviewer/dist/jbrowse-plugin-mafviewer.umd.production.min.js", | |
691 } | |
692 ] | |
693 } | |
694 categ = trackData["category"] | |
695 fname = tId | |
696 dest = os.path.join(self.outdir, fname) | |
697 gname = trackData["assemblyNames"] | |
698 | |
699 cmd = [ | |
700 "bash", | |
701 os.path.join(INSTALLED_TO, "convertMAF.sh"), | |
702 data, | |
703 gname, | |
704 INSTALLED_TO, | |
705 dest, | |
706 ] | |
707 self.subprocess_check_call(cmd) | |
708 mafs = open(data, "r").readlines() | 955 mafs = open(data, "r").readlines() |
709 mafss = [x for x in mafs if (x.startswith("s\t") or x.startswith("s "))] | 956 mafss = [x for x in mafs if (x.startswith("s\t") or x.startswith("s "))] |
710 samp = [x.split()[1] for x in mafss if len(x.split()) > 0] | 957 samp = [x.split()[1] for x in mafss if len(x.split()) > 0] |
711 sampu = list(dict.fromkeys(samp)) | 958 sampu = list(dict.fromkeys(samp)) |
712 samples = [x.split(".")[0] for x in sampu] | 959 samples = [x.split(".")[0] for x in sampu] |
713 samples.sort() | 960 samples.sort() |
714 if logCommands: | 961 |
715 logging.debug( | 962 json_track_data = { |
716 "$$$$ cmd=%s, mafss=%s samp=%s samples=%s" | |
717 % (" ".join(cmd), mafss, samp, samples) | |
718 ) | |
719 trackDict = { | |
720 "type": "MafTrack", | 963 "type": "MafTrack", |
721 "trackId": tId, | 964 "trackId": trackData["label"], |
722 "name": trackData["name"], | 965 "name": trackData["key"], |
723 "category": [ | |
724 categ, | |
725 ], | |
726 "adapter": { | 966 "adapter": { |
727 "type": "MafTabixAdapter", | 967 "type": "MafTabixAdapter", |
728 "samples": samples, | 968 "samples": samples, |
729 "bedGzLocation": { | 969 "bedGzLocation": { |
730 "uri": fname + ".sorted.bed.gz", | 970 "uri": rel_dest + ".gz", |
731 }, | 971 }, |
732 "index": { | 972 "index": { |
733 "location": { | 973 "location": { |
734 "uri": fname + ".sorted.bed.gz.tbi", | 974 "uri": rel_dest + ".gz.tbi", |
735 }, | 975 }, |
736 }, | 976 }, |
737 }, | 977 }, |
738 "assemblyNames": [trackData["assemblyNames"]], | 978 "category": [trackData["category"]], |
739 "displays": [ | 979 "assemblyNames": [parent['uniq_id']], |
740 { | |
741 "type": "LinearBasicDisplay", | |
742 "displayId": "%s-LinearBasicDisplay" % tId, | |
743 }, | |
744 {"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId}, | |
745 ], | |
746 } | 980 } |
747 style_json = self._prepare_track_style(trackDict) | 981 |
748 trackDict["style"] = style_json | 982 style_json = self._prepare_track_style(trackData) |
749 self.tracksToAdd[gname].append(copy.copy(trackDict)) | 983 |
750 self.trackIdlist.append(tId) | 984 json_track_data.update(style_json) |
751 if self.config_json.get("plugins", None): | 985 |
752 self.config_json["plugins"].append(mafPlugin["plugins"][0]) | 986 track_metadata = self._prepare_track_metadata(trackData) |
753 else: | 987 |
754 self.config_json.update(mafPlugin) | 988 json_track_data.update(track_metadata) |
989 | |
990 self.subprocess_check_call( | |
991 [ | |
992 "jbrowse", | |
993 "add-track-json", | |
994 "--target", | |
995 self.outdir, | |
996 json.dumps(json_track_data), | |
997 ] | |
998 ) | |
999 | |
1000 def add_sparql(self, parent, url, query, query_refnames, trackData): | |
1001 json_track_data = { | |
1002 "type": "FeatureTrack", | |
1003 "trackId": trackData["label"], | |
1004 "name": trackData["key"], | |
1005 "adapter": { | |
1006 "type": "SPARQLAdapter", | |
1007 "endpoint": {"uri": url, "locationType": "UriLocation"}, | |
1008 "queryTemplate": query, | |
1009 }, | |
1010 "category": [trackData["category"]], | |
1011 "assemblyNames": [parent['uniq_id']], | |
1012 } | |
1013 | |
1014 if query_refnames: | |
1015 json_track_data["adapter"]["refNamesQueryTemplate"]: query_refnames | |
1016 | |
1017 # TODO handle metadata somehow for sparql too | |
1018 | |
1019 self.subprocess_check_call( | |
1020 [ | |
1021 "jbrowse", | |
1022 "add-track-json", | |
1023 "--target", | |
1024 self.outdir, | |
1025 json.dumps(json_track_data), | |
1026 ] | |
1027 ) | |
1028 | |
1029 def _add_track(self, track_id, label, category, path, assembly, config=None, trackType=None, load_action="inPlace", assemblies=None, remote=False): | |
1030 """ | |
1031 Adds a track to config.json using Jbrowse add-track cli | |
1032 | |
1033 By default, using `--load inPlace`: the file is supposed to be already placed at the `path` relative to | |
1034 the outdir, `jbrowse add-track` will not touch it and trust us that the file is there and ready to use. | |
1035 | |
1036 With `load_action` parameter, you can ask `jbrowse add-track` to copy/move/symlink the file for you. | |
1037 Not done by default because we often need more control on file copying/symlink for specific cases (indexes, symlinks of symlinks, ...) | |
1038 """ | |
1039 | |
1040 cmd = [ | |
1041 "jbrowse", | |
1042 "add-track", | |
1043 "--name", | |
1044 label, | |
1045 "--category", | |
1046 category, | |
1047 "--target", | |
1048 self.outdir, | |
1049 "--trackId", | |
1050 track_id, | |
1051 "--assemblyNames", | |
1052 assemblies if assemblies else assembly['uniq_id'], | |
1053 ] | |
1054 | |
1055 if not remote: | |
1056 cmd.append("--load") | |
1057 cmd.append(load_action) | |
1058 | |
1059 if config: | |
1060 cmd.append("--config") | |
1061 cmd.append(json.dumps(config)) | |
1062 | |
1063 if trackType: | |
1064 cmd.append("--trackType") | |
1065 cmd.append(trackType) | |
1066 | |
1067 cmd.append(path) | |
1068 | |
1069 self.subprocess_check_call(cmd) | |
755 | 1070 |
756 def _sort_gff(self, data, dest): | 1071 def _sort_gff(self, data, dest): |
757 # Only index if not already done | 1072 # Only index if not already done |
758 if not os.path.exists(dest): | 1073 if not os.path.exists(dest): |
759 e = os.environ | 1074 # Not using jbrowse sort-gff because it uses sort and has the problem exposed on https://github.com/tao-bioinfo/gff3sort |
760 e["SHELL"] = "/bin/sh" | 1075 cmd = f"gff3sort.pl --precise '{data}' | grep -v \"^$\" > '{dest}'" |
761 cmd = "jbrowse sort-gff %s | bgzip -c > %s" % (data, dest) | 1076 self.subprocess_popen(cmd, cwd=False) |
762 subprocess.run(cmd, env=e, shell=True) | 1077 |
763 self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest]) | 1078 self.subprocess_check_call(["bgzip", "-f", dest], cwd=False) |
764 | 1079 self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest + ".gz"], cwd=False) |
765 def add_gff(self, data, trackData): | |
766 tId = trackData["label"] | |
767 useuri = trackData["useuri"].lower() == "yes" | |
768 if useuri: | |
769 url = trackData["path"] | |
770 else: | |
771 url = tId + ".gz" | |
772 dest = os.path.join(self.outdir, url) | |
773 self._sort_gff(data, dest) | |
774 categ = trackData["category"] | |
775 trackDict = { | |
776 "type": "FeatureTrack", | |
777 "trackId": tId, | |
778 "name": trackData["name"], | |
779 "assemblyNames": [trackData["assemblyNames"]], | |
780 "category": [ | |
781 categ, | |
782 ], | |
783 "adapter": { | |
784 "type": "Gff3TabixAdapter", | |
785 "gffGzLocation": { | |
786 "uri": url, | |
787 }, | |
788 "index": { | |
789 "location": { | |
790 "uri": url + ".tbi", | |
791 } | |
792 }, | |
793 }, | |
794 "displays": [ | |
795 { | |
796 "type": "LinearBasicDisplay", | |
797 "displayId": "%s-LinearBasicDisplay" % tId, | |
798 }, | |
799 { | |
800 "type": "LinearArcDisplay", | |
801 "displayId": "%s-LinearArcDisplay" % tId, | |
802 }, | |
803 ], | |
804 } | |
805 style_json = self._prepare_track_style(trackDict) | |
806 trackDict["style"] = style_json | |
807 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict)) | |
808 self.trackIdlist.append(tId) | |
809 | |
810 def add_bigwig(self, data, trackData): | |
811 tId = trackData["label"] | |
812 useuri = trackData["useuri"].lower() == "yes" | |
813 if useuri: | |
814 url = data | |
815 else: | |
816 url = tId | |
817 # slashes in names cause path trouble | |
818 dest = os.path.join(self.outdir, url) | |
819 cmd = ["cp", data, dest] | |
820 self.subprocess_check_call(cmd) | |
821 bwloc = {"uri": url} | |
822 categ = trackData["category"] | |
823 trackDict = { | |
824 "type": "QuantitativeTrack", | |
825 "trackId": tId, | |
826 "name": trackData["name"], | |
827 "category": [ | |
828 categ, | |
829 ], | |
830 "assemblyNames": [trackData["assemblyNames"]], | |
831 "adapter": { | |
832 "type": "BigWigAdapter", | |
833 "bigWigLocation": bwloc, | |
834 }, | |
835 "displays": [ | |
836 { | |
837 "type": "LinearWiggleDisplay", | |
838 "displayId": "%s-LinearWiggleDisplay" % tId, | |
839 } | |
840 ], | |
841 } | |
842 style_json = self._prepare_track_style(trackDict) | |
843 trackDict["style"] = style_json | |
844 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict)) | |
845 self.trackIdlist.append(tId) | |
846 | |
847 def add_bam(self, data, trackData, bam_indexes=None, **kwargs): | |
848 tId = trackData["label"] | |
849 realFName = trackData["path"] | |
850 useuri = trackData["useuri"].lower() == "yes" | |
851 categ = trackData["category"] | |
852 if useuri: | |
853 url = data | |
854 else: | |
855 fname = tId | |
856 dest = "%s/%s" % (self.outdir, fname) | |
857 self.subprocess_check_call(["cp", data, dest]) | |
858 url = fname | |
859 bindex = fname + ".bai" | |
860 bi = bam_indexes.split(",") | |
861 bam_index = [ | |
862 x.split("~~~")[1].strip() | |
863 for x in bi | |
864 if "~~~" in x and x.split("~~~")[0].strip() == realFName | |
865 ] | |
866 logging.debug( | |
867 "===realFName=%s got %s as bam_indexes %s as bi, %s for bam_index" | |
868 % (realFName, bam_indexes, bi, bam_index) | |
869 ) | |
870 if len(bam_index) > 0 and os.path.exists(os.path.realpath(bam_index[0])): | |
871 self.subprocess_check_call(["cp", bam_index[0], bindex]) | |
872 else: | |
873 cmd = ["samtools", "index", "-b", "-o", bindex, data] | |
874 self.subprocess_check_call(cmd) | |
875 trackDict = { | |
876 "type": "AlignmentsTrack", | |
877 "trackId": tId, | |
878 "name": trackData["name"], | |
879 "category": [ | |
880 categ, | |
881 ], | |
882 "assemblyNames": [trackData["assemblyNames"]], | |
883 "adapter": { | |
884 "type": "BamAdapter", | |
885 "bamLocation": {"uri": url}, | |
886 "index": { | |
887 "location": { | |
888 "uri": bindex, | |
889 } | |
890 }, | |
891 }, | |
892 "displays": [ | |
893 { | |
894 "type": "LinearAlignmentsDisplay", | |
895 "displayId": "%s-LinearAlignmentsDisplay" % tId, | |
896 }, | |
897 ], | |
898 } | |
899 style_json = self._prepare_track_style(trackDict) | |
900 trackDict["style"] = style_json | |
901 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict)) | |
902 self.trackIdlist.append(tId) | |
903 | |
904 def add_cram(self, data, trackData, cram_indexes=None, **kwargs): | |
905 tId = trackData["label"] | |
906 realFName = trackData["path"] | |
907 categ = trackData["category"] | |
908 useuri = trackData["useuri"].lower() == "yes" | |
909 gsa = self.assmeta.get(trackData["assemblyNames"], None) | |
910 if gsa: | |
911 genseqad = gsa[0]["genome_sequence_adapter"] | |
912 else: | |
913 genseqad = "Not found" | |
914 logging.warning("No adapter found for cram %s in gsa=%s" % (tId, gsa)) | |
915 if useuri: | |
916 url = data | |
917 else: | |
918 fname = tId | |
919 dest = os.path.join(self.outdir, fname) | |
920 url = fname | |
921 self.subprocess_check_call(["cp", data, dest]) | |
922 ci = cram_indexes.split(",") | |
923 cram_index = [ | |
924 x.split("~~~")[1].strip() | |
925 for x in ci | |
926 if "~~~" in x and x.split("~~~")[0].strip() == realFName | |
927 ] | |
928 logging.debug( | |
929 "===realFName=%s got %s as cram_indexes %s as ci, %s for cram_index" | |
930 % (realFName, cram_indexes, ci, cram_index) | |
931 ) | |
932 if len(cram_index) > 0 and os.path.exists(cram_index[0]): | |
933 if not os.path.exists(dest + ".crai"): | |
934 # most probably made by galaxy and stored in galaxy dirs, need to copy it to dest | |
935 self.subprocess_check_call( | |
936 ["cp", os.path.realpath(cram_index[0]), dest + ".crai"] | |
937 ) | |
938 else: | |
939 cpath = os.path.realpath(dest) + ".crai" | |
940 cmd = ["samtools", "index", "-c", "-o", cpath, os.path.realpath(dest)] | |
941 self.subprocess_check_call(cmd) | |
942 trackDict = { | |
943 "type": "AlignmentsTrack", | |
944 "trackId": tId, | |
945 "name": trackData["name"], | |
946 "category": [ | |
947 categ, | |
948 ], | |
949 "assemblyNames": [trackData["assemblyNames"]], | |
950 "adapter": { | |
951 "type": "CramAdapter", | |
952 "cramLocation": {"uri": url}, | |
953 "craiLocation": { | |
954 "uri": url + ".crai", | |
955 }, | |
956 "sequenceAdapter": genseqad, | |
957 }, | |
958 "displays": [ | |
959 { | |
960 "type": "LinearAlignmentsDisplay", | |
961 "displayId": "%s-LinearAlignmentsDisplay" % tId, | |
962 }, | |
963 ], | |
964 } | |
965 style_json = self._prepare_track_style(trackDict) | |
966 trackDict["style"] = style_json | |
967 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict)) | |
968 self.trackIdlist.append(tId) | |
969 | |
970 def add_vcf(self, data, trackData): | |
971 tId = trackData["label"] | |
972 categ = trackData["category"] | |
973 useuri = trackData["useuri"].lower() == "yes" | |
974 if useuri: | |
975 url = data | |
976 else: | |
977 url = tId | |
978 dest = os.path.join(self.outdir, url) | |
979 cmd = ["bgzip", "-c", data] | |
980 with open(dest, "wb") as fout: | |
981 subprocess.run(cmd, stdout=fout) | |
982 cmd = ["tabix", "-f", "-p", "vcf", dest] | |
983 self.subprocess_check_call(cmd) | |
984 trackDict = { | |
985 "type": "VariantTrack", | |
986 "trackId": tId, | |
987 "name": trackData["name"], | |
988 "assemblyNames": [trackData["assemblyNames"]], | |
989 "category": [ | |
990 categ, | |
991 ], | |
992 "adapter": { | |
993 "type": "VcfTabixAdapter", | |
994 "vcfGzLocation": {"uri": url}, | |
995 "index": { | |
996 "location": { | |
997 "uri": url + ".tbi", | |
998 } | |
999 }, | |
1000 }, | |
1001 "displays": [ | |
1002 { | |
1003 "type": "LinearVariantDisplay", | |
1004 "displayId": "%s-LinearVariantDisplay" % tId, | |
1005 }, | |
1006 { | |
1007 "type": "ChordVariantDisplay", | |
1008 "displayId": "%s-ChordVariantDisplay" % tId, | |
1009 }, | |
1010 { | |
1011 "type": "LinearPairedArcDisplay", | |
1012 "displayId": "%s-LinearPairedArcDisplay" % tId, | |
1013 }, | |
1014 ], | |
1015 } | |
1016 style_json = self._prepare_track_style(trackDict) | |
1017 trackDict["style"] = style_json | |
1018 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict)) | |
1019 self.trackIdlist.append(tId) | |
1020 | 1080 |
1021 def _sort_bed(self, data, dest): | 1081 def _sort_bed(self, data, dest): |
1022 # Only index if not already done | 1082 # Only index if not already done |
1023 if not os.path.exists(dest): | 1083 if not os.path.exists(dest): |
1024 cmd = ["sort", "-k1,1", "-k2,2n", data] | 1084 cmd = ["sort", "-k1,1", "-k2,2n", data] |
1025 ps = subprocess.run(cmd, check=True, capture_output=True) | 1085 with open(dest, "w") as handle: |
1026 cmd = ["bgzip", "-c"] | 1086 self.subprocess_check_call(cmd, output=handle) |
1027 with open(dest, "wb") as fout: | 1087 |
1028 subprocess.run(cmd, input=ps.stdout, stdout=fout) | 1088 self.subprocess_check_call(["bgzip", "-f", dest]) |
1029 cmd = ["tabix", "-f", "-p", "bed", dest] | 1089 self.subprocess_check_call(["tabix", "-f", "-p", "bed", dest + ".gz"]) |
1030 self.subprocess_check_call(cmd) | 1090 |
1031 | 1091 def _convert_maf(self, data, dest, assembly_name): |
1032 def add_bed(self, data, ext, trackData): | 1092 # Only convert if not already done |
1033 bedPlugin = {"name": "BedScorePlugin", "umdLoc": {"uri": "bedscoreplugin.js"}} | 1093 if not os.path.exists(dest): |
1034 tId = trackData["label"] | 1094 |
1035 categ = trackData["category"] | 1095 dest_bed = dest + ".bed" |
1036 useuri = trackData["useuri"].lower() == "yes" | 1096 cmd = ["python", os.path.join(SELF_LOCATION, "maf2bed.py"), assembly_name, data, dest_bed] |
1037 if useuri: | 1097 self.subprocess_check_call(cmd, cwd=False) |
1038 url = data | 1098 |
1039 else: | 1099 cmd = ["sort", "-k1,1", "-k2,2n", dest_bed] |
1040 url = tId + ".gz" | 1100 with open(dest, "w") as handle: |
1041 dest = os.path.join(self.outdir, url) | 1101 self.subprocess_check_call(cmd, output=handle) |
1042 self._sort_bed(data, dest) | 1102 |
1043 if True or trackData.get("usebedscore", None): | 1103 self.subprocess_check_call(["bgzip", "-f", dest], cwd=False) |
1044 bedgzlocation = { | 1104 self.subprocess_check_call(["tabix", "-f", "-p", "bed", dest + ".gz"], cwd=False) |
1045 "uri": url, | 1105 |
1046 "columnNames": ["chr", "start", "end", "name", "score"], | 1106 def process_annotations(self, track, parent): |
1047 "scoreColumn": "score", | |
1048 } | |
1049 else: | |
1050 bedgzlocation = { | |
1051 "uri": url, | |
1052 } | |
1053 trackDict = { | |
1054 "type": "FeatureTrack", | |
1055 "trackId": tId, | |
1056 "name": trackData["name"], | |
1057 "assemblyNames": [trackData["assemblyNames"]], | |
1058 "adapter": { | |
1059 "category": [ | |
1060 categ, | |
1061 ], | |
1062 "type": "BedTabixAdapter", | |
1063 "bedGzLocation": bedgzlocation, | |
1064 "index": { | |
1065 "location": { | |
1066 "uri": url + ".tbi", | |
1067 }, | |
1068 }, | |
1069 }, | |
1070 "displays": [ | |
1071 { | |
1072 "type": "LinearBasicDisplay", | |
1073 "displayId": "%s-LinearBasicDisplay" % tId, | |
1074 "renderer": { | |
1075 "type": "SvgFeatureRenderer", | |
1076 "color1": "jexl:customColor(feature)", | |
1077 }, | |
1078 }, | |
1079 { | |
1080 "type": "LinearPileupDisplay", | |
1081 "displayId": "%s-LinearPileupDisplay" % tId, | |
1082 }, | |
1083 { | |
1084 "type": "LinearArcDisplay", | |
1085 "displayId": "%s-LinearArcDisplay" % tId, | |
1086 }, | |
1087 ], | |
1088 } | |
1089 style_json = self._prepare_track_style(trackDict) | |
1090 trackDict["style"] = style_json | |
1091 if self.config_json.get("plugins", None): | |
1092 self.config_json["plugins"].append(bedPlugin) | |
1093 else: | |
1094 self.config_json["plugins"] = [ | |
1095 bedPlugin, | |
1096 ] | |
1097 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict)) | |
1098 self.trackIdlist.append(tId) | |
1099 | |
1100 def add_paf(self, data, trackData, pafOpts, **kwargs): | |
1101 canPIF = True | |
1102 tname = trackData["name"] | |
1103 tId = trackData["label"] | |
1104 url = tId | |
1105 usePIF = False # much faster if indexed remotely or locally but broken in biocontainer. | |
1106 useuri = data.startswith("http://") or data.startswith("https://") | |
1107 if not useuri: | |
1108 if canPIF: | |
1109 fakeName = os.path.join(self.outdir, "%s.paf" % tId) | |
1110 url = "%s.pif.gz" % tId | |
1111 cmd = ["cp", data, fakeName] | |
1112 self.subprocess_check_call(cmd) | |
1113 e = os.environ | |
1114 e["SHELL"] = "/bin/sh" | |
1115 cmd = [ | |
1116 "jbrowse", | |
1117 "make-pif", | |
1118 fakeName, | |
1119 ] | |
1120 subprocess.run(cmd, env=e, shell=True) | |
1121 usePIF = True | |
1122 else: | |
1123 dest = os.path.join(self.outdir, url) | |
1124 self.symlink_or_copy(os.path.realpath(data), dest) | |
1125 else: | |
1126 url = data | |
1127 if data.endswith(".pif.gz") or data.endswith(".paf.gz"): # is tabix | |
1128 usePIF = True | |
1129 categ = trackData["category"] | |
1130 pg = pafOpts["genome"].split(",") | |
1131 pgc = [x.strip() for x in pg if x.strip() > ""] | |
1132 gnomes = [x.split("~~~") for x in pgc] | |
1133 logging.debug("pg=%s, gnomes=%s" % (pg, gnomes)) | |
1134 passnames = [trackData["assemblyNames"]] # always first | |
1135 for i, (gpath, gname) in enumerate(gnomes): | |
1136 # may have been forgotten by user for uri | |
1137 if len(gname) == 0: | |
1138 gn = os.path.basename(gpath) | |
1139 gname = os.path.splitext(gn)[0] | |
1140 # trouble from spacey names in command lines avoidance | |
1141 if len(gname.split()) > 1: | |
1142 gname = gname.split()[0] | |
1143 if gname not in passnames: | |
1144 passnames.append(gname) | |
1145 useuri = pafOpts["useuri"] == "true" | |
1146 if gname not in self.genome_names: | |
1147 # ignore if already there - eg for duplicates among pafs. | |
1148 asstrack, first_contig = self.make_assembly(gpath, gname, useuri) | |
1149 self.genome_names.append(gname) | |
1150 self.tracksToAdd[gname] = [] | |
1151 self.assemblies.append(copy.copy(asstrack)) | |
1152 self.ass_first_contigs.append(copy.copy(first_contig)) | |
1153 trackDict = { | |
1154 "type": "SyntenyTrack", | |
1155 "trackId": tId, | |
1156 "assemblyNames": passnames, | |
1157 "category": [ | |
1158 categ, | |
1159 ], | |
1160 "name": tname, | |
1161 "displays": [ | |
1162 { | |
1163 "type": "LGVSyntenyDisplay", | |
1164 "displayId": "%s-LGVSyntenyDisplay" % tId, | |
1165 }, | |
1166 { | |
1167 "type": "DotplotDisplay", | |
1168 "displayId": "%s-DotplotDisplay" % tId, | |
1169 }, | |
1170 { | |
1171 "type": "LinearComparativeDisplay", | |
1172 "displayId": "%s-LinearComparativeDisplay" % tId, | |
1173 }, | |
1174 { | |
1175 "type": "LinearBasicDisplay", | |
1176 "displayId": "%s-LinearSyntenyDisplay" % tId, | |
1177 }, | |
1178 ], | |
1179 } | |
1180 if usePIF: | |
1181 trackDict["adapter"] = { | |
1182 "type": "PairwiseIndexedPAFAdapter", | |
1183 "pifGzLocation": {"uri": url}, | |
1184 "assemblyNames": passnames, | |
1185 "index": { | |
1186 "location": { | |
1187 "uri": url + ".tbi", | |
1188 } | |
1189 }, | |
1190 } | |
1191 else: | |
1192 trackDict["adapter"] = { | |
1193 "type": "PAFAdapter", | |
1194 "pafLocation": {"uri": url}, | |
1195 "assemblyNames": passnames, | |
1196 } | |
1197 if not usePIF: | |
1198 style_json = { | |
1199 "type": "LGVSyntenyDisplay", | |
1200 "displayId": "%s-LGVSyntenyDisplay" % tId, | |
1201 } | |
1202 else: | |
1203 style_json = { | |
1204 "type": "LinearBasicDisplay", | |
1205 "displayId": "%s-LinearBasicDisplay" % tId, | |
1206 } | |
1207 trackDict["style"] = style_json | |
1208 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict)) | |
1209 self.trackIdlist.append(tId) | |
1210 | |
1211 def process_annotations(self, track): | |
1212 category = track["category"].replace("__pd__date__pd__", TODAY) | 1107 category = track["category"].replace("__pd__date__pd__", TODAY) |
1213 tt1 = ",/ :;\\" | 1108 |
1214 tt2 = "______" | 1109 track_labels = [] |
1215 labttab = str.maketrans(tt1, tt2) | 1110 |
1216 for trackIndex, ( | 1111 for i, ( |
1217 dataset_path, | 1112 dataset_path, |
1218 dataset_ext, | 1113 dataset_ext, |
1219 useuri, | |
1220 track_human_label, | 1114 track_human_label, |
1221 extra_metadata, | 1115 extra_metadata, |
1222 ) in enumerate(track["trackfiles"]): | 1116 ) in enumerate(track["trackfiles"]): |
1223 if not dataset_path.strip().startswith("http"): | 1117 # Unsanitize labels (element_identifiers are always sanitized by Galaxy) |
1224 # Unsanitize labels (element_identifiers are always sanitized by Galaxy) | 1118 for key, value in mapped_chars.items(): |
1225 for key, value in mapped_chars.items(): | 1119 track_human_label = track_human_label.replace(value, key) |
1226 track_human_label = track_human_label.replace(value, key) | 1120 |
1227 track_human_label = track_human_label.translate(labttab) | 1121 is_multi = type(dataset_path) is list |
1122 | |
1123 log.info( | |
1124 f"-----> Processing track {category} / {track_human_label} ({dataset_ext}, {len(dataset_path) if is_multi else 1} files)" | |
1125 ) | |
1126 | |
1228 outputTrackConfig = { | 1127 outputTrackConfig = { |
1229 "category": category, | 1128 "category": category, |
1230 "style": {}, | |
1231 } | 1129 } |
1232 | 1130 |
1233 outputTrackConfig["assemblyNames"] = track["assemblyNames"] | |
1234 outputTrackConfig["key"] = track_human_label | 1131 outputTrackConfig["key"] = track_human_label |
1235 outputTrackConfig["useuri"] = useuri | 1132 # We add extra data to hash for the case of non-file tracks |
1236 outputTrackConfig["path"] = dataset_path | 1133 if ( |
1237 outputTrackConfig["ext"] = dataset_ext | 1134 "conf" in track |
1238 outputTrackConfig["trackset"] = track.get("trackset", {}) | 1135 and "options" in track["conf"] |
1239 outputTrackConfig["label"] = track["label"] | 1136 and "url" in track["conf"]["options"] |
1137 ): | |
1138 non_file_info = track["conf"]["options"]["url"] | |
1139 else: | |
1140 non_file_info = "" | |
1141 | |
1142 # I chose to use track['category'] instead of 'category' here. This | |
1143 # is intentional. This way re-running the tool on a different date | |
1144 # will not generate different hashes and make comparison of outputs | |
1145 # much simpler. | |
1146 hashData = [ | |
1147 str(dataset_path), | |
1148 track_human_label, | |
1149 track["category"], | |
1150 non_file_info, | |
1151 parent["uniq_id"], | |
1152 ] | |
1153 hashData = "|".join(hashData).encode("utf-8") | |
1154 outputTrackConfig["label"] = hashlib.md5(hashData).hexdigest() + f"_{track['track_num']}_{i}" | |
1240 outputTrackConfig["metadata"] = extra_metadata | 1155 outputTrackConfig["metadata"] = extra_metadata |
1241 outputTrackConfig["name"] = track_human_label | 1156 |
1242 if track["label"] in self.trackIdlist: | 1157 outputTrackConfig["style"] = track["style"] |
1243 logging.error( | 1158 |
1244 "### not adding %s already in %s" | 1159 outputTrackConfig["formatdetails"] = track["formatdetails"] |
1245 % (track["label"], self.trackIdlist) | 1160 |
1246 ) | 1161 outputTrackConfig["remote"] = track["remote"] |
1247 yield None | 1162 |
1163 # Guess extension for remote data | |
1164 if dataset_ext == "gff,gff3,bed": | |
1165 if dataset_path.endswith(".bed") or dataset_path.endswith(".bed.gz"): | |
1166 dataset_ext = "bed" | |
1167 else: | |
1168 dataset_ext = "gff" | |
1169 elif dataset_ext == "vcf,vcf_bgzip": | |
1170 if dataset_path.endswith(".vcf.gz"): | |
1171 dataset_ext = "vcf_bgzip" | |
1172 else: | |
1173 dataset_ext = "vcf" | |
1174 | |
1248 if dataset_ext in ("gff", "gff3"): | 1175 if dataset_ext in ("gff", "gff3"): |
1249 self.add_gff( | 1176 self.add_gff( |
1250 dataset_path, | 1177 parent, |
1251 outputTrackConfig, | |
1252 ) | |
1253 elif dataset_ext in ("hic", "juicebox_hic"): | |
1254 outputTrackConfig["wasCool"] = False | |
1255 self.add_hic( | |
1256 dataset_path, | |
1257 outputTrackConfig, | |
1258 ) | |
1259 elif dataset_ext in ("cool", "mcool", "scool"): | |
1260 hic_url = outputTrackConfig["label"] | |
1261 hic_path = os.path.join(self.outdir, hic_url) + ".hic" | |
1262 outputTrackConfig["wasCool"] = True | |
1263 self.subprocess_check_call( | |
1264 [ | |
1265 "hictk", | |
1266 "convert", | |
1267 "-f", | |
1268 "--output-fmt", | |
1269 "hic", | |
1270 dataset_path, | |
1271 hic_path, | |
1272 ] | |
1273 ) | |
1274 self.add_hic( | |
1275 hic_path, | |
1276 outputTrackConfig, | |
1277 ) | |
1278 elif dataset_ext in ("bed",): | |
1279 self.add_bed( | |
1280 dataset_path, | 1178 dataset_path, |
1281 dataset_ext, | 1179 dataset_ext, |
1282 outputTrackConfig, | 1180 outputTrackConfig, |
1181 track["conf"]["options"]["gff"], | |
1283 ) | 1182 ) |
1284 elif dataset_ext in ("maf",): | 1183 elif dataset_ext == "bed": |
1184 self.add_bed( | |
1185 parent, | |
1186 dataset_path, | |
1187 dataset_ext, | |
1188 outputTrackConfig, | |
1189 track["conf"]["options"]["gff"], | |
1190 ) | |
1191 elif dataset_ext == "bigwig": | |
1192 if is_multi: | |
1193 self.add_bigwig_multi( | |
1194 parent, | |
1195 dataset_path, outputTrackConfig, track["conf"]["options"]["wiggle"] | |
1196 ) | |
1197 else: | |
1198 self.add_bigwig( | |
1199 parent, | |
1200 dataset_path, outputTrackConfig, track["conf"]["options"]["wiggle"] | |
1201 ) | |
1202 elif dataset_ext == "maf": | |
1285 self.add_maf( | 1203 self.add_maf( |
1204 parent, | |
1205 dataset_path, outputTrackConfig, track["conf"]["options"]["maf"] | |
1206 ) | |
1207 elif dataset_ext == "bam": | |
1208 | |
1209 if track["remote"]: | |
1210 bam_index = dataset_path + '.bai' | |
1211 else: | |
1212 real_indexes = track["conf"]["options"]["pileup"]["bam_indices"][ | |
1213 "bam_index" | |
1214 ] | |
1215 if not isinstance(real_indexes, list): | |
1216 # <bam_indices> | |
1217 # <bam_index>/path/to/a.bam.bai</bam_index> | |
1218 # </bam_indices> | |
1219 # | |
1220 # The above will result in the 'bam_index' key containing a | |
1221 # string. If there are two or more indices, the container | |
1222 # becomes a list. Fun! | |
1223 real_indexes = [real_indexes] | |
1224 | |
1225 bam_index = real_indexes[i] | |
1226 | |
1227 self.add_xam( | |
1228 parent, | |
1286 dataset_path, | 1229 dataset_path, |
1287 outputTrackConfig, | 1230 outputTrackConfig, |
1231 track["conf"]["options"]["pileup"], | |
1232 index=bam_index, | |
1233 ext="bam", | |
1288 ) | 1234 ) |
1289 elif dataset_ext == "bigwig": | 1235 elif dataset_ext == "cram": |
1290 self.add_bigwig( | 1236 |
1237 if track["remote"]: | |
1238 cram_index = dataset_path + '.crai' | |
1239 else: | |
1240 real_indexes = track["conf"]["options"]["cram"]["cram_indices"][ | |
1241 "cram_index" | |
1242 ] | |
1243 if not isinstance(real_indexes, list): | |
1244 # <bam_indices> | |
1245 # <bam_index>/path/to/a.bam.bai</bam_index> | |
1246 # </bam_indices> | |
1247 # | |
1248 # The above will result in the 'bam_index' key containing a | |
1249 # string. If there are two or more indices, the container | |
1250 # becomes a list. Fun! | |
1251 real_indexes = [real_indexes] | |
1252 | |
1253 cram_index = real_indexes[i] | |
1254 | |
1255 self.add_xam( | |
1256 parent, | |
1291 dataset_path, | 1257 dataset_path, |
1292 outputTrackConfig, | 1258 outputTrackConfig, |
1259 track["conf"]["options"]["cram"], | |
1260 index=cram_index, | |
1261 ext="cram", | |
1293 ) | 1262 ) |
1294 elif dataset_ext == "bam": | 1263 elif dataset_ext == "vcf": |
1295 real_indexes = track["conf"]["options"]["bam"]["bam_index"] | 1264 self.add_vcf( |
1296 self.add_bam( | 1265 parent, |
1266 dataset_path, | |
1267 outputTrackConfig | |
1268 ) | |
1269 elif dataset_ext == "vcf_bgzip": | |
1270 self.add_vcf( | |
1271 parent, | |
1297 dataset_path, | 1272 dataset_path, |
1298 outputTrackConfig, | 1273 outputTrackConfig, |
1299 bam_indexes=real_indexes, | 1274 zipped=True |
1300 ) | 1275 ) |
1301 elif dataset_ext == "cram": | 1276 elif dataset_ext == "paf": # https://fr.wikipedia.org/wiki/Paf_le_chien |
1302 real_indexes = track["conf"]["options"]["cram"]["cram_index"] | 1277 self.add_paf( |
1303 self.add_cram( | 1278 parent, |
1304 dataset_path, | 1279 dataset_path, |
1305 outputTrackConfig, | 1280 outputTrackConfig, |
1306 cram_indexes=real_indexes, | 1281 track["conf"]["options"]["synteny"] |
1307 ) | 1282 ) |
1308 elif dataset_ext == "vcf": | 1283 elif dataset_ext in ("hic"): |
1309 self.add_vcf(dataset_path, outputTrackConfig) | 1284 self.add_hic( |
1310 elif dataset_ext == "paf": | 1285 parent, |
1311 self.add_paf( | |
1312 dataset_path, | 1286 dataset_path, |
1313 outputTrackConfig, | 1287 outputTrackConfig, |
1314 track["conf"]["options"]["paf"], | 1288 track["conf"]["options"]["hic"] |
1289 ) | |
1290 elif dataset_ext == "sparql": | |
1291 sparql_query = track["conf"]["options"]["sparql"]["query"] | |
1292 for key, value in mapped_chars.items(): | |
1293 sparql_query = sparql_query.replace(value, key) | |
1294 sparql_query_refnames = track["conf"]["options"]["sparql"].get("query_refnames", "") | |
1295 if sparql_query_refnames: | |
1296 for key, value in mapped_chars.items(): | |
1297 sparql_query_refnames = sparql_query_refnames.replace(value, key) | |
1298 self.add_sparql( | |
1299 parent, | |
1300 track["conf"]["options"]["sparql"]["url"], | |
1301 sparql_query, | |
1302 sparql_query_refnames, | |
1303 outputTrackConfig, | |
1304 ) | |
1305 elif dataset_ext == "gc": | |
1306 self.add_gc_content( | |
1307 parent, | |
1308 outputTrackConfig, | |
1315 ) | 1309 ) |
1316 else: | 1310 else: |
1317 logging.warning("Do not know how to handle %s", dataset_ext) | 1311 log.error(f"Do not know how to handle {dataset_ext}") |
1318 # Return non-human label for use in other fields | 1312 |
1319 yield outputTrackConfig["label"] | 1313 track_labels.append(outputTrackConfig["label"]) |
1320 | 1314 |
1321 def add_default_session(self, default_data): | 1315 # Return non-human label for use in other fields |
1316 return track_labels | |
1317 | |
1318 def add_default_view_genome(self, genome, default_loc, tracks_on): | |
1319 | |
1320 refName = "" | |
1321 start = end = None | |
1322 if default_loc: | |
1323 loc_match = re.search(r"^(\w+):(\d+)\.+(\d+)$", default_loc) | |
1324 if loc_match: | |
1325 refName = loc_match.group(1) | |
1326 start = int(loc_match.group(2)) | |
1327 end = int(loc_match.group(3)) | |
1328 | |
1329 if not refName and self.assembly_ids[genome['uniq_id']]: | |
1330 refName = self.assembly_ids[genome['uniq_id']] | |
1331 | |
1332 if start and end: | |
1333 loc_str = f"{refName}:{start}-{end}" | |
1334 else: | |
1335 loc_str = refName | |
1336 | |
1337 # Updating an existing jbrowse instance, merge with pre-existing view | |
1338 view_specs = None | |
1339 if self.update: | |
1340 for existing in self.default_views.values(): | |
1341 if len(existing) and existing["type"] == "LinearGenomeView": | |
1342 if existing['init']['assembly'] == genome['uniq_id']: | |
1343 view_specs = existing | |
1344 if loc_str: | |
1345 view_specs['init']['loc'] = loc_str | |
1346 view_specs['init']['tracks'].extend(tracks_on) | |
1347 | |
1348 if view_specs is None: # Not updating, or updating from synteny | |
1349 view_specs = { | |
1350 "type": "LinearGenomeView", | |
1351 "init": { | |
1352 "assembly": genome['uniq_id'], | |
1353 "loc": loc_str, | |
1354 "tracks": tracks_on | |
1355 } | |
1356 } | |
1357 | |
1358 return view_specs | |
1359 | |
1360 def add_default_view_synteny(self, genome_views, synteny_tracks): | |
1361 | |
1362 # Add json for cached synteny tracks | |
1363 # We cache them because we need to know the target genome uniq_id | |
1364 for strack in synteny_tracks: | |
1365 | |
1366 # Target assembly is the next genome, find its uniq_id | |
1367 query_assembly = strack["assemblyNames"][0] | |
1368 ass_uniq_ids = list(self.assembly_ids.keys()) | |
1369 query_index = ass_uniq_ids.index(query_assembly) | |
1370 target_assembly = ass_uniq_ids[query_index + 1] | |
1371 | |
1372 strack["assemblyNames"][1] = target_assembly | |
1373 strack["adapter"]["assemblyNames"][1] = target_assembly | |
1374 | |
1375 self.subprocess_check_call( | |
1376 [ | |
1377 "jbrowse", | |
1378 "add-track-json", | |
1379 "--target", | |
1380 self.outdir, | |
1381 json.dumps(strack), | |
1382 ] | |
1383 ) | |
1384 | |
1385 # Configure the synteny view | |
1386 levels = [] | |
1387 | |
1388 for strack in synteny_tracks: | |
1389 lev = { | |
1390 "type": "LinearSyntenyViewHelper", | |
1391 "tracks": [ | |
1392 { | |
1393 "type": "SyntenyTrack", | |
1394 "configuration": strack["trackId"], | |
1395 "displays": [ | |
1396 { | |
1397 "type": "LinearSyntenyDisplay", | |
1398 "configuration": strack["trackId"] + "_LinearSyntenyDisplay" | |
1399 } | |
1400 ] | |
1401 } | |
1402 ], | |
1403 "height": 100, | |
1404 "level": len(levels) | |
1405 } | |
1406 levels.append(lev) | |
1407 | |
1408 view_specs = { | |
1409 "type": "LinearSyntenyView", | |
1410 "views": genome_views, | |
1411 "levels": levels | |
1412 } | |
1413 | |
1414 return view_specs | |
1415 | |
1416 def add_default_session(self, default_views): | |
1322 """ | 1417 """ |
1323 default session settings are hard and fragile. | |
1324 .add_default_view() and other configuration code adapted from | |
1325 https://github.com/abretaud/tools-iuc/blob/jbrowse2/tools/jbrowse2/jbrowse2.py | |
1326 """ | |
1327 # TODO using the default session for now, but check out session specs in the future https://github.com/GMOD/jbrowse-components/issues/2708 | |
1328 bpPerPx = ( | |
1329 self.bpPerPx | |
1330 ) # Browser window width is unknown and default session cannot be used to figure it out in JB2 code so could be 200-2000+ pixels. | |
1331 track_types = {} | |
1332 with open(self.config_json_file, "r") as config_file: | |
1333 config_json = json.load(config_file) | |
1334 if self.config_json: | |
1335 config_json.update(self.config_json) | |
1336 if "defaultSession" in config_json: | |
1337 session_json = config_json["defaultSession"] | |
1338 session_views = [] | |
1339 else: | |
1340 session_json = {} | |
1341 session_views = [] | |
1342 for gnome in self.assmeta.keys(): # assemblies have their own tracks | |
1343 tracks_data = [] | |
1344 for track_conf in self.tracksToAdd[gnome]: | |
1345 tId = track_conf["trackId"] | |
1346 if tId in default_data[gnome]["visibility"]["default_on"]: | |
1347 track_types[tId] = track_conf["type"] | |
1348 style_data = default_data[gnome]["style"].get(tId, {}) | |
1349 if not style_data: | |
1350 logging.debug( | |
1351 "No style data for %s in available default data %s" | |
1352 % (tId, default_data) | |
1353 ) | |
1354 else: | |
1355 logging.debug("style data for %s = %s" % (tId, style_data)) | |
1356 if style_data.get("type", None) is None: | |
1357 style_data["type"] = "LinearBasicDisplay" | |
1358 if "displays" in track_conf: | |
1359 disp = track_conf["displays"][0]["type"] | |
1360 style_data["type"] = disp | |
1361 if track_conf.get("displays", None): | |
1362 style_data["configuration"] = track_conf["displays"][0][ | |
1363 "displayId" | |
1364 ] | |
1365 else: | |
1366 logging.debug("no display in track_conf for %s" % tId) | |
1367 if track_conf.get("style_labels", None): | |
1368 # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work | |
1369 # TODO move this to per track displays? | |
1370 style_data["labels"] = track_conf["style_labels"] | |
1371 tracks_data.append( | |
1372 { | |
1373 "type": track_types[tId], | |
1374 "configuration": tId, | |
1375 "displays": [style_data], | |
1376 } | |
1377 ) | |
1378 first = [x for x in self.ass_first_contigs if x[0] == gnome] | |
1379 drdict = { | |
1380 "reversed": False, | |
1381 "assemblyName": gnome, | |
1382 } | |
1383 if len(first) > 0: | |
1384 [gnome, refName, end] = first[0] | |
1385 drdict["refName"] = refName | |
1386 drdict["start"] = 0 | |
1387 end = int(end) | |
1388 drdict["end"] = end | |
1389 else: | |
1390 ddl = default_data.get("defaultLocation", None) | |
1391 if ddl: | |
1392 loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", ddl) | |
1393 # allow commas like 100,000 but ignore as integer | |
1394 if loc_match: | |
1395 refName = loc_match.group(1) | |
1396 drdict["refName"] = refName | |
1397 if loc_match.group(2) > "": | |
1398 drdict["start"] = int(loc_match.group(2).replace(",", "")) | |
1399 if loc_match.group(3) > "": | |
1400 drdict["end"] = int(loc_match.group(3).replace(",", "")) | |
1401 else: | |
1402 logging.info( | |
1403 "@@@ regexp could not match contig:start..end in the supplied location %s - please fix" | |
1404 % ddl | |
1405 ) | |
1406 view_json = { | |
1407 "type": "LinearGenomeView", | |
1408 "offsetPx": 0, | |
1409 "bpPerPx": bpPerPx, | |
1410 "minimized": False, | |
1411 "tracks": tracks_data, | |
1412 } | |
1413 if drdict.get("refName", None): | |
1414 # TODO displayedRegions is not just zooming to the region, it hides the rest of the chromosome | |
1415 view_json["displayedRegions"] = [ | |
1416 drdict, | |
1417 ] | |
1418 logging.info("@@@ defaultlocation %s for default session" % drdict) | |
1419 else: | |
1420 logging.info( | |
1421 "@@@ no track location for default session - please add one!" | |
1422 ) | |
1423 session_views.append(view_json) | |
1424 session_name = default_data.get("session_name", "New session") | |
1425 session_json["name"] = session_name | |
1426 | |
1427 if "views" not in session_json: | |
1428 session_json["views"] = session_views | |
1429 else: | |
1430 session_json["views"] += session_views | |
1431 | |
1432 pp = json.dumps(session_views, indent=2) | |
1433 config_json["defaultSession"] = session_json | |
1434 self.config_json.update(config_json) | |
1435 logging.debug("defaultSession=%s" % (pp)) | |
1436 with open(self.config_json_file, "w") as config_file: | |
1437 json.dump(self.config_json, config_file, indent=2) | |
1438 | |
1439 def add_defsess_to_index(self, data): | |
1440 """ | |
1441 ---------------------------------------------------------- | |
1442 Add some default session settings: set some assemblies/tracks on/off | 1418 Add some default session settings: set some assemblies/tracks on/off |
1443 | 1419 |
1444 This allows to select a default view: | 1420 This allows to select a default view: |
1445 - jb type (Linear, Circular, etc) | 1421 - jb type (Linear, Circular, etc) |
1446 - default location on an assembly | 1422 - default location on an assembly |
1447 - default tracks | 1423 - default tracks |
1448 - ... | 1424 - ... |
1449 | 1425 |
1450 Different methods to do that were tested/discussed: | 1426 Now using this method: |
1451 - using a defaultSession item in config.json: this proved to be difficult: | 1427 https://github.com/GMOD/jbrowse-components/pull/4907 |
1428 | |
1429 Different methods that were tested/discussed earlier: | |
1430 - using a defaultSession item in config.json before PR 4970: this proved to be difficult: | |
1452 forced to write a full session block, including hard-coded/hard-to-guess items, | 1431 forced to write a full session block, including hard-coded/hard-to-guess items, |
1453 no good way to let Jbrowse2 display a scaffold without knowing its size | 1432 no good way to let Jbrowse2 display a scaffold without knowing its size |
1454 - using JBrowse2 as an embedded React component in a tool-generated html file: | 1433 - using JBrowse2 as an embedded React component in a tool-generated html file: |
1455 it works but it requires generating js code to actually do what we want = chosing default view, assembly, tracks, ... | 1434 it works but it requires generating js code to actually do what we want = chosing default view, assembly, tracks, ... |
1456 - writing a session-spec inside the config.json file: this is not yet supported as of 2.10.2 (see PR 4148 below) | 1435 - writing a session-spec inside the config.json file: this is not yet supported as of 2.10.2 (see PR 4148 below) |
1457 a session-spec is a kind of simplified defaultSession where you don't need to specify every aspect of the session | 1436 a session-spec is a kind of simplified defaultSession where you don't need to specify every aspect of the session |
1458 - passing a session-spec through URL params by embedding the JBrowse2 index.html inside an iframe | 1437 - passing a session-spec through URL params by embedding the JBrowse2 index.html inside an iframe |
1459 we selected this option | |
1460 | 1438 |
1461 Xrefs to understand the choices: | 1439 Xrefs to understand the choices: |
1462 https://github.com/GMOD/jbrowse-components/issues/2708 | 1440 https://github.com/GMOD/jbrowse-components/issues/2708 |
1463 https://github.com/GMOD/jbrowse-components/discussions/3568 | 1441 https://github.com/GMOD/jbrowse-components/discussions/3568 |
1464 https://github.com/GMOD/jbrowse-components/pull/4148 | 1442 https://github.com/GMOD/jbrowse-components/pull/4148 |
1465 """ | 1443 """ |
1466 new_index = "Nothing written" | 1444 |
1467 session_spec = {"views": []} | 1445 if self.use_synteny_viewer: |
1468 logging.debug("def ass_first=%s\ndata=%s" % (self.ass_first_contigs, data)) | 1446 session_name = "Synteny" |
1469 for first_contig in self.ass_first_contigs: | 1447 else: |
1470 logging.debug("first contig=%s" % self.ass_first_contigs) | 1448 session_name = ', '.join(x['init']['assembly'] for x in default_views) |
1471 [gnome, refName, end] = first_contig | 1449 |
1472 start = 0 | 1450 session_spec = { |
1473 aview = { | 1451 "name": session_name, |
1474 "assembly": gnome, | 1452 "views": default_views |
1475 "loc": "{}:{}..{}".format(refName, start, end), | 1453 } |
1476 "type": "LinearGenomeView", | 1454 |
1477 "tracks": data[gnome]["tracks"], | 1455 config_path = os.path.join(self.outdir, "config.json") |
1478 } | 1456 with open(config_path, "r") as config_file: |
1479 session_spec["views"].append(aview) | 1457 config_json = json.load(config_file) |
1480 sess = json.dumps(session_spec, sort_keys=True, indent=2) | 1458 |
1481 new_index = INDEX_TEMPLATE.replace( | 1459 config_json["defaultSession"].update(session_spec) |
1482 "__SESSION_SPEC__", "&session=spec-{}".format(sess) | 1460 |
1483 ) | 1461 with open(config_path, "w") as config_file: |
1484 | 1462 json.dump(config_json, config_file, indent=2) |
1485 os.rename( | |
1486 os.path.join(self.outdir, "index.html"), | |
1487 os.path.join(self.outdir, "index_noview.html"), | |
1488 ) | |
1489 | |
1490 with open(os.path.join(self.outdir, "index.html"), "w") as nind: | |
1491 nind.write(new_index) | |
1492 logging.debug( | |
1493 "#### add_defsession gnome=%s refname=%s\nsession_spec=%s\nnew_index=%s" | |
1494 % (gnome, refName, sess, new_index) | |
1495 ) | |
1496 | 1463 |
1497 def add_general_configuration(self, data): | 1464 def add_general_configuration(self, data): |
1498 """ | 1465 """ |
1499 Add some general configuration to the config.json file | 1466 Add some general configuration to the config.json file |
1500 """ | 1467 """ |
1501 | 1468 |
1502 config_path = self.config_json_file | 1469 config_path = os.path.join(self.outdir, "config.json") |
1503 if os.path.exists(config_path): | 1470 with open(config_path, "r") as config_file: |
1504 with open(config_path, "r") as config_file: | 1471 config_json = json.load(config_file) |
1505 config_json = json.load(config_file) | 1472 |
1506 else: | |
1507 config_json = {} | |
1508 if self.config_json: | |
1509 config_json.update(self.config_json) | |
1510 config_data = {} | 1473 config_data = {} |
1511 | 1474 |
1512 config_data["disableAnalytics"] = data.get("analytics", "false") == "true" | 1475 config_data["disableAnalytics"] = data.get("analytics", "false") == "true" |
1513 | 1476 |
1514 config_data["theme"] = { | 1477 config_data["theme"] = { |
1518 "tertiary": {"main": data.get("tertiary_color", "#135560")}, | 1481 "tertiary": {"main": data.get("tertiary_color", "#135560")}, |
1519 "quaternary": {"main": data.get("quaternary_color", "#FFB11D")}, | 1482 "quaternary": {"main": data.get("quaternary_color", "#FFB11D")}, |
1520 }, | 1483 }, |
1521 "typography": {"fontSize": int(data.get("font_size", 10))}, | 1484 "typography": {"fontSize": int(data.get("font_size", 10))}, |
1522 } | 1485 } |
1523 if not config_json.get("configuration", None): | 1486 |
1524 config_json["configuration"] = {} | |
1525 config_json["configuration"].update(config_data) | 1487 config_json["configuration"].update(config_data) |
1526 self.config_json.update(config_json) | 1488 |
1527 with open(config_path, "w") as config_file: | 1489 with open(config_path, "w") as config_file: |
1528 json.dump(self.config_json, config_file, indent=2) | 1490 json.dump(config_json, config_file, indent=2) |
1529 | 1491 |
1530 def clone_jbrowse(self, realclone=False): | 1492 def add_plugins(self, data): |
1531 """ | 1493 """ |
1532 Clone a JBrowse directory into a destination directory. | 1494 Add plugins to the config.json file |
1533 | |
1534 `realclone=true` will use the `jbrowse create` command. | |
1535 To allow running on internet-less compute and for reproducibility | |
1536 use frozen code with `realclone=false` |
1537 | |
1538 """ | 1495 """ |
1539 dest = self.outdir | 1496 |
1540 if (not os.path.exists(self.jbrowse2path)) or realclone: | 1497 config_path = os.path.join(self.outdir, "config.json") |
1541 e = os.environ | 1498 with open(config_path, "r") as config_file: |
1542 e["SHELL"] = "/bin/sh" | 1499 config_json = json.load(config_file) |
1543 cmd = ["jbrowse", "create", dest, "-f", "--tag", f"{JB2VER}"] | 1500 |
1544 subprocess.run(cmd, env=e, shell=True) | 1501 if "plugins" not in config_json: |
1502 config_json["plugins"] = [] | |
1503 | |
1504 config_json["plugins"].extend(data) | |
1505 | |
1506 with open(config_path, "w") as config_file: | |
1507 json.dump(config_json, config_file, indent=2) | |
1508 | |
1509 def clone_jbrowse(self, jbrowse_dir, destination): | |
1510 """ | |
1511 Clone a JBrowse directory into a destination directory. | |
1512 | |
1513 Not using `jbrowse create` command to allow running on internet-less compute + to make sure code is frozen | |
1514 """ | |
1515 | |
1516 copytree(jbrowse_dir, destination) | |
1517 try: | |
1518 shutil.rmtree(os.path.join(destination, "test_data")) | |
1519 except OSError as e: | |
1520 log.error(f"Error: {e.filename} - {e.strerror}.") | |
1521 | |
1522 if not os.path.exists(os.path.join(destination, "data")): | |
1523 # It can already exist if upgrading an instance | |
1524 os.makedirs(os.path.join(destination, "data")) | |
1525 log.info(f"makedir {os.path.join(destination, 'data')}") | |
1526 | |
1527 os.symlink("./data/config.json", os.path.join(destination, "config.json")) | |
1528 | |
1529 | |
1530 def copytree(src, dst, symlinks=False, ignore=None): | |
1531 for item in os.listdir(src): | |
1532 s = os.path.join(src, item) | |
1533 d = os.path.join(dst, item) | |
1534 if os.path.isdir(s): | |
1535 shutil.copytree(s, d, symlinks, ignore) | |
1545 else: | 1536 else: |
1546 shutil.copytree(self.jbrowse2path, dest, dirs_exist_ok=True) | 1537 shutil.copy2(s, d) |
1547 for fn in [ | |
1548 "asset-manifest.json", | |
1549 "favicon.ico", | |
1550 "robots.txt", | |
1551 "umd_plugin.js", | |
1552 "version.txt", | |
1553 "test_data", | |
1554 ]: | |
1555 try: | |
1556 path = os.path.join(dest, fn) | |
1557 if os.path.isdir(path): | |
1558 shutil.rmtree(path) | |
1559 else: | |
1560 os.remove(path) | |
1561 except OSError as e: | |
1562 log.error("Error: %s - %s." % (e.filename, e.strerror)) | |
1563 for neededfile in ["jb2_webserver.py", "bedscoreplugin.js"]: | |
1564 shutil.copyfile( | |
1565 os.path.join(INSTALLED_TO, neededfile), os.path.join(dest, neededfile) | |
1566 ) | |
1567 | 1538 |
1568 | 1539 |
1569 def parse_style_conf(item): | 1540 def parse_style_conf(item): |
1570 if item.text.lower() in ["false", "true", "yes", "no"]: | 1541 if "type" in item.attrib and item.attrib["type"] in ["boolean", "integer"]: |
1571 return item.text.lower in ("yes", "true") | 1542 if item.attrib["type"] == "boolean": |
1572 elif item.text.isdigit(): | 1543 return item.text in ("yes", "true", "True") |
1573 return int(item.text) | 1544 elif item.attrib["type"] == "integer": |
1574 return item.text | 1545 return int(item.text) |
1546 else: | |
1547 return item.text | |
1548 | |
1549 | |
1550 def validate_synteny(real_root): | |
1551 | |
1552 if len(real_root.findall('assembly/tracks/track[@format="synteny"]')) == 0: | |
1553 # No synteny data, all good | |
1554 return False | |
1555 | |
1556 assemblies = real_root.findall("assembly") | |
1557 | |
1558 if len(assemblies[-1].findall('tracks/track[@format="synteny"]')) > 0 and \ | |
1559 assemblies[-1].find('tracks/track[@format="synteny"]/options/style/display').text == "LinearSyntenyDisplay": | |
1560 raise RuntimeError("You should not set a synteny track on the last genome.") | |
1561 | |
1562 for assembly in assemblies[1:0]: | |
1563 if len(assembly.findall('tracks/track[@format="synteny"]')) != 1 and \ | |
1564 assembly.find('tracks/track[@format="synteny"]/options/style/display').text == "LinearSyntenyDisplay": | |
1565 raise RuntimeError("To use the synteny viewer, you should add a synteny track to each assembly, except the last one.") | |
1566 | |
1567 return True | |
1575 | 1568 |
1576 | 1569 |
1577 if __name__ == "__main__": | 1570 if __name__ == "__main__": |
1578 parser = argparse.ArgumentParser(description="", epilog="") | 1571 parser = argparse.ArgumentParser(description="", epilog="") |
1579 parser.add_argument("--xml", help="Track Configuration") | 1572 parser.add_argument("xml", type=argparse.FileType("r"), help="Track Configuration") |
1580 parser.add_argument( | 1573 |
1581 "--jbrowse2path", help="Path to JBrowse2 directory in BioContainer or Conda" | 1574 parser.add_argument('--jbrowse', help='Folder containing a jbrowse release') |
1582 ) | 1575 parser.add_argument("--update", help="Update an existing JBrowse2 instance", action="store_true") |
1583 parser.add_argument("--outdir", help="Output directory", default="out") | 1576 parser.add_argument("--outdir", help="Output directory", default="out") |
1584 parser.add_argument("--version", "-V", action="version", version=JB2VER) | |
1585 args = parser.parse_args() | 1577 args = parser.parse_args() |
1586 tree = ET.parse(args.xml) | 1578 |
1587 root = tree.getroot() | 1579 tree = ET.parse(args.xml.name) |
1588 removeMe = string.punctuation.replace(".", " ").replace("/", "").replace("-", "") | 1580 real_root = tree.getroot() |
1589 # first is a space because space needs to be added here for removal from labels as paths. | 1581 |
1590 nopunct = str.maketrans(dict.fromkeys(removeMe)) | |
1591 # This should be done ASAP | 1582 # This should be done ASAP |
1592 GALAXY_INFRASTRUCTURE_URL = root.find("metadata/galaxyUrl").text | |
1593 # Sometimes this comes as `localhost` without a protocol | 1583 # Sometimes this comes as `localhost` without a protocol |
1584 GALAXY_INFRASTRUCTURE_URL = real_root.find("metadata/galaxyUrl").text | |
1594 if not GALAXY_INFRASTRUCTURE_URL.startswith("http"): | 1585 if not GALAXY_INFRASTRUCTURE_URL.startswith("http"): |
1595 # so we'll prepend `http://` and hope for the best. Requests *should* | 1586 # so we'll prepend `http://` and hope for the best. Requests *should* |
1596 # be GET and not POST so it should redirect OK | 1587 # be GET and not POST so it should redirect OK |
1597 GALAXY_INFRASTRUCTURE_URL = "http://" + GALAXY_INFRASTRUCTURE_URL | 1588 GALAXY_INFRASTRUCTURE_URL = "http://" + GALAXY_INFRASTRUCTURE_URL |
1598 | 1589 |
1599 jc = JbrowseConnector(outdir=args.outdir, jbrowse2path=args.jbrowse2path) | 1590 jc = JbrowseConnector( |
1600 | 1591 jbrowse=args.jbrowse, |
1601 default_session_data = {} | 1592 outdir=args.outdir, |
1602 trackI = 0 | 1593 update=args.update, |
1603 for ass in root.findall("assembly"): | 1594 ) |
1604 genomes = [ | 1595 |
1605 { | 1596 # Synteny options are special, check them first |
1606 "path": x.attrib["path"], | 1597 jc.use_synteny_viewer = validate_synteny(real_root) |
1607 "label": x.attrib["label"].split(" ")[0].translate(nopunct), | 1598 |
1608 "useuri": x.attrib["useuri"], | 1599 for assembly in real_root.findall("assembly"): |
1609 "meta": metadata_from_node(x.find("metadata")), | 1600 genome_el = assembly.find('genome') |
1610 } | 1601 |
1611 for x in ass.findall("metadata/genomes/genome") | 1602 is_remote = genome_el.attrib.get("remote", "false") == "true" |
1612 ] | 1603 |
1613 primaryGenome = jc.process_genomes(genomes) | 1604 genome = { |
1614 if not default_session_data.get(primaryGenome, None): | 1605 "path": genome_el.attrib["path"] if is_remote else os.path.realpath(genome_el.attrib["path"]), |
1615 default_session_data[primaryGenome] = { | 1606 "meta": metadata_from_node(genome_el.find("metadata")), |
1616 "tracks": [], | 1607 "label": genome_el.attrib["label"], |
1617 "style": {}, | 1608 } |
1618 "style_labels": {}, | 1609 |
1619 "visibility": { | 1610 cytobands = None |
1620 "default_on": [], | 1611 cytobands_el = genome_el.find("cytobands") |
1621 "default_off": [], | 1612 if cytobands_el is not None and "path" in cytobands_el.attrib: |
1622 }, | 1613 cytobands = cytobands_el.attrib["path"] |
1623 } | 1614 |
1624 for track in ass.find("tracks"): | 1615 ref_name_aliases = None |
1616 ref_name_aliases_el = genome_el.find("ref_name_aliases") | |
1617 if ref_name_aliases_el is not None and "path" in ref_name_aliases_el.attrib: | |
1618 ref_name_aliases = ref_name_aliases_el.attrib["path"] | |
1619 | |
1620 log.debug("Processing genome", genome) | |
1621 genome["uniq_id"] = jc.add_assembly(genome["path"], genome["label"], is_remote, cytobands, ref_name_aliases) | |
1622 | |
1623 default_tracks_on = [] | |
1624 | |
1625 track_num = 0 | |
1626 for track in assembly.findall("tracks/track"): | |
1625 track_conf = {} | 1627 track_conf = {} |
1626 track_conf["trackfiles"] = [] | 1628 track_conf["trackfiles"] = [] |
1627 track_conf["assemblyNames"] = primaryGenome | 1629 track_conf["track_num"] = track_num |
1628 is_multi_bigwig = False | 1630 |
1631 trackfiles = track.findall("files/trackFile") or [] | |
1632 | |
1633 is_multi = False | |
1634 multi_paths = [] | |
1635 multi_type = None | |
1636 multi_metadata = {} | |
1629 try: | 1637 try: |
1630 if track.find("options/wiggle/multibigwig") and ( | 1638 multi_in_xml = track.find("options/multitrack") |
1631 track.find("options/wiggle/multibigwig").text == "True" | 1639 if multi_in_xml is not None and parse_style_conf(multi_in_xml): |
1632 ): | 1640 is_multi = True |
1633 is_multi_bigwig = True | 1641 multi_paths = [] |
1634 multi_bigwig_paths = [] | 1642 multi_type = trackfiles[0].attrib["ext"] |
1635 except KeyError: | 1643 except KeyError: |
1636 pass | 1644 pass |
1637 | 1645 |
1638 trackfiles = track.findall("files/trackFile") | 1646 is_remote = False |
1639 if trackfiles: | 1647 if trackfiles: |
1640 for x in trackfiles: | 1648 for x in trackfiles: |
1641 isBed = False | 1649 if is_multi: |
1642 if x.attrib["ext"] == "bed": | 1650 is_remote = x.attrib.get("remote", "false") == "true" |
1643 isBed = True | 1651 multi_paths.append( |
1644 track_conf["label"] = "%s_%d" % ( | 1652 (x.attrib["label"], x.attrib["path"] if is_remote else os.path.realpath(x.attrib["path"])) |
1645 x.attrib["label"].translate(nopunct), | 1653 ) |
1646 trackI, | 1654 multi_metadata.update(metadata_from_node(x.find("metadata"))) |
1647 ) | 1655 else: |
1648 trackI += 1 | 1656 metadata = metadata_from_node(x.find("metadata")) |
1649 track_conf["useuri"] = x.attrib["useuri"] | 1657 is_remote = x.attrib.get("remote", "false") == "true" |
1650 if is_multi_bigwig: | 1658 track_conf["trackfiles"].append( |
1651 multi_bigwig_paths.append( | |
1652 ( | 1659 ( |
1653 track_conf["label"].translate(nopunct), | 1660 x.attrib["path"] if is_remote else os.path.realpath(x.attrib["path"]), |
1654 track_conf["useuri"], | 1661 x.attrib["ext"], |
1655 os.path.realpath(x.attrib["path"]), | 1662 x.attrib["label"], |
1663 metadata, | |
1656 ) | 1664 ) |
1657 ) | 1665 ) |
1658 else: | 1666 else: |
1659 metadata = metadata_from_node(x.find("metadata")) | 1667 # For tracks without files (sparql, gc) |
1660 track_conf["dataset_id"] = metadata.get("dataset_id", "None") | 1668 track_conf["trackfiles"].append( |
1661 if x.attrib["useuri"].lower() == "yes": | 1669 ( |
1662 tfa = ( | 1670 "", # N/A, no path for sparql or gc |
1663 x.attrib["path"], | 1671 track.attrib["format"], |
1664 x.attrib["ext"], | 1672 track.find("options/label").text, |
1665 x.attrib["useuri"], | 1673 {}, |
1666 track_conf["label"], | |
1667 metadata, | |
1668 ) | |
1669 else: | |
1670 tfa = ( | |
1671 os.path.realpath(x.attrib["path"]), | |
1672 x.attrib["ext"], | |
1673 x.attrib["useuri"], | |
1674 track_conf["label"], | |
1675 metadata, | |
1676 ) | |
1677 track_conf["trackfiles"].append(tfa) | |
1678 | |
1679 if is_multi_bigwig: | |
1680 metadata = metadata_from_node(x.find("metadata")) | |
1681 | |
1682 track_conf["trackfiles"].append( | |
1683 ( | |
1684 multi_bigwig_paths, # Passing an array of paths to represent as one track | |
1685 "bigwig_multiple", | |
1686 "MultiBigWig", # Giving a hardcoded name for now |
1687 {}, # No metadata for multiple bigwig | |
1688 ) | |
1689 ) | 1674 ) |
1675 ) | |
1676 | |
1677 if is_multi: | |
1678 etal_tracks_nb = len(multi_paths[1:]) | |
1679 multi_label = f"{multi_paths[0][0]} + {etal_tracks_nb} other track{'s' if etal_tracks_nb > 1 else ''}" | |
1680 | |
1681 track_conf["trackfiles"].append( | |
1682 ( | |
1683 multi_paths, # Passing an array of paths to represent as one track | |
1684 multi_type, # First file type | |
1685 multi_label, # First file label | |
1686 multi_metadata, # Mix of all metadata for multiple bigwig => only last file metadata coming from galaxy + custom ones |
1687 ) | |
1688 ) | |
1690 track_conf["category"] = track.attrib["cat"] | 1689 track_conf["category"] = track.attrib["cat"] |
1691 track_conf["format"] = track.attrib["format"] | 1690 track_conf["format"] = track.attrib["format"] |
1691 track_conf["style"] = { | |
1692 item.tag: parse_style_conf(item) for item in (track.find("options/style") or []) | |
1693 } | |
1694 | |
1695 track_conf["style"] = { | |
1696 item.tag: parse_style_conf(item) for item in (track.find("options/style") or []) | |
1697 } | |
1698 | |
1699 track_conf["style_labels"] = { | |
1700 item.tag: parse_style_conf(item) | |
1701 for item in (track.find("options/style_labels") or []) | |
1702 } | |
1703 track_conf["formatdetails"] = { | |
1704 item.tag: parse_style_conf(item) for item in (track.find("options/formatdetails") or []) | |
1705 } | |
1706 | |
1692 track_conf["conf"] = etree_to_dict(track.find("options")) | 1707 track_conf["conf"] = etree_to_dict(track.find("options")) |
1693 keys = jc.process_annotations(track_conf) | 1708 |
1694 if keys: | 1709 track_conf["remote"] = is_remote |
1695 for key in keys: | 1710 |
1696 vis = track.attrib.get("visibility", "default_off") | 1711 track_labels = jc.process_annotations(track_conf, genome) |
1697 if not vis: | 1712 |
1698 vis = "default_off" | 1713 if track.attrib["visibility"] == "default_on": |
1699 default_session_data[primaryGenome]["visibility"][vis].append(key) | 1714 for tlabel in track_labels: |
1700 trakdat = jc.tracksToAdd[primaryGenome] | 1715 default_tracks_on.append(tlabel) |
1701 stile = {} | 1716 |
1702 for trak in trakdat: | 1717 track_num += 1 |
1703 if trak["trackId"] == key: | 1718 |
1704 stile = trak.get("style", {}) | 1719 default_loc = assembly.find("defaultLocation").text |
1705 if len(track.find("options/style")) > 0: | 1720 |
1706 for item in track.find("options/style"): | 1721 jc.default_views[genome['uniq_id']] = jc.add_default_view_genome(genome, default_loc, default_tracks_on) |
1707 if item.text: | 1722 |
1708 stile[item.tag] = parse_style_conf(item) | 1723 if jc.use_synteny_viewer: |
1709 logging.debug("stile=%s" % stile) | 1724 synteny_view = jc.add_default_view_synteny(list(jc.default_views.values()), jc.synteny_tracks) |
1710 default_session_data[primaryGenome]["style"][key] = stile | 1725 |
1711 default_session_data[primaryGenome]["tracks"].append(key) | 1726 views_for_session = jc._load_old_synteny_views() |
1712 default_session_data["defaultLocation"] = root.find( | 1727 |
1713 "metadata/general/defaultLocation" | 1728 views_for_session.append(synteny_view) |
1714 ).text | 1729 else: |
1715 default_session_data["session_name"] = root.find( | 1730 old_views = jc._load_old_genome_views() |
1716 "metadata/general/session_name" | 1731 |
1717 ).text | 1732 for old_view in old_views: |
1718 logging.debug("default_session=%s" % (json.dumps(default_session_data, indent=2))) | 1733 if old_view not in jc.default_views: |
1719 jc.zipOut = root.find("metadata/general/zipOut").text == "true" | 1734 jc.default_views[old_view] = old_views[old_view] |
1720 jc.bpPerPx = int(root.find("metadata/general/bpPerPx").text) | 1735 |
1736 views_for_session = list(jc.default_views.values()) | |
1737 | |
1721 general_data = { | 1738 general_data = { |
1722 "analytics": root.find("metadata/general/analytics").text, | 1739 "analytics": real_root.find("metadata/general/analytics").text, |
1723 "primary_color": root.find("metadata/general/primary_color").text, | 1740 "primary_color": real_root.find("metadata/general/primary_color").text, |
1724 "secondary_color": root.find("metadata/general/secondary_color").text, | 1741 "secondary_color": real_root.find("metadata/general/secondary_color").text, |
1725 "tertiary_color": root.find("metadata/general/tertiary_color").text, | 1742 "tertiary_color": real_root.find("metadata/general/tertiary_color").text, |
1726 "quaternary_color": root.find("metadata/general/quaternary_color").text, | 1743 "quaternary_color": real_root.find("metadata/general/quaternary_color").text, |
1727 "font_size": root.find("metadata/general/font_size").text, | 1744 "font_size": real_root.find("metadata/general/font_size").text, |
1728 } | 1745 } |
1746 | |
1747 jc.add_default_session(views_for_session) | |
1729 jc.add_general_configuration(general_data) | 1748 jc.add_general_configuration(general_data) |
1730 jc.add_default_session(default_session_data) | 1749 jc.add_plugins(jc.plugins) |
1731 trackconf = jc.config_json.get("tracks", []) | 1750 jc.text_index() |
1732 for gnome in jc.genome_names: | |
1733 gtracks = jc.tracksToAdd[gnome] | |
1734 if len(gtracks) > 0: | |
1735 logging.debug( | |
1736 "for genome %s adding gtracks %s" | |
1737 % (gnome, json.dumps(gtracks, indent=2)) | |
1738 ) | |
1739 trackconf += gtracks | |
1740 jc.config_json["tracks"] = trackconf | |
1741 assconf = jc.config_json.get("assemblies", []) | |
1742 assconf += jc.assemblies | |
1743 jc.config_json["assemblies"] = assconf | |
1744 logging.debug( | |
1745 "assmeta=%s, first_contigs=%s, assemblies=%s, gnames=%s, trackidlist=%s, tracks=%s" | |
1746 % ( | |
1747 jc.assmeta, | |
1748 jc.ass_first_contigs, | |
1749 json.dumps(assconf, indent=2), | |
1750 jc.genome_names, | |
1751 jc.trackIdlist, | |
1752 json.dumps(trackconf, indent=2), | |
1753 ) | |
1754 ) | |
1755 jc.write_config() | |
1756 # note that this can be left in the config.json but has NO EFFECT if add_defsess_to_index is called. | |
1757 # jc.add_defsess_to_index(default_session_data) | |
1758 # this command line tool appears currently broken - or at least not working here. | |
1759 # jc.text_index() |