Mercurial > repos > fubar > jbrowse2
comparison jbrowse2.py @ 98:b1260bca5fdc draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 44d8fc559ecf5463a8f753561976fa26686c96f6
author | bgruening |
---|---|
date | Wed, 05 Jun 2024 10:00:07 +0000 |
parents | 74074746ccd8 |
children | 990291e918c7 |
comparison
equal
deleted
inserted
replaced
97:74074746ccd8 | 98:b1260bca5fdc |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 | 2 |
3 import argparse | 3 import argparse |
4 import binascii | 4 import binascii |
5 import datetime | 5 import datetime |
6 # import hashlib | |
6 import json | 7 import json |
7 import logging | 8 import logging |
8 import os | 9 import os |
9 import re | 10 import re |
10 import shutil | 11 import shutil |
18 | 19 |
19 logging.basicConfig(level=logging.DEBUG) | 20 logging.basicConfig(level=logging.DEBUG) |
20 log = logging.getLogger("jbrowse") | 21 log = logging.getLogger("jbrowse") |
21 | 22 |
22 JB2VER = "v2.11.0" | 23 JB2VER = "v2.11.0" |
23 # version pinned if cloning - but not cloning now | 24 # version pinned if cloning - but not used until now |
24 logCommands = True | 25 logCommands = True |
25 # useful for seeing what's being written but NOT for production setups | 26 # useful for seeing what's being written but not for production setups |
26 TODAY = datetime.datetime.now().strftime("%Y-%m-%d") | 27 TODAY = datetime.datetime.now().strftime("%Y-%m-%d") |
27 SELF_LOCATION = os.path.dirname(os.path.realpath(__file__)) | 28 SELF_LOCATION = os.path.dirname(os.path.realpath(__file__)) |
28 GALAXY_INFRASTRUCTURE_URL = None | 29 GALAXY_INFRASTRUCTURE_URL = None |
29 mapped_chars = { | 30 mapped_chars = { |
30 ">": "__gt__", | 31 ">": "__gt__", |
37 "}": "__cc__", | 38 "}": "__cc__", |
38 "@": "__at__", | 39 "@": "__at__", |
39 "#": "__pd__", | 40 "#": "__pd__", |
40 "": "__cn__", | 41 "": "__cn__", |
41 } | 42 } |
43 | |
44 | |
45 INDEX_TEMPLATE = """<!doctype html> | |
46 <html lang="en" style="height:100%"> | |
47 <head> | |
48 <meta charset="utf-8"/> | |
49 <link rel="shortcut icon" href="./favicon.ico"/> | |
50 <meta name="viewport" content="width=device-width,initial-scale=1"/> | |
51 <meta name="theme-color" content="#000000"/> | |
52 <meta name="description" content="A fast and flexible genome browser"/> | |
53 <link rel="manifest" href="./manifest.json"/> | |
54 <title>JBrowse</title> | |
55 </script> | |
56 </head> | |
57 <body style="overscroll-behavior:none; height:100%; margin: 0;"> | |
58 <iframe | |
59 id="jbframe" | |
60 title="JBrowse2" | |
61 frameborder="0" | |
62 width="100%" | |
63 height="100%" | |
64 src='index_noview.html?config=config.json__SESSION_SPEC__'> | |
65 </iframe> | |
66 </body> | |
67 </html> | |
68 """ | |
42 | 69 |
43 | 70 |
44 class ColorScaling(object): | 71 class ColorScaling(object): |
45 | 72 |
46 COLOR_FUNCTION_TEMPLATE = """ | 73 COLOR_FUNCTION_TEMPLATE = """ |
396 def get_cwd(self, cwd): | 423 def get_cwd(self, cwd): |
397 if cwd: | 424 if cwd: |
398 return self.outdir | 425 return self.outdir |
399 else: | 426 else: |
400 return subprocess.check_output(["pwd"]).decode("utf-8").strip() | 427 return subprocess.check_output(["pwd"]).decode("utf-8").strip() |
401 # return None | |
402 | 428 |
403 def subprocess_check_call(self, command, output=None, cwd=True): | 429 def subprocess_check_call(self, command, output=None, cwd=True): |
404 if output: | 430 if output: |
405 if logCommands: | 431 if logCommands: |
406 log.debug( | 432 log.debug( |
427 retcode = p.returncode | 453 retcode = p.returncode |
428 if retcode != 0: | 454 if retcode != 0: |
429 log.error(command) | 455 log.error(command) |
430 log.error(output) | 456 log.error(output) |
431 log.error(err) | 457 log.error(err) |
432 raise RuntimeError("Command failed with exit code %s" % (retcode)) | 458 raise RuntimeError(f"Command ( {command} ) failed with exit code {retcode}") |
433 | 459 |
434 def subprocess_check_output(self, command): | 460 def subprocess_check_output(self, command): |
435 if logCommands: | 461 if logCommands: |
436 log.debug(" ".join(command)) | 462 log.debug(" ".join(command)) |
437 return subprocess.check_output(command, cwd=self.outdir) | 463 return subprocess.check_output(command, cwd=self.outdir) |
471 with urllib.request.urlopen(url, context=scontext) as f: | 497 with urllib.request.urlopen(url, context=scontext) as f: |
472 fl = f.readlines() | 498 fl = f.readlines() |
473 nrow = len(fl) | 499 nrow = len(fl) |
474 except Exception: | 500 except Exception: |
475 nrow = 0 | 501 nrow = 0 |
476 logging.debug("### getNrow %s returning %d" % (url, nrow)) | 502 logging.debug("getNrow %s returning %d" % (url, nrow)) |
477 return nrow | 503 return nrow |
478 | 504 |
479 def process_genomes(self, genomes): | 505 def process_genomes(self, genomes): |
480 assembly = [] | 506 assembly = [] |
481 assmeta = [] | 507 assmeta = [] |
512 self.assmeta[primaryGenome] = assmeta | 538 self.assmeta[primaryGenome] = assmeta |
513 self.tracksToAdd[primaryGenome] = [] | 539 self.tracksToAdd[primaryGenome] = [] |
514 return primaryGenome | 540 return primaryGenome |
515 | 541 |
516 def make_assembly(self, fapath, gname, useuri): | 542 def make_assembly(self, fapath, gname, useuri): |
517 """added code to grab the first contig name and length for broken default session from Anthony and Helena's code | |
518 that poor Bjoern is trying to figure out. | |
519 """ | |
520 if useuri: | 543 if useuri: |
521 faname = fapath | 544 faname = fapath |
522 scontext = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) | 545 scontext = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) |
523 scontext.check_hostname = False | 546 scontext.check_hostname = False |
524 scontext.verify_mode = ssl.VerifyMode.CERT_NONE | 547 scontext.verify_mode = ssl.VerifyMode.CERT_NONE |
673 "url": "https://unpkg.com/jbrowse-plugin-mafviewer/dist/jbrowse-plugin-mafviewer.umd.production.min.js", | 696 "url": "https://unpkg.com/jbrowse-plugin-mafviewer/dist/jbrowse-plugin-mafviewer.umd.production.min.js", |
674 } | 697 } |
675 ] | 698 ] |
676 } | 699 } |
677 categ = trackData["category"] | 700 categ = trackData["category"] |
678 fname = tId | 701 fname = f"{tId}" |
679 dest = "%s/%s" % (self.outdir, fname) | 702 dest = os.path.join(self.outdir, fname) |
680 gname = trackData["assemblyNames"] | 703 gname = trackData["assemblyNames"] |
681 | 704 |
682 cmd = [ | 705 cmd = [ |
683 "bash", | 706 "bash", |
684 os.path.join(INSTALLED_TO, "convertMAF.sh"), | 707 os.path.join(INSTALLED_TO, "convertMAF.sh"), |
929 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict) | 952 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict) |
930 self.trackIdlist.append(tId) | 953 self.trackIdlist.append(tId) |
931 | 954 |
932 def add_vcf(self, data, trackData): | 955 def add_vcf(self, data, trackData): |
933 tId = trackData["label"] | 956 tId = trackData["label"] |
934 # url = "%s/api/datasets/%s/display" % ( | |
935 # self.giURL, | |
936 # trackData["metadata"]["dataset_id"], | |
937 # ) | |
938 categ = trackData["category"] | 957 categ = trackData["category"] |
939 useuri = trackData["useuri"].lower() == "yes" | 958 useuri = trackData["useuri"].lower() == "yes" |
940 if useuri: | 959 if useuri: |
941 url = data | 960 url = data |
942 else: | 961 else: |
1193 track_human_label = track_human_label.replace(" ", "_") | 1212 track_human_label = track_human_label.replace(" ", "_") |
1194 outputTrackConfig = { | 1213 outputTrackConfig = { |
1195 "category": category, | 1214 "category": category, |
1196 "style": {}, | 1215 "style": {}, |
1197 } | 1216 } |
1217 | |
1218 # hashData = [ | |
1219 # str(dataset_path), | |
1220 # track_human_label, | |
1221 # track["category"], | |
1222 # ] | |
1223 # hashData = "|".join(hashData).encode("utf-8") | |
1224 # hash_string = hashlib.md5(hashData).hexdigest() | |
1225 | |
1198 outputTrackConfig["assemblyNames"] = track["assemblyNames"] | 1226 outputTrackConfig["assemblyNames"] = track["assemblyNames"] |
1199 outputTrackConfig["key"] = track_human_label | 1227 outputTrackConfig["key"] = track_human_label |
1200 outputTrackConfig["useuri"] = useuri | 1228 outputTrackConfig["useuri"] = useuri |
1201 outputTrackConfig["path"] = dataset_path | 1229 outputTrackConfig["path"] = dataset_path |
1202 outputTrackConfig["ext"] = dataset_ext | 1230 outputTrackConfig["ext"] = dataset_ext |
1203 | |
1204 outputTrackConfig["trackset"] = track.get("trackset", {}) | 1231 outputTrackConfig["trackset"] = track.get("trackset", {}) |
1205 outputTrackConfig["label"] = track["label"] | 1232 outputTrackConfig["label"] = track["label"] |
1233 # outputTrackConfig["label"] = "%s_%i_%s_%s" % ( | |
1234 # dataset_ext, | |
1235 # trackIndex, | |
1236 # track_human_label, | |
1237 # hash_string, | |
1238 # ) | |
1239 | |
1206 outputTrackConfig["metadata"] = extra_metadata | 1240 outputTrackConfig["metadata"] = extra_metadata |
1207 outputTrackConfig["name"] = track_human_label | 1241 outputTrackConfig["name"] = track_human_label |
1208 if track["label"] in self.trackIdlist: | 1242 if track["label"] in self.trackIdlist: |
1209 logging.error( | 1243 logging.error( |
1210 "### not adding %s already in %s" | 1244 "### not adding %s already in %s" |
1235 "hic", | 1269 "hic", |
1236 dataset_path, | 1270 dataset_path, |
1237 hic_path, | 1271 hic_path, |
1238 ] | 1272 ] |
1239 ) | 1273 ) |
1240 logging.debug( | |
1241 "### ext=cool: wasCool=%s, hic_path=%s" | |
1242 % (outputTrackConfig["wasCool"], hic_path) | |
1243 ) | |
1244 self.add_hic( | 1274 self.add_hic( |
1245 hic_path, | 1275 hic_path, |
1246 outputTrackConfig, | 1276 outputTrackConfig, |
1247 ) | 1277 ) |
1248 elif dataset_ext in ("bed",): | 1278 elif dataset_ext in ("bed",): |
1261 dataset_path, | 1291 dataset_path, |
1262 outputTrackConfig, | 1292 outputTrackConfig, |
1263 ) | 1293 ) |
1264 elif dataset_ext == "bam": | 1294 elif dataset_ext == "bam": |
1265 real_indexes = track["conf"]["options"]["bam"]["bam_index"] | 1295 real_indexes = track["conf"]["options"]["bam"]["bam_index"] |
1266 logging.debug("**** add bam got %s for indexes" % real_indexes) | |
1267 self.add_bam( | 1296 self.add_bam( |
1268 dataset_path, | 1297 dataset_path, |
1269 outputTrackConfig, | 1298 outputTrackConfig, |
1270 bam_indexes=real_indexes, | 1299 bam_indexes=real_indexes, |
1271 ) | 1300 ) |
1272 elif dataset_ext == "cram": | 1301 elif dataset_ext == "cram": |
1273 real_indexes = track["conf"]["options"]["cram"]["cram_index"] | 1302 real_indexes = track["conf"]["options"]["cram"]["cram_index"] |
1274 logging.debug("**** add cram got %s for indexes" % real_indexes) | |
1275 self.add_cram( | 1303 self.add_cram( |
1276 dataset_path, | 1304 dataset_path, |
1277 outputTrackConfig, | 1305 outputTrackConfig, |
1278 cram_indexes=real_indexes, | 1306 cram_indexes=real_indexes, |
1279 ) | 1307 ) |
1290 dataset_path, | 1318 dataset_path, |
1291 outputTrackConfig, | 1319 outputTrackConfig, |
1292 track["conf"]["options"]["paf"], | 1320 track["conf"]["options"]["paf"], |
1293 ) | 1321 ) |
1294 else: | 1322 else: |
1295 logging.warn("Do not know how to handle %s", dataset_ext) | 1323 logging.warning("Do not know how to handle %s", dataset_ext) |
1296 # Return non-human label for use in other fields | 1324 # Return non-human label for use in other fields |
1297 yield outputTrackConfig["label"] | 1325 yield outputTrackConfig["label"] |
1298 | 1326 |
1299 def add_default_session(self, default_data): | 1327 def add_default_session(self, default_data): |
1300 """ | 1328 """ |
1301 default session settings are hard and fragile. | 1329 default session settings are hard and fragile. |
1302 .add_default_view() and other configuration code adapted from | 1330 .add_default_view() and other configuration code adapted from |
1303 https://github.com/abretaud/tools-iuc/blob/jbrowse2/tools/jbrowse2/jbrowse2.py | 1331 https://github.com/abretaud/tools-iuc/blob/jbrowse2/tools/jbrowse2/jbrowse2.py |
1304 """ | 1332 """ |
1305 # TODO using the default session for now, but check out session specs in the future https://github.com/GMOD/jbrowse-components/issues/2708 | 1333 # TODO using the default session for now, but check out session specs in the future https://github.com/GMOD/jbrowse-components/issues/2708 |
1306 bpPerPx = 50 # this is tricky since browser window width is unknown - this seems a compromise that sort of works.... | |
1307 track_types = {} | 1334 track_types = {} |
1308 with open(self.config_json_file, "r") as config_file: | 1335 with open(self.config_json_file, "r") as config_file: |
1309 config_json = json.load(config_file) | 1336 config_json = json.load(config_file) |
1310 if self.config_json: | 1337 if self.config_json: |
1311 config_json.update(self.config_json) | 1338 config_json.update(self.config_json) |
1340 "type": track_types[tId], | 1367 "type": track_types[tId], |
1341 "configuration": tId, | 1368 "configuration": tId, |
1342 "displays": [style_data], | 1369 "displays": [style_data], |
1343 } | 1370 } |
1344 ) | 1371 ) |
1372 view_json = { | |
1373 "type": "LinearGenomeView", | |
1374 "offsetPx": 0, | |
1375 "minimized": False, | |
1376 "tracks": tracks_data, | |
1377 } | |
1345 first = [x for x in self.ass_first_contigs if x[0] == gnome] | 1378 first = [x for x in self.ass_first_contigs if x[0] == gnome] |
1346 drdict = { | 1379 if len(first) > 0: |
1380 [gnome, refName, end] = first[0] | |
1381 start = 0 | |
1382 end = int(end) | |
1383 drdict = { | |
1384 "refName": refName, | |
1385 "start": start, | |
1386 "end": end, | |
1347 "reversed": False, | 1387 "reversed": False, |
1348 "assemblyName": gnome, | 1388 "assemblyName": gnome, |
1349 } | 1389 } |
1350 if len(first) > 0: | |
1351 [gnome, refName, end] = first[0] | |
1352 drdict["refName"] = refName | |
1353 drdict["start"] = 0 | |
1354 end = int(end) | |
1355 drdict["end"] = end | |
1356 else: | 1390 else: |
1357 ddl = default_data.get("defaultLocation", None) | 1391 ddl = default_data.get("defaultLocation", None) |
1358 if ddl: | 1392 if ddl: |
1359 loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", ddl) | 1393 loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", ddl) |
1360 # allow commas like 100,000 but ignore as integer | 1394 # allow commas like 100,000 but ignore as integer |
1368 else: | 1402 else: |
1369 logging.info( | 1403 logging.info( |
1370 "@@@ regexp could not match contig:start..end in the supplied location %s - please fix" | 1404 "@@@ regexp could not match contig:start..end in the supplied location %s - please fix" |
1371 % ddl | 1405 % ddl |
1372 ) | 1406 ) |
1373 view_json = { | |
1374 "type": "LinearGenomeView", | |
1375 "offsetPx": 0, | |
1376 "bpPerPx" : bpPerPx, | |
1377 "minimized": False, | |
1378 "tracks": tracks_data | |
1379 } | |
1380 if drdict.get("refName", None): | 1407 if drdict.get("refName", None): |
1381 # TODO displayedRegions is not just zooming to the region, it hides the rest of the chromosome | 1408 # TODO displayedRegions is not just zooming to the region, it hides the rest of the chromosome |
1382 view_json["displayedRegions"] = [ | 1409 view_json["displayedRegions"] = [ |
1383 drdict, | 1410 drdict, |
1384 ] | 1411 ] |
1390 session_views.append(view_json) | 1417 session_views.append(view_json) |
1391 session_name = default_data.get("session_name", "New session") | 1418 session_name = default_data.get("session_name", "New session") |
1392 for key, value in mapped_chars.items(): | 1419 for key, value in mapped_chars.items(): |
1393 session_name = session_name.replace(value, key) | 1420 session_name = session_name.replace(value, key) |
1394 session_json["name"] = session_name | 1421 session_json["name"] = session_name |
1422 | |
1395 if "views" not in session_json: | 1423 if "views" not in session_json: |
1396 session_json["views"] = session_views | 1424 session_json["views"] = session_views |
1397 else: | 1425 else: |
1398 session_json["views"] += session_views | 1426 session_json["views"] += session_views |
1427 | |
1399 pp = json.dumps(session_views, indent=2) | 1428 pp = json.dumps(session_views, indent=2) |
1400 config_json["defaultSession"] = session_json | 1429 config_json["defaultSession"] = session_json |
1401 self.config_json.update(config_json) | 1430 self.config_json.update(config_json) |
1402 logging.debug("defaultSession=%s" % (pp)) | 1431 logging.debug("defaultSession=%s" % (pp)) |
1403 with open(self.config_json_file, "w") as config_file: | 1432 with open(self.config_json_file, "w") as config_file: |
1428 Xrefs to understand the choices: | 1457 Xrefs to understand the choices: |
1429 https://github.com/GMOD/jbrowse-components/issues/2708 | 1458 https://github.com/GMOD/jbrowse-components/issues/2708 |
1430 https://github.com/GMOD/jbrowse-components/discussions/3568 | 1459 https://github.com/GMOD/jbrowse-components/discussions/3568 |
1431 https://github.com/GMOD/jbrowse-components/pull/4148 | 1460 https://github.com/GMOD/jbrowse-components/pull/4148 |
1432 """ | 1461 """ |
1433 | |
1434 | |
1435 INDEX_TEMPLATE = """<!doctype html> | |
1436 <html lang="en" style="height:100%"> | |
1437 <head> | |
1438 <meta charset="utf-8"/> | |
1439 <link rel="shortcut icon" href="./favicon.ico"/> | |
1440 <meta name="viewport" content="width=device-width,initial-scale=1"/> | |
1441 <meta name="theme-color" content="#000000"/> | |
1442 <meta name="description" content="A fast and flexible genome browser"/> | |
1443 <link rel="manifest" href="./manifest.json"/> | |
1444 <title>JBrowse</title> | |
1445 </script> | |
1446 </head> | |
1447 <body style="overscroll-behavior:none; height:100%; margin: 0;"> | |
1448 <iframe | |
1449 id="jbframe" | |
1450 title="JBrowse2" | |
1451 frameborder="0" | |
1452 width="100%" | |
1453 height="100%" | |
1454 src='index_noview.html?config=config.json__SESSION_SPEC__'> | |
1455 </iframe> | |
1456 </body> | |
1457 </html> | |
1458 """ | |
1459 | |
1460 new_index = "Nothing written" | 1462 new_index = "Nothing written" |
1461 session_spec = {"views": []} | 1463 session_spec = {"views": []} |
1462 logging.debug("def ass_first=%s\ndata=%s" % (self.ass_first_contigs, data)) | 1464 logging.debug("def ass_first=%s\ndata=%s" % (self.ass_first_contigs, data)) |
1463 for first_contig in self.ass_first_contigs: | 1465 for first_contig in self.ass_first_contigs: |
1464 logging.debug("first contig=%s" % self.ass_first_contigs) | 1466 logging.debug("first contig=%s" % self.ass_first_contigs) |
1520 self.config_json.update(config_json) | 1522 self.config_json.update(config_json) |
1521 with open(config_path, "w") as config_file: | 1523 with open(config_path, "w") as config_file: |
1522 json.dump(self.config_json, config_file, indent=2) | 1524 json.dump(self.config_json, config_file, indent=2) |
1523 | 1525 |
1524 def clone_jbrowse(self, realclone=False): | 1526 def clone_jbrowse(self, realclone=False): |
1525 """Clone a JBrowse directory into a destination directory. This also works in Biocontainer testing now | 1527 """ |
1526 Leave as True between version updates on temporary tools - requires manual conda trigger :( | 1528 Clone a JBrowse directory into a destination directory. |
1529 | |
1530 `realclone=true` will use the `jbrowse create` command. | |
1531 To allow running on internet-less compute and for reproducibility | |
1532 use frozen code with `realclone=false` | |
1533 | |
1527 """ | 1534 """ |
1528 dest = self.outdir | 1535 dest = self.outdir |
1529 if realclone: | 1536 if realclone: |
1530 self.subprocess_check_call( | 1537 self.subprocess_check_call( |
1531 ["jbrowse", "create", dest, "-f", "--tag", f"{JB2VER}"] | 1538 ["jbrowse", "create", dest, "-f", "--tag", f"{JB2VER}"] |