Mercurial > repos > fubar > jbrowse2
comparison jbrowse2.py @ 73:3b2815efa5d9 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit b361309b05a861da9b64e1324157a8c32767e0bf
author | fubar |
---|---|
date | Mon, 01 Apr 2024 03:41:42 +0000 |
parents | 2bdb748df098 |
children | 13ede71c3a4b |
comparison
equal
deleted
inserted
replaced
72:2bdb748df098 | 73:3b2815efa5d9 |
---|---|
6 import json | 6 import json |
7 import logging | 7 import logging |
8 import os | 8 import os |
9 import re | 9 import re |
10 import shutil | 10 import shutil |
11 import ssl | |
11 import struct | 12 import struct |
12 import subprocess | 13 import subprocess |
13 import tempfile | 14 import tempfile |
14 import urllib.request | 15 import urllib.request |
15 import xml.etree.ElementTree as ET | 16 import xml.etree.ElementTree as ET |
35 "}": "__cc__", | 36 "}": "__cc__", |
36 "@": "__at__", | 37 "@": "__at__", |
37 "#": "__pd__", | 38 "#": "__pd__", |
38 "": "__cn__", | 39 "": "__cn__", |
39 } | 40 } |
41 | |
42 | |
43 INDEX_TEMPLATE = """<!doctype html> | |
44 <html lang="en" style="height:100%"> | |
45 <head> | |
46 <meta charset="utf-8"/> | |
47 <link rel="shortcut icon" href="./favicon.ico"/> | |
48 <meta name="viewport" content="width=device-width,initial-scale=1"/> | |
49 <meta name="theme-color" content="#000000"/> | |
50 <meta name="description" content="A fast and flexible genome browser"/> | |
51 <link rel="manifest" href="./manifest.json"/> | |
52 <title>JBrowse</title> | |
53 </script> | |
54 </head> | |
55 <body style="overscroll-behavior:none; height:100%; margin: 0;"> | |
56 <iframe | |
57 id="jbframe" | |
58 title="JBrowse2" | |
59 frameborder="0" | |
60 width="100%" | |
61 height="100%" | |
62 src='index_noview.html?config=config.json__SESSION_SPEC__'> | |
63 </iframe> | |
64 </body> | |
65 </html> | |
66 """ | |
40 | 67 |
41 | 68 |
42 class ColorScaling(object): | 69 class ColorScaling(object): |
43 | 70 |
44 COLOR_FUNCTION_TEMPLATE = """ | 71 COLOR_FUNCTION_TEMPLATE = """ |
374 | 401 |
375 class JbrowseConnector(object): | 402 class JbrowseConnector(object): |
376 def __init__(self, outdir, jbrowse2path): | 403 def __init__(self, outdir, jbrowse2path): |
377 self.assemblies = [] # these require more than a few line diff. | 404 self.assemblies = [] # these require more than a few line diff. |
378 self.assmeta = {} | 405 self.assmeta = {} |
406 self.ass_first_contigs = ( | |
407 [] | |
408 ) # for default session - these are read as first line of the assembly .fai | |
379 self.giURL = GALAXY_INFRASTRUCTURE_URL | 409 self.giURL = GALAXY_INFRASTRUCTURE_URL |
380 self.outdir = outdir | 410 self.outdir = outdir |
381 self.jbrowse2path = jbrowse2path | 411 self.jbrowse2path = jbrowse2path |
382 os.makedirs(self.outdir, exist_ok=True) | 412 os.makedirs(self.outdir, exist_ok=True) |
383 self.genome_names = [] | 413 self.genome_names = [] |
448 "displays": [ | 478 "displays": [ |
449 style_data, | 479 style_data, |
450 ] | 480 ] |
451 } | 481 } |
452 return wstyle | 482 return wstyle |
483 | |
484 def urllib_get_2018(): | |
485 # Using a protected member like this is not any more fragile | |
486 # than extending the class and using it. I would use it. | |
487 url = "https://localhost:6667/my-endpoint" | |
488 ssl._create_default_https_context = ssl._create_unverified_context | |
489 with urllib.request.urlopen(url=url) as f: | |
490 print(f.read().decode("utf-8")) | |
491 | |
492 def urllib_get_2022(): | |
493 # Finally! Able to use the public API. Happy happy! | |
494 url = "https://localhost:6667/my-endpoint" | |
495 scontext = ssl.SSLContext(ssl.PROTOCOL_TLS) | |
496 scontext.verify_mode = ssl.VerifyMode.CERT_NONE | |
497 with urllib.request.urlopen(url=url, context=scontext) as f: | |
498 print(f.read().decode("utf-8")) | |
453 | 499 |
454 def process_genomes(self, genomes): | 500 def process_genomes(self, genomes): |
455 assembly = [] | 501 assembly = [] |
456 assmeta = [] | 502 assmeta = [] |
457 useuri = False | 503 useuri = False |
467 if genome_name not in genome_names: | 513 if genome_name not in genome_names: |
468 # pafs with shared references | 514 # pafs with shared references |
469 fapath = genome_node["path"] | 515 fapath = genome_node["path"] |
470 if not useuri: | 516 if not useuri: |
471 fapath = os.path.realpath(fapath) | 517 fapath = os.path.realpath(fapath) |
472 assem = self.make_assembly(fapath, genome_name, useuri) | 518 assem, first_contig = self.make_assembly(fapath, genome_name, useuri) |
473 assembly.append(assem) | 519 assembly.append(assem) |
520 self.ass_first_contigs.append(first_contig) | |
474 if len(genome_names) == 0: | 521 if len(genome_names) == 0: |
475 this_genome["genome_name"] = genome_name # first one for all tracks | 522 this_genome["genome_name"] = genome_name # first one for all tracks |
476 genome_names.append(genome_name) | 523 genome_names.append(genome_name) |
477 this_genome["genome_sequence_adapter"] = assem["sequence"][ | 524 this_genome["genome_sequence_adapter"] = assem["sequence"][ |
478 "adapter" | 525 "adapter" |
489 ].strip() | 536 ].strip() |
490 else: | 537 else: |
491 this_genome["genome_firstcontig"] = fl | 538 this_genome["genome_firstcontig"] = fl |
492 else: | 539 else: |
493 try: | 540 try: |
494 fl = urllib.request.urlopen(fapath + ".fai").readline() | 541 scontext = ssl.SSLContext(ssl.PROTOCOL_TLS) |
542 scontext.verify_mode = ssl.VerifyMode.CERT_NONE | |
543 with urllib.request.urlopen( | |
544 url=fapath + ".fai", context=scontext | |
545 ) as f: | |
546 fl = f.readline() | |
495 except Exception: | 547 except Exception: |
496 fl = None | 548 fl = None |
497 if fl: # is first row of the text fai so the first contig name | 549 if fl: # is first row of the text fai so the first contig name |
498 this_genome["genome_firstcontig"] = ( | 550 this_genome["genome_firstcontig"] = ( |
499 fl.decode("utf8").strip().split()[0] | 551 fl.decode("utf8").strip().split()[0] |
504 self.tracksToAdd[genome_names[0]] = [] | 556 self.tracksToAdd[genome_names[0]] = [] |
505 self.genome_names += genome_names | 557 self.genome_names += genome_names |
506 return this_genome["genome_name"] | 558 return this_genome["genome_name"] |
507 | 559 |
508 def make_assembly(self, fapath, gname, useuri): | 560 def make_assembly(self, fapath, gname, useuri): |
561 """added code to grab the first contig name and length for broken default session from Anthony and Helena's code | |
562 that poor Bjoern is trying to figure out. | |
563 """ | |
509 if useuri: | 564 if useuri: |
510 faname = fapath | 565 faname = fapath |
511 adapter = { | 566 adapter = { |
512 "type": "BgzipFastaAdapter", | 567 "type": "BgzipFastaAdapter", |
513 "fastaLocation": {"uri": faname, "locationType": "UriLocation"}, | 568 "fastaLocation": {"uri": faname, "locationType": "UriLocation"}, |
514 "faiLocation": {"uri": faname + ".fai", "locationType": "UriLocation"}, | 569 "faiLocation": {"uri": faname + ".fai", "locationType": "UriLocation"}, |
515 "gziLocation": {"uri": faname + ".gzi", "locationType": "UriLocation"}, | 570 "gziLocation": {"uri": faname + ".gzi", "locationType": "UriLocation"}, |
516 } | 571 } |
572 scontext = ssl.SSLContext(ssl.PROTOCOL_TLS) | |
573 scontext.verify_mode = ssl.VerifyMode.CERT_NONE | |
574 with urllib.request.urlopen(url=faname + ".fai", context=scontext) as f: | |
575 fl = f.readline() | |
576 contig = fl.decode("utf8").strip() | |
577 # Merlin 172788 8 60 61 | |
517 else: | 578 else: |
518 faname = gname + ".fa.gz" | 579 faname = gname + ".fa.gz" |
519 fadest = os.path.realpath(os.path.join(self.outdir, faname)) | 580 fadest = os.path.realpath(os.path.join(self.outdir, faname)) |
520 cmd = "bgzip -i -c %s -I %s.gzi > %s && samtools faidx %s" % ( | 581 cmd = "bgzip -i -c %s -I %s.gzi > %s && samtools faidx %s" % ( |
521 fapath, | 582 fapath, |
535 }, | 596 }, |
536 "gziLocation": { | 597 "gziLocation": { |
537 "uri": faname + ".gzi", | 598 "uri": faname + ".gzi", |
538 }, | 599 }, |
539 } | 600 } |
540 | 601 contig = open(fadest + ".fai", "r").readline().strip() |
602 first_contig = contig.split()[:2] | |
603 first_contig.insert(0, gname) | |
541 trackDict = { | 604 trackDict = { |
542 "name": gname, | 605 "name": gname, |
543 "sequence": { | 606 "sequence": { |
544 "type": "ReferenceSequenceTrack", | 607 "type": "ReferenceSequenceTrack", |
545 "trackId": gname, | 608 "trackId": gname, |
554 "type": "LinearGCContentDisplay", | 617 "type": "LinearGCContentDisplay", |
555 "displayId": "%s-LinearGCContentDisplay" % gname, | 618 "displayId": "%s-LinearGCContentDisplay" % gname, |
556 }, | 619 }, |
557 ], | 620 ], |
558 } | 621 } |
559 return trackDict | 622 return (trackDict, first_contig) |
560 | 623 |
561 def add_default_view(self): | 624 def add_default_view(self): |
562 cmd = [ | 625 cmd = [ |
563 "jbrowse", | 626 "jbrowse", |
564 "set-default-session", | 627 "set-default-session", |
905 ["cp", os.path.realpath(cram_index), dest + ".crai"] | 968 ["cp", os.path.realpath(cram_index), dest + ".crai"] |
906 ) | 969 ) |
907 else: | 970 else: |
908 cpath = os.path.realpath(dest) + ".crai" | 971 cpath = os.path.realpath(dest) + ".crai" |
909 cmd = ["samtools", "index", "-c", "-o", cpath, os.path.realpath(dest)] | 972 cmd = ["samtools", "index", "-c", "-o", cpath, os.path.realpath(dest)] |
910 logging.debug("executing cmd %s" % " ".join(cmd)) | |
911 self.subprocess_check_call(cmd) | 973 self.subprocess_check_call(cmd) |
912 trackDict = { | 974 trackDict = { |
913 "type": "AlignmentsTrack", | 975 "type": "AlignmentsTrack", |
914 "trackId": tId, | 976 "trackId": tId, |
915 "name": trackData["name"], | 977 "name": trackData["name"], |
1134 "https://" | 1196 "https://" |
1135 ) | 1197 ) |
1136 | 1198 |
1137 if gname not in self.genome_names: | 1199 if gname not in self.genome_names: |
1138 # ignore if already there - eg for duplicates among pafs. | 1200 # ignore if already there - eg for duplicates among pafs. |
1139 asstrack = self.make_assembly(pgpaths[i], gname, useuri) | 1201 asstrack, first_contig = self.make_assembly(pgpaths[i], gname, useuri) |
1140 self.genome_names.append(gname) | 1202 self.genome_names.append(gname) |
1141 self.tracksToAdd[gname] = [] | 1203 self.tracksToAdd[gname] = [] |
1142 self.assemblies.append(asstrack) | 1204 self.assemblies.append(asstrack) |
1143 trackDict = { | 1205 trackDict = { |
1144 "type": "SyntenyTrack", | 1206 "type": "SyntenyTrack", |
1316 for gnome in self.genome_names: | 1378 for gnome in self.genome_names: |
1317 tracks_data = [] | 1379 tracks_data = [] |
1318 for track_conf in self.tracksToAdd[gnome]: | 1380 for track_conf in self.tracksToAdd[gnome]: |
1319 tId = track_conf["trackId"] | 1381 tId = track_conf["trackId"] |
1320 track_types[tId] = track_conf["type"] | 1382 track_types[tId] = track_conf["type"] |
1321 style_data = default_data["style"].get(tId, None) | 1383 style_data = default_data[gnome]["style"].get(tId, None) |
1322 if not style_data: | 1384 if not style_data: |
1323 logging.debug( | 1385 logging.debug( |
1324 "### No style data in default data %s for %s" | 1386 "### No style data in default data %s for %s" |
1325 % (default_data, tId) | 1387 % (default_data, tId) |
1326 ) | 1388 ) |
1391 self.config_json.update(config_json) | 1453 self.config_json.update(config_json) |
1392 logging.debug("defaultSession=%s" % (pp)) | 1454 logging.debug("defaultSession=%s" % (pp)) |
1393 with open(self.config_json_file, "w") as config_file: | 1455 with open(self.config_json_file, "w") as config_file: |
1394 json.dump(self.config_json, config_file, indent=2) | 1456 json.dump(self.config_json, config_file, indent=2) |
1395 | 1457 |
1458 def add_defsess_to_index(self, data): | |
1459 """ | |
1460 Broken in Anthony's PR because it only ever dealt with the first assembly. | |
1461 | |
1462 Add some default session settings: set some assemblies/tracks on/off | |
1463 | |
1464 This allows selecting a default view: | |
1465 - jb type (Linear, Circular, etc) | |
1466 - default location on an assembly | |
1467 - default tracks | |
1468 - ... | |
1469 | |
1470 Different methods to do that were tested/discussed: | |
1471 - using a defaultSession item in config.json: this proved to be difficult: | |
1472 forced to write a full session block, including hard-coded/hard-to-guess items, | |
1473 no good way to let Jbrowse2 display a scaffold without knowing its size | |
1474 - using JBrowse2 as an embedded React component in a tool-generated html file: | |
1475 it works but it requires generating js code to actually do what we want = choosing default view, assembly, tracks, ... | |
1476 - writing a session-spec inside the config.json file: this is not yet supported as of 2.10.2 (see PR 4148 below) | |
1477 a session-spec is a kind of simplified defaultSession where you don't need to specify every aspect of the session | |
1478 - passing a session-spec through URL params by embedding the JBrowse2 index.html inside an iframe | |
1479 we selected this option | |
1480 | |
1481 Xrefs to understand the choices: | |
1482 https://github.com/GMOD/jbrowse-components/issues/2708 | |
1483 https://github.com/GMOD/jbrowse-components/discussions/3568 | |
1484 https://github.com/GMOD/jbrowse-components/pull/4148 | |
1485 """ | |
1486 new_index = "Nothing written" | |
1487 session_spec = {"views": []} | |
1488 logging.debug("def data=%s" % data) | |
1489 for first_contig in self.ass_first_contigs: | |
1490 gnome, refName, end = first_contig | |
1491 start = 0 | |
1492 if False or data.get("defaultLocation", ""): | |
1493 loc_match = re.search( | |
1494 r"^([^:]+):([\d,]*)\.*([\d,]*)$", data["defaultLocation"] | |
1495 ) | |
1496 # loc_match = re.search(r"^(\w+):(\d+)\.+(\d+)$", data["defaultLocation"]) | |
1497 if loc_match: | |
1498 refName = loc_match.group(1) | |
1499 start = int(loc_match.group(2)) | |
1500 end = int(loc_match.group(3)) | |
1501 else: | |
1502 if refName: | |
1503 view = { | |
1504 "assembly": gnome, | |
1505 "loc": "{}:{}-{}".format(refName, start, end), | |
1506 "type": "LinearGenomeView", | |
1507 "tracks": data[gnome]["tracks"], | |
1508 } | |
1509 session_spec["views"].append(view) | |
1510 sess = json.dumps(session_spec, sort_keys=True, indent=2) | |
1511 new_index = INDEX_TEMPLATE.replace( | |
1512 "__SESSION_SPEC__", "&session=spec-{}".format(sess) | |
1513 ) | |
1514 | |
1515 os.rename( | |
1516 os.path.join(self.outdir, "index.html"), | |
1517 os.path.join(self.outdir, "index_noview.html"), | |
1518 ) | |
1519 | |
1520 with open(os.path.join(self.outdir, "index.html"), "w") as nind: | |
1521 nind.write(new_index) | |
1522 logging.debug( | |
1523 "#### add_defsession gnome=%s refname=%s\nsession_spec=%s\nnew_index=%s" | |
1524 % (gnome, refName, sess, new_index) | |
1525 ) | |
1526 | |
1396 def add_general_configuration(self, data): | 1527 def add_general_configuration(self, data): |
1397 """ | 1528 """ |
1398 Add some general configuration to the config.json file | 1529 Add some general configuration to the config.json file |
1399 """ | 1530 """ |
1400 | 1531 |
1424 config_json["configuration"].update(config_data) | 1555 config_json["configuration"].update(config_data) |
1425 self.config_json.update(config_json) | 1556 self.config_json.update(config_json) |
1426 with open(config_path, "w") as config_file: | 1557 with open(config_path, "w") as config_file: |
1427 json.dump(self.config_json, config_file, indent=2) | 1558 json.dump(self.config_json, config_file, indent=2) |
1428 | 1559 |
1429 def clone_jbrowse(self, realclone=True): | 1560 def clone_jbrowse(self, realclone=False): |
1430 """Clone a JBrowse directory into a destination directory. This also works in Biocontainer testing now | 1561 """Clone a JBrowse directory into a destination directory. This also works in Biocontainer testing now |
1431 Leave as True between version updates on temporary tools - requires manual conda trigger :( | 1562 Leave as True between version updates on temporary tools - requires manual conda trigger :( |
1432 """ | 1563 """ |
1433 dest = self.outdir | 1564 dest = self.outdir |
1434 if realclone: | 1565 if realclone: |
1478 # be GET and not POST so it should redirect OK | 1609 # be GET and not POST so it should redirect OK |
1479 GALAXY_INFRASTRUCTURE_URL = "http://" + GALAXY_INFRASTRUCTURE_URL | 1610 GALAXY_INFRASTRUCTURE_URL = "http://" + GALAXY_INFRASTRUCTURE_URL |
1480 | 1611 |
1481 jc = JbrowseConnector(outdir=args.outdir, jbrowse2path=args.jbrowse2path) | 1612 jc = JbrowseConnector(outdir=args.outdir, jbrowse2path=args.jbrowse2path) |
1482 | 1613 |
1483 default_session_data = { | 1614 default_session_data = {} |
1484 "visibility": { | |
1485 "default_on": [], | |
1486 "default_off": [], | |
1487 }, | |
1488 "style": {}, | |
1489 "style_labels": {}, | |
1490 } | |
1491 | 1615 |
1492 for ass in root.findall("assembly"): | 1616 for ass in root.findall("assembly"): |
1493 genomes = [ | 1617 genomes = [ |
1494 { | 1618 { |
1495 "path": x.attrib["path"], | 1619 "path": x.attrib["path"], |
1498 "meta": metadata_from_node(x.find("metadata")), | 1622 "meta": metadata_from_node(x.find("metadata")), |
1499 } | 1623 } |
1500 for x in ass.findall("metadata/genomes/genome") | 1624 for x in ass.findall("metadata/genomes/genome") |
1501 ] | 1625 ] |
1502 assref_name = jc.process_genomes(genomes) | 1626 assref_name = jc.process_genomes(genomes) |
1503 | 1627 if not default_session_data.get(assref_name, None): |
1628 default_session_data[assref_name] = { | |
1629 "tracks": [], | |
1630 "style": {}, | |
1631 "style_labels": {}, | |
1632 "visibility": { | |
1633 "default_on": [], | |
1634 "default_off": [], | |
1635 }, | |
1636 } | |
1504 for track in ass.find("tracks"): | 1637 for track in ass.find("tracks"): |
1505 track_conf = {} | 1638 track_conf = {} |
1506 track_conf["trackfiles"] = [] | 1639 track_conf["trackfiles"] = [] |
1507 track_conf["assemblyNames"] = assref_name | 1640 track_conf["assemblyNames"] = assref_name |
1508 is_multi_bigwig = False | 1641 is_multi_bigwig = False |
1575 if keys: | 1708 if keys: |
1576 for key in keys: | 1709 for key in keys: |
1577 vis = track.attrib.get("visibility", "default_off") | 1710 vis = track.attrib.get("visibility", "default_off") |
1578 if not vis: | 1711 if not vis: |
1579 vis = "default_off" | 1712 vis = "default_off" |
1580 default_session_data["visibility"][vis].append(key) | 1713 default_session_data[assref_name]["visibility"][vis].append(key) |
1581 if track.find("options/style"): | 1714 if track.find("options/style"): |
1582 default_session_data["style"][key] = { | 1715 default_session_data[assref_name]["style"][key] = { |
1583 item.tag: parse_style_conf(item) | 1716 item.tag: parse_style_conf(item) |
1584 for item in track.find("options/style") | 1717 for item in track.find("options/style") |
1585 } | 1718 } |
1586 else: | 1719 else: |
1587 default_session_data["style"][key] = {} | 1720 default_session_data[assref_name]["style"][key] = {} |
1588 logging.debug("@@@@ no options/style found for %s" % (key)) | 1721 logging.debug("no options/style found for %s" % (key)) |
1589 | 1722 |
1590 if track.find("options/style_labels"): | 1723 if track.find("options/style_labels"): |
1591 default_session_data["style_labels"][key] = { | 1724 default_session_data[assref_name]["style_labels"][key] = { |
1592 item.tag: parse_style_conf(item) | 1725 item.tag: parse_style_conf(item) |
1593 for item in track.find("options/style_labels") | 1726 for item in track.find("options/style_labels") |
1594 } | 1727 } |
1728 default_session_data[assref_name]["tracks"].append(key) | |
1595 default_session_data["defaultLocation"] = root.find( | 1729 default_session_data["defaultLocation"] = root.find( |
1596 "metadata/general/defaultLocation" | 1730 "metadata/general/defaultLocation" |
1597 ).text | 1731 ).text |
1598 default_session_data["session_name"] = root.find( | 1732 default_session_data["session_name"] = root.find( |
1599 "metadata/general/session_name" | 1733 "metadata/general/session_name" |
1614 trackconf += jc.tracksToAdd[gnome] | 1748 trackconf += jc.tracksToAdd[gnome] |
1615 jc.config_json["tracks"] = trackconf | 1749 jc.config_json["tracks"] = trackconf |
1616 assconf = jc.config_json.get("assemblies", []) | 1750 assconf = jc.config_json.get("assemblies", []) |
1617 assconf += jc.assemblies | 1751 assconf += jc.assemblies |
1618 jc.config_json["assemblies"] = assconf | 1752 jc.config_json["assemblies"] = assconf |
1619 logging.debug("&&&assemblies=%s, gnames=%s" % (assconf, jc.genome_names)) | 1753 logging.debug("assemblies=%s, gnames=%s" % (assconf, jc.genome_names)) |
1620 jc.write_config() | 1754 jc.write_config() |
1621 jc.add_default_session(default_session_data) | 1755 jc.add_default_session(default_session_data) |
1756 jc.add_defsess_to_index(default_session_data) | |
1622 # jc.text_index() not sure what broke here. | 1757 # jc.text_index() not sure what broke here. |