Mercurial > repos > fubar > jbrowse2
comparison jbrowse2.py @ 60:81d535970196 draft
planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit 403a35e1245fa5e62f4be6116a725b9e4d9c353a
author | fubar |
---|---|
date | Mon, 25 Mar 2024 02:10:05 +0000 |
parents | f807e219cec3 |
children | e7a6f7a7148d |
comparison
equal
deleted
inserted
replaced
59:f807e219cec3 | 60:81d535970196 |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # change to accumulating all configuration for config.json based on the default from the clone | 2 |
3 import argparse | 3 import argparse |
4 import binascii | 4 import binascii |
5 import datetime | 5 import datetime |
6 import json | 6 import json |
7 import logging | 7 import logging |
21 JB2VER = "v2.10.3" | 21 JB2VER = "v2.10.3" |
22 # version pinned for cloning | 22 # version pinned for cloning |
23 | 23 |
24 TODAY = datetime.datetime.now().strftime("%Y-%m-%d") | 24 TODAY = datetime.datetime.now().strftime("%Y-%m-%d") |
25 GALAXY_INFRASTRUCTURE_URL = None | 25 GALAXY_INFRASTRUCTURE_URL = None |
26 | |
27 # version pinned for cloning | |
28 | 26 |
29 mapped_chars = { | 27 mapped_chars = { |
30 ">": "__gt__", | 28 ">": "__gt__", |
31 "<": "__lt__", | 29 "<": "__lt__", |
32 "'": "__sq__", | 30 "'": "__sq__", |
456 genome_name = genome_node["meta"]["dataset_dname"].strip() | 454 genome_name = genome_node["meta"]["dataset_dname"].strip() |
457 if len(genome_name.split()) > 1: | 455 if len(genome_name.split()) > 1: |
458 genome_name = genome_name.split()[0] | 456 genome_name = genome_name.split()[0] |
459 # spaces and cruft break scripts when substituted | 457 # spaces and cruft break scripts when substituted |
460 if genome_name not in self.genome_names: | 458 if genome_name not in self.genome_names: |
461 # ignore dupes - can have multiple pafs with same references? | 459 # pafs with shared references |
462 fapath = genome_node["path"] | 460 fapath = genome_node["path"] |
463 if not useuri: | 461 if not useuri: |
464 fapath = os.path.realpath(fapath) | 462 fapath = os.path.realpath(fapath) |
465 assem = self.make_assembly(fapath, genome_name, useuri) | 463 assem = self.make_assembly(fapath, genome_name, useuri) |
466 assemblies.append(assem) | 464 assemblies.append(assem) |
467 self.genome_names.append(genome_name) | 465 self.genome_names.append(genome_name) |
468 if self.genome_name is None: | 466 if self.genome_name is None: |
469 self.genome_name = ( | 467 self.genome_name = ( |
470 genome_name # first one for all tracks - other than paf | 468 genome_name # first one for all tracks |
471 ) | 469 ) |
472 self.genome_sequence_adapter = assem["sequence"]["adapter"] | 470 self.genome_sequence_adapter = assem["sequence"]["adapter"] |
473 self.genome_firstcontig = None | 471 self.genome_firstcontig = None |
474 if not useuri: | 472 if not useuri: |
475 fl = open(fapath, "r").readline() | 473 fl = open(fapath, "r").readline() |
479 if len(fl.split()) > 1: | 477 if len(fl.split()) > 1: |
480 self.genome_firstcontig = fl.split()[0].strip() | 478 self.genome_firstcontig = fl.split()[0].strip() |
481 else: | 479 else: |
482 self.genome_firstcontig = fl | 480 self.genome_firstcontig = fl |
483 else: | 481 else: |
484 fl = urllib.request.urlopen(fapath + ".fai").readline() | 482 try: |
483 fl = urllib.request.urlopen(fapath + ".fai").readline() | |
484 except: | |
485 fl = None | |
485 if fl: # is first row of the text fai so the first contig name | 486 if fl: # is first row of the text fai so the first contig name |
486 self.genome_firstcontig = ( | 487 self.genome_firstcontig = ( |
487 fl.decode("utf8").strip().split()[0] | 488 fl.decode("utf8").strip().split()[0] |
488 ) | 489 ) |
490 else: | |
491 self.genome_firstcontig = None | |
489 if self.config_json.get("assemblies", None): | 492 if self.config_json.get("assemblies", None): |
490 self.config_json["assemblies"] += assemblies | 493 self.config_json["assemblies"] += assemblies |
491 else: | 494 else: |
492 self.config_json["assemblies"] = assemblies | 495 self.config_json["assemblies"] = assemblies |
493 | 496 |
604 faiLocation: | 607 faiLocation: |
605 uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.fai', | 608 uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.fai', |
606 gziLocation: | 609 gziLocation: |
607 uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.gzi', | 610 uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.gzi', |
608 Cool will not be likely to be a good fit - see discussion at https://github.com/GMOD/jbrowse-components/issues/2438 | 611 Cool will not be likely to be a good fit - see discussion at https://github.com/GMOD/jbrowse-components/issues/2438 |
612 | |
613 | |
609 """ | 614 """ |
610 tId = trackData["label"] | 615 tId = trackData["label"] |
611 # can be served - if public. | 616 # can be served - if public. |
612 # dsId = trackData["metadata"]["dataset_id"] | 617 # dsId = trackData["metadata"]["dataset_id"] |
613 # url = "%s/api/datasets/%s/display?to_ext=hic " % (self.giURL, dsId) | 618 # url = "%s/api/datasets/%s/display?to_ext=hic " % (self.giURL, dsId) |
614 useuri = trackData["useuri"].lower() == "yes" | 619 useuri = trackData["useuri"].lower() == "yes" |
615 if useuri: | 620 if useuri: |
616 uri = data | 621 uri = data |
617 else: | 622 else: |
618 uri = trackData["hic_url"] | 623 uri = "%s.hic" % trackData["label"] |
624 # slashes in names cause path trouble | |
625 dest = os.path.join(self.outdir, uri) | |
626 cmd = ["cp", data, dest] | |
627 self.subprocess_check_call(cmd) | |
619 categ = trackData["category"] | 628 categ = trackData["category"] |
620 trackDict = { | 629 trackDict = { |
621 "type": "HicTrack", | 630 "type": "HicTrack", |
622 "trackId": tId, | 631 "trackId": tId, |
623 "name": uri, | 632 "name": trackData["name"], |
624 "assemblyNames": [self.genome_name], | 633 "assemblyNames": [self.genome_name], |
625 "category": [ | 634 "category": [ |
626 categ, | 635 categ, |
627 ], | 636 ], |
628 "adapter": { | 637 "adapter": { |
629 "type": "HicAdapter", | 638 "type": "HicAdapter", |
630 "hicLocation": uri, | 639 "hicLocation": { "uri": uri } |
631 }, | 640 } |
632 "displays": [ | 641 } |
633 { | |
634 "type": "LinearHicDisplay", | |
635 "displayId": "%s-LinearHicDisplay" % tId, | |
636 }, | |
637 ], | |
638 } | |
639 style_json = self._prepare_track_style(trackDict) | |
640 trackDict["style"] = style_json | |
641 self.tracksToAdd.append(trackDict) | 642 self.tracksToAdd.append(trackDict) |
642 self.trackIdlist.append(tId) | 643 self.trackIdlist.append(tId) |
643 | 644 |
644 def add_maf(self, data, trackData): | 645 def add_maf(self, data, trackData): |
645 """ | 646 """ |
791 self.tracksToAdd.append(trackDict) | 792 self.tracksToAdd.append(trackDict) |
792 self.trackIdlist.append(tId) | 793 self.trackIdlist.append(tId) |
793 os.unlink(gff3) | 794 os.unlink(gff3) |
794 | 795 |
795 def add_bigwig(self, data, trackData): | 796 def add_bigwig(self, data, trackData): |
796 """ "type": "LinearWiggleDisplay", | |
797 "configuration": {}, | |
798 "selectedRendering": "", | |
799 "resolution": 1, | |
800 "posColor": "rgb(228, 26, 28)", | |
801 "negColor": "rgb(255, 255, 51)", | |
802 "constraints": {} | |
803 """ | |
804 useuri = trackData["useuri"].lower() == "yes" | 797 useuri = trackData["useuri"].lower() == "yes" |
805 if useuri: | 798 if useuri: |
806 url = data | 799 url = data |
807 else: | 800 else: |
808 url = "%s.bigwig" % trackData["label"] | 801 url = "%s.bigwig" % trackData["label"] |
1276 # Return non-human label for use in other fields | 1269 # Return non-human label for use in other fields |
1277 yield outputTrackConfig["label"] | 1270 yield outputTrackConfig["label"] |
1278 | 1271 |
1279 def add_default_session(self, default_data): | 1272 def add_default_session(self, default_data): |
1280 """ | 1273 """ |
1281 Add some default session settings: set some assemblies/tracks on/off | 1274 default session settings are hard and fragile. |
1275 .add_default_view() and other configuration code adapted from | |
1276 https://github.com/abretaud/tools-iuc/blob/jbrowse2/tools/jbrowse2/jbrowse2.py | |
1282 """ | 1277 """ |
1283 tracks_data = [] | 1278 tracks_data = [] |
1284 | |
1285 # TODO using the default session for now, but check out session specs in the future https://github.com/GMOD/jbrowse-components/issues/2708 | 1279 # TODO using the default session for now, but check out session specs in the future https://github.com/GMOD/jbrowse-components/issues/2708 |
1286 | |
1287 # We need to know the track type from the config.json generated just before | |
1288 track_types = {} | 1280 track_types = {} |
1289 with open(self.config_json_file, "r") as config_file: | 1281 with open(self.config_json_file, "r") as config_file: |
1290 config_json = json.load(config_file) | 1282 config_json = json.load(config_file) |
1291 if self.config_json: | 1283 if self.config_json: |
1292 config_json.update(self.config_json) | 1284 config_json.update(self.config_json) |
1293 | |
1294 for track_conf in self.tracksToAdd: | 1285 for track_conf in self.tracksToAdd: |
1295 track_types[track_conf["trackId"]] = track_conf["type"] | |
1296 tId = track_conf["trackId"] | 1286 tId = track_conf["trackId"] |
1297 #if tId in data["visibility"]["default_on"]: | 1287 track_types[tId] = track_conf["type"] |
1298 style_data = default_data["style"].get(tId, None) | 1288 style_data = default_data["style"].get(tId, None) |
1299 if not style_data: | 1289 if not style_data: |
1300 logging.warn("### No style data in default data for %s" % tId) | 1290 logging.warn("### No style data in default data %s for %s" % (default_data, tId)) |
1301 style_data = {"type": "LinearBasicDisplay"} | 1291 style_data = {"type": "LinearBasicDisplay"} |
1302 if "displays" in track_conf: | 1292 if "displays" in track_conf: |
1303 disp = track_conf["displays"][0]["type"] | 1293 disp = track_conf["displays"][0]["type"] |
1304 style_data["type"] = disp | 1294 style_data["type"] = disp |
1305 if track_conf.get("style_labels", None): | 1295 if track_conf.get("style_labels", None): |
1313 "displays": [style_data], | 1303 "displays": [style_data], |
1314 } | 1304 } |
1315 ) | 1305 ) |
1316 # The view for the assembly we're adding | 1306 # The view for the assembly we're adding |
1317 view_json = {"type": "LinearGenomeView", "tracks": tracks_data} | 1307 view_json = {"type": "LinearGenomeView", "tracks": tracks_data} |
1318 | 1308 logging.warn("### view_json=%s" % view_json) |
1319 refName = None | 1309 refName = None |
1320 drdict = { | 1310 drdict = { |
1321 "reversed": False, | 1311 "reversed": False, |
1322 "assemblyName": self.genome_name, | 1312 "assemblyName": self.genome_name, |
1323 "start": 1, | 1313 "start": 1, |
1406 config_json["configuration"].update(config_data) | 1396 config_json["configuration"].update(config_data) |
1407 self.config_json.update(config_json) | 1397 self.config_json.update(config_json) |
1408 with open(config_path, "w") as config_file: | 1398 with open(config_path, "w") as config_file: |
1409 json.dump(self.config_json, config_file, indent=2) | 1399 json.dump(self.config_json, config_file, indent=2) |
1410 | 1400 |
1411 def clone_jbrowse(self, realclone=True): | 1401 def clone_jbrowse(self, realclone=False): |
1412 """Clone a JBrowse directory into a destination directory. This also works in Biocontainer testing now""" | 1402 """Clone a JBrowse directory into a destination directory. This also works in Biocontainer testing now |
1403 Leave as True between version updates on temporary tools - requires manual conda trigger :( | |
1404 """ | |
1413 dest = self.outdir | 1405 dest = self.outdir |
1414 if realclone: | 1406 if realclone: |
1415 self.subprocess_check_call( | 1407 self.subprocess_check_call( |
1416 ["jbrowse", "create", dest, "-f", "--tag", f"{JB2VER}"] | 1408 ["jbrowse", "create", dest, "-f", "--tag", f"{JB2VER}"] |
1417 ) | 1409 ) |
1470 for x in root.findall("metadata/genomes/genome") | 1462 for x in root.findall("metadata/genomes/genome") |
1471 ], | 1463 ], |
1472 ) | 1464 ) |
1473 jc.process_genomes() | 1465 jc.process_genomes() |
1474 | 1466 |
1475 # .add_default_view() replace from https://github.com/abretaud/tools-iuc/blob/jbrowse2/tools/jbrowse2/jbrowse2.py | |
1476 default_session_data = { | 1467 default_session_data = { |
1477 "visibility": { | 1468 "visibility": { |
1478 "default_on": [], | 1469 "default_on": [], |
1479 "default_off": [], | 1470 "default_off": [], |
1480 }, | 1471 }, |
1498 | 1489 |
1499 trackfiles = track.findall("files/trackFile") | 1490 trackfiles = track.findall("files/trackFile") |
1500 if trackfiles: | 1491 if trackfiles: |
1501 for x in track.findall("files/trackFile"): | 1492 for x in track.findall("files/trackFile"): |
1502 track_conf["label"] = x.attrib["label"] | 1493 track_conf["label"] = x.attrib["label"] |
1494 trackkey = track_conf["label"] | |
1503 track_conf["useuri"] = x.attrib["useuri"] | 1495 track_conf["useuri"] = x.attrib["useuri"] |
1504 if is_multi_bigwig: | 1496 if is_multi_bigwig: |
1505 multi_bigwig_paths.append( | 1497 multi_bigwig_paths.append( |
1506 ( | 1498 ( |
1507 x.attrib["label"], | 1499 x.attrib["label"], |
1542 {}, # No metadata for multiple bigwig | 1534 {}, # No metadata for multiple bigwig |
1543 ) | 1535 ) |
1544 ) | 1536 ) |
1545 track_conf["category"] = track.attrib["cat"] | 1537 track_conf["category"] = track.attrib["cat"] |
1546 track_conf["format"] = track.attrib["format"] | 1538 track_conf["format"] = track.attrib["format"] |
1547 if track.find("options/style"): | |
1548 track_conf["style"] = { | |
1549 item.tag: parse_style_conf(item) for item in track.find("options/style") | |
1550 } | |
1551 else: | |
1552 track_conf["style"] = {} | |
1553 if track.find("options/style_labels"): | |
1554 track_conf["style_labels"] = { | |
1555 item.tag: parse_style_conf(item) | |
1556 for item in track.find("options/style_labels") | |
1557 } | |
1558 | |
1559 track_conf["conf"] = etree_to_dict(track.find("options")) | 1539 track_conf["conf"] = etree_to_dict(track.find("options")) |
1560 track_conf["category"] = track.attrib["cat"] | 1540 track_conf["category"] = track.attrib["cat"] |
1561 track_conf["format"] = track.attrib["format"] | 1541 track_conf["format"] = track.attrib["format"] |
1562 keys = jc.process_annotations(track_conf) | 1542 keys = jc.process_annotations(track_conf) |
1563 | 1543 |
1564 if keys: | 1544 if keys: |
1565 for key in keys: | 1545 for key in keys: |
1566 default_session_data["visibility"][ | 1546 default_session_data["visibility"][ |
1567 track.attrib.get("visibility", "default_off") | 1547 track.attrib.get("visibility", "default_off") |
1568 ].append(key) | 1548 ].append(key) |
1569 if track_conf.get("style", None): | 1549 if track.find("options/style"): |
1570 default_session_data["style"][key] = track_conf["style"] | 1550 default_session_data["style"][key] = { |
1571 if track_conf.get("style_lables", None): | 1551 item.tag: parse_style_conf(item) for item in track.find("options/style") |
1572 default_session_data["style_labels"][key] = track_conf.get( | 1552 } |
1573 "style_labels", None | 1553 logging.warn("### added %s to defsess %s for %s" % (trackkey, default_session_data, key )) |
1574 ) | 1554 else: |
1555 default_session_data["style"][key] = {} | |
1556 logging.warn("@@@@ no options/style found for %s" % (key)) | |
1557 | |
1558 if track.find("options/style_labels"): | |
1559 default_session_data["style_labels"][key] = { | |
1560 item.tag: parse_style_conf(item) | |
1561 for item in track.find("options/style_labels") | |
1562 } | |
1575 default_session_data["defaultLocation"] = root.find( | 1563 default_session_data["defaultLocation"] = root.find( |
1576 "metadata/general/defaultLocation" | 1564 "metadata/general/defaultLocation" |
1577 ).text | 1565 ).text |
1578 default_session_data["session_name"] = root.find( | 1566 default_session_data["session_name"] = root.find( |
1579 "metadata/general/session_name" | 1567 "metadata/general/session_name" |