Mercurial > repos > fubar > jbrowse2
comparison jbrowse2.py @ 46:4181e97c70a7 draft
planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit 3a43e9e0ffce0966101203102e769d1ced28618a
| author | fubar |
|---|---|
| date | Mon, 04 Mar 2024 09:47:19 +0000 |
| parents | bea0916e1080 |
| children | 3e53204c2419 |
Comparison legend: equal | deleted | inserted | replaced
| 45:0ec526d2d8c1 | 46:4181e97c70a7 |
|---|---|
| 9 import re | 9 import re |
| 10 import shutil | 10 import shutil |
| 11 import struct | 11 import struct |
| 12 import subprocess | 12 import subprocess |
| 13 import tempfile | 13 import tempfile |
| 14 import urllib.request | |
| 14 import xml.etree.ElementTree as ET | 15 import xml.etree.ElementTree as ET |
| 15 from collections import defaultdict | 16 from collections import defaultdict |
| 16 | 17 |
| 17 logging.basicConfig(level=logging.INFO) | 18 logging.basicConfig(level=logging.INFO) |
| 18 log = logging.getLogger("jbrowse") | 19 log = logging.getLogger("jbrowse") |
| 444 ] | 445 ] |
| 445 } | 446 } |
| 446 | 447 |
| 447 def process_genomes(self): | 448 def process_genomes(self): |
| 448 assemblies = [] | 449 assemblies = [] |
| 450 useuri = False | |
| 449 for i, genome_node in enumerate(self.genome_paths): | 451 for i, genome_node in enumerate(self.genome_paths): |
| 452 if genome_node["useuri"].strip().lower() == "yes": | |
| 453 useuri = True | |
| 450 genome_name = genome_node["meta"]["dataset_dname"].strip() | 454 genome_name = genome_node["meta"]["dataset_dname"].strip() |
| 451 if len(genome_name.split()) > 1: | 455 if len(genome_name.split()) > 1: |
| 452 genome_name = genome_name.split()[0] | 456 genome_name = genome_name.split()[0] |
| 453 # spaces and cruft break scripts when substituted | 457 # spaces and cruft break scripts when substituted |
| 454 if genome_name not in self.genome_names: | 458 if genome_name not in self.genome_names: |
| 455 # ignore dupes - can have multiple pafs with same references? | 459 # ignore dupes - can have multiple pafs with same references? |
| 456 fapath = genome_node["path"] | 460 fapath = genome_node["path"] |
| 457 assem = self.make_assembly(fapath, genome_name) | 461 if not useuri: |
| 462 fapath = os.path.realpath(fapath) | |
| 463 assem = self.make_assembly(fapath, genome_name, useuri) | |
| 458 assemblies.append(assem) | 464 assemblies.append(assem) |
| 459 self.genome_names.append(genome_name) | 465 self.genome_names.append(genome_name) |
| 460 if self.genome_name is None: | 466 if self.genome_name is None: |
| 461 self.genome_name = ( | 467 self.genome_name = ( |
| 462 genome_name # first one for all tracks - other than paf | 468 genome_name # first one for all tracks - other than paf |
| 463 ) | 469 ) |
| 464 self.genome_firstcontig = None | 470 self.genome_firstcontig = None |
| 465 fl = open(fapath, "r").readline().strip().split(">") | 471 if not useuri: |
| 466 if len(fl) > 1: | 472 # https://lazarus.name/jbrowse/fish/bigwig_0_coverage_bedgraph_cov_count_count_bw.bigwig |
| 467 fl = fl[1] | 473 # https://lazarus.name/jbrowse/fish/klBraLanc5.haps_combined.decontam.20230620.fasta.fa.gz |
| 468 if len(fl.split()) > 1: | 474 fl = open(fapath, "r").readline() |
| 469 self.genome_firstcontig = fl.split()[0].strip() | 475 fls = fl.strip().split(">") |
| 476 if len(fls) > 1: | |
| 477 fl = fls[1] | |
| 478 if len(fl.split()) > 1: | |
| 479 self.genome_firstcontig = fl.split()[0].strip() | |
| 480 else: | |
| 481 self.genome_firstcontig = fl | |
| 470 else: | 482 else: |
| 471 self.genome_firstcontig = fl | 483 fl = urrlib.request.urlopen(faname+".fai").readline() |
| 484 if fl: # is first row of the text fai so the first contig name | |
| 485 self.genome_firstcontig = fl.decode('utf8').strip().split()[0] | |
| 472 if self.config_json.get("assemblies", None): | 486 if self.config_json.get("assemblies", None): |
| 473 self.config_json["assemblies"] += assemblies | 487 self.config_json["assemblies"] += assemblies |
| 474 else: | 488 else: |
| 475 self.config_json["assemblies"] = assemblies | 489 self.config_json["assemblies"] = assemblies |
| 476 | 490 |
| 477 def make_assembly(self, fapath, gname): | 491 def make_assembly(self, fapath, gname, useuri): |
| 478 | 492 if useuri: |
| 479 faname = gname + ".fa.gz" | 493 faname = fapath |
| 480 fadest = os.path.realpath(os.path.join(self.outdir, faname)) | 494 adapter = { |
| 481 cmd = "bgzip -i -c %s -I %s.gzi > %s && samtools faidx %s" % ( | 495 "type": "BgzipFastaAdapter", |
| 482 fapath, | 496 "fastaLocation": { |
| 483 fadest, | 497 "uri": faname, |
| 484 fadest, | 498 "locationType": "UriLocation", |
| 485 fadest, | 499 }, |
| 486 ) | 500 "faiLocation": { |
| 487 self.subprocess_popen(cmd) | 501 "uri": faname + ".fai", |
| 488 adapter = { | 502 "locationType": "UriLocation", |
| 489 "type": "BgzipFastaAdapter", | 503 }, |
| 490 "fastaLocation": { | 504 "gziLocation": { |
| 491 "uri": faname, | 505 "uri": faname + ".gzi", |
| 492 }, | 506 "locationType": "UriLocation", |
| 493 "faiLocation": { | 507 }, |
| 494 "uri": faname + ".fai", | 508 } |
| 495 }, | 509 else: |
| 496 "gziLocation": { | 510 faname = gname + ".fa.gz" |
| 497 "uri": faname + ".gzi", | 511 fadest = os.path.realpath(os.path.join(self.outdir, faname)) |
| 498 }, | 512 cmd = "bgzip -i -c %s -I %s.gzi > %s && samtools faidx %s" % ( |
| 499 } | 513 fapath, |
| 514 fadest, | |
| 515 fadest, | |
| 516 fadest, | |
| 517 ) | |
| 518 self.subprocess_popen(cmd) | |
| 519 | |
| 520 adapter = { | |
| 521 "type": "BgzipFastaAdapter", | |
| 522 "fastaLocation": { | |
| 523 "uri": faname, | |
| 524 }, | |
| 525 "faiLocation": { | |
| 526 "uri": faname + ".fai", | |
| 527 }, | |
| 528 "gziLocation": { | |
| 529 "uri": faname + ".gzi", | |
| 530 }, | |
| 531 } | |
| 500 self.genome_sequence_adapter = adapter | 532 self.genome_sequence_adapter = adapter |
| 501 trackDict = { | 533 trackDict = { |
| 502 "name": gname, | 534 "name": gname, |
| 503 "sequence": { | 535 "sequence": { |
| 504 "type": "ReferenceSequenceTrack", | 536 "type": "ReferenceSequenceTrack", |
| 526 ] | 558 ] |
| 527 self.subprocess_check_call(cmd) | 559 self.subprocess_check_call(cmd) |
| 528 | 560 |
| 529 def write_config(self): | 561 def write_config(self): |
| 530 with open(self.config_json_file, "w") as fp: | 562 with open(self.config_json_file, "w") as fp: |
| 531 json.dump(self.config_json, fp) | 563 json.dump(self.config_json, fp, indent=2) |
| 532 | 564 |
| 533 def text_index(self): | 565 def text_index(self): |
| 534 # Index tracks | 566 # Index tracks |
| 535 args = [ | 567 args = [ |
| 536 "jbrowse", | 568 "jbrowse", |
| 565 """ | 597 """ |
| 566 tId = trackData["label"] | 598 tId = trackData["label"] |
| 567 # can be served - if public. | 599 # can be served - if public. |
| 568 # dsId = trackData["metadata"]["dataset_id"] | 600 # dsId = trackData["metadata"]["dataset_id"] |
| 569 # url = "%s/api/datasets/%s/display?to_ext=hic " % (self.giURL, dsId) | 601 # url = "%s/api/datasets/%s/display?to_ext=hic " % (self.giURL, dsId) |
| 570 hname = trackData["hic_url"] | 602 useuri = trackData["useuri"].lower() == "yes" |
| 571 floc = { | 603 if useuri: |
| 572 "uri": hname, | 604 uri = data |
| 573 } | 605 else: |
| 606 uri = trackData["hic_url"] | |
| 574 trackDict = { | 607 trackDict = { |
| 575 "type": "HicTrack", | 608 "type": "HicTrack", |
| 576 "trackId": tId, | 609 "trackId": tId, |
| 577 "name": hname, | 610 "name": uri, |
| 578 "assemblyNames": [self.genome_name], | 611 "assemblyNames": [self.genome_name], |
| 579 "adapter": { | 612 "adapter": { |
| 580 "type": "HicAdapter", | 613 "type": "HicAdapter", |
| 581 "hicLocation": floc, | 614 "hicLocation": uri, |
| 582 }, | 615 }, |
| 583 "displays": [ | 616 "displays": [ |
| 584 { | 617 { |
| 585 "type": "LinearHicDisplay", | 618 "type": "LinearHicDisplay", |
| 586 "displayId": "%s-LinearHicDisplay" % tId, | 619 "displayId": "%s-LinearHicDisplay" % tId, |
| 597 from https://github.com/cmdcolin/maf2bed | 630 from https://github.com/cmdcolin/maf2bed |
| 598 Note: Both formats start with a MAF as input, and note that your MAF file should contain the species name and chromosome name | 631 Note: Both formats start with a MAF as input, and note that your MAF file should contain the species name and chromosome name |
| 599 e.g. hg38.chr1 in the sequence identifiers. | 632 e.g. hg38.chr1 in the sequence identifiers. |
| 600 need the reference id - eg hg18, for maf2bed.pl as the first parameter | 633 need the reference id - eg hg18, for maf2bed.pl as the first parameter |
| 601 """ | 634 """ |
| 635 tId = trackData["label"] | |
| 602 mafPlugin = { | 636 mafPlugin = { |
| 603 "plugins": [ | 637 "plugins": [ |
| 604 { | 638 { |
| 605 "name": "MafViewer", | 639 "name": "MafViewer", |
| 606 "url": "https://unpkg.com/jbrowse-plugin-mafviewer/dist/jbrowse-plugin-mafviewer.umd.production.min.js", | 640 "url": "https://unpkg.com/jbrowse-plugin-mafviewer/dist/jbrowse-plugin-mafviewer.umd.production.min.js", |
| 607 } | 641 } |
| 608 ] | 642 ] |
| 609 } | 643 } |
| 610 tId = trackData["label"] | 644 |
| 611 fname = "%s.bed" % tId | 645 fname = "%s.bed" % tId |
| 612 dest = "%s/%s" % (self.outdir, fname) | 646 dest = "%s/%s" % (self.outdir, fname) |
| 613 gname = self.genome_name | 647 gname = self.genome_name |
| 614 cmd = [ | 648 cmd = [ |
| 615 "bash", | 649 "bash", |
| 742 "resolution": 1, | 776 "resolution": 1, |
| 743 "posColor": "rgb(228, 26, 28)", | 777 "posColor": "rgb(228, 26, 28)", |
| 744 "negColor": "rgb(255, 255, 51)", | 778 "negColor": "rgb(255, 255, 51)", |
| 745 "constraints": {} | 779 "constraints": {} |
| 746 """ | 780 """ |
| 747 url = "%s.bigwig" % trackData["label"] | 781 useuri = trackData["useuri"].lower() == "yes" |
| 748 # slashes in names cause path trouble | 782 if useuri: |
| 749 dest = os.path.join(self.outdir, url) | 783 url = data |
| 750 cmd = ["cp", data, dest] | 784 else: |
| 751 self.subprocess_check_call(cmd) | 785 url = "%s.bigwig" % trackData["label"] |
| 786 # slashes in names cause path trouble | |
| 787 dest = os.path.join(self.outdir, url) | |
| 788 cmd = ["cp", data, dest] | |
| 789 self.subprocess_check_call(cmd) | |
| 752 bwloc = {"uri": url} | 790 bwloc = {"uri": url} |
| 753 tId = trackData["label"] | 791 tId = trackData["label"] |
| 754 trackDict = { | 792 trackDict = { |
| 755 "type": "QuantitativeTrack", | 793 "type": "QuantitativeTrack", |
| 756 "trackId": tId, | 794 "trackId": tId, |
| 772 style_json = self._prepare_track_style(trackDict) | 810 style_json = self._prepare_track_style(trackDict) |
| 773 trackDict["style"] = style_json | 811 trackDict["style"] = style_json |
| 774 self.tracksToAdd.append(trackDict) | 812 self.tracksToAdd.append(trackDict) |
| 775 self.trackIdlist.append(tId) | 813 self.trackIdlist.append(tId) |
| 776 | 814 |
| 777 def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs): | 815 def add_bam(self, data, trackData, bam_index=None, **kwargs): |
| 778 tId = trackData["label"] | 816 tId = trackData["label"] |
| 779 fname = "%s.bam" % trackData["label"] | 817 useuri = trackData["useuri"].lower() == "yes" |
| 780 dest = "%s/%s" % (self.outdir, fname) | 818 bindex = bam_index |
| 781 url = fname | 819 if useuri: |
| 782 self.subprocess_check_call(["cp", data, dest]) | 820 url = data |
| 783 bloc = {"uri": url} | |
| 784 if bam_index is not None and os.path.exists(os.path.realpath(bam_index)): | |
| 785 # bai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest | |
| 786 self.subprocess_check_call( | |
| 787 ["cp", os.path.realpath(bam_index), dest + ".bai"] | |
| 788 ) | |
| 789 else: | 821 else: |
| 790 # Can happen in exotic condition | 822 fname = "%s.bam" % trackData["label"] |
| 791 # e.g. if bam imported as symlink with datatype=unsorted.bam, then datatype changed to bam | 823 dest = "%s/%s" % (self.outdir, fname) |
| 792 # => no index generated by galaxy, but there might be one next to the symlink target | 824 url = fname |
| 793 # this trick allows to skip the bam sorting made by galaxy if already done outside | 825 bindex = fname + '.bai' |
| 794 if os.path.exists(os.path.realpath(data) + ".bai"): | 826 self.subprocess_check_call(["cp", data, dest]) |
| 795 self.symlink_or_copy(os.path.realpath(data) + ".bai", dest + ".bai") | 827 if bam_index is not None and os.path.exists(bam_index): |
| 796 else: | 828 if not os.path.exists(bindex): |
| 797 log.warn("Could not find a bam index (.bai file) for %s", data) | 829 # bai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest |
| 830 self.subprocess_check_call( | |
| 831 ["cp", bam_index, bindex] | |
| 832 ) | |
| 833 else: | |
| 834 # Can happen in exotic condition | |
| 835 # e.g. if bam imported as symlink with datatype=unsorted.bam, then datatype changed to bam | |
| 836 # => no index generated by galaxy, but there might be one next to the symlink target | |
| 837 # this trick allows to skip the bam sorting made by galaxy if already done outside | |
| 838 if os.path.exists(os.path.realpath(data) + ".bai"): | |
| 839 self.symlink_or_copy(os.path.realpath(data) + ".bai", bindex) | |
| 840 else: | |
| 841 log.warn("Could not find a bam index (.bai file) for %s", data) | |
| 798 trackDict = { | 842 trackDict = { |
| 799 "type": "AlignmentsTrack", | 843 "type": "AlignmentsTrack", |
| 800 "trackId": tId, | 844 "trackId": tId, |
| 801 "name": trackData["name"], | 845 "name": trackData["name"], |
| 802 "assemblyNames": [self.genome_name], | 846 "assemblyNames": [self.genome_name], |
| 803 "adapter": { | 847 "adapter": { |
| 804 "type": "BamAdapter", | 848 "type": "BamAdapter", |
| 805 "bamLocation": bloc, | 849 "bamLocation": {"uri": url}, |
| 806 "index": { | 850 "index": { |
| 807 "location": { | 851 "location": { |
| 808 "uri": fname + ".bai", | 852 "uri": bindex, |
| 809 } | 853 } |
| 810 }, | 854 }, |
| 811 }, | 855 }, |
| 812 "displays": [ | 856 "displays": [ |
| 813 { | 857 { |
| 819 style_json = self._prepare_track_style(trackDict) | 863 style_json = self._prepare_track_style(trackDict) |
| 820 trackDict["style"] = style_json | 864 trackDict["style"] = style_json |
| 821 self.tracksToAdd.append(trackDict) | 865 self.tracksToAdd.append(trackDict) |
| 822 self.trackIdlist.append(tId) | 866 self.trackIdlist.append(tId) |
| 823 | 867 |
| 824 def add_cram(self, data, trackData, cramOpts, cram_index=None, **kwargs): | 868 def add_cram(self, data, trackData, cram_index=None, **kwargs): |
| 825 tId = trackData["label"] | 869 tId = trackData["label"] |
| 826 fname = "%s.cram" % trackData["label"] | 870 useuri = trackData["useuri"].lower() == "yes" |
| 827 dest = "%s/%s" % (self.outdir, fname) | 871 bindex = cram_index |
| 828 url = fname | 872 if useuri: |
| 829 self.subprocess_check_call(["cp", data, dest]) | 873 url = data |
| 830 bloc = {"uri": url} | |
| 831 if cram_index is not None and os.path.exists(os.path.realpath(cram_index)): | |
| 832 # most probably made by galaxy and stored in galaxy dirs, need to copy it to dest | |
| 833 self.subprocess_check_call( | |
| 834 ["cp", os.path.realpath(cram_index), dest + ".crai"] | |
| 835 ) | |
| 836 else: | 874 else: |
| 837 # Can happen in exotic condition | 875 fname = "%s.cram" % trackData["label"] |
| 838 # e.g. if bam imported as symlink with datatype=unsorted.bam, then datatype changed to bam | 876 dest = "%s/%s" % (self.outdir, fname) |
| 839 # => no index generated by galaxy, but there might be one next to the symlink target | 877 bindex = fname + '.bai' |
| 840 # this trick allows to skip the bam sorting made by galaxy if already done outside | 878 url = fname |
| 841 if os.path.exists(os.path.realpath(data) + ".crai"): | 879 self.subprocess_check_call(["cp", data, dest]) |
| 842 self.symlink_or_copy(os.path.realpath(data) + ".crai", dest + ".crai") | 880 |
| 843 else: | 881 if bindex is not None and os.path.exists(bindex): |
| 844 log.warn("Could not find a cram index (.crai file) for %s", data) | 882 if not os.path.exists(dest+'.crai'): |
| 883 # most probably made by galaxy and stored in galaxy dirs, need to copy it to dest | |
| 884 self.subprocess_check_call( | |
| 885 ["cp", os.path.realpath(cram_index), dest + ".crai"] | |
| 886 ) | |
| 887 else: | |
| 888 # Can happen in exotic condition | |
| 889 # e.g. if bam imported as symlink with datatype=unsorted.bam, then datatype changed to bam | |
| 890 # => no index generated by galaxy, but there might be one next to the symlink target | |
| 891 # this trick allows to skip the bam sorting made by galaxy if already done outside | |
| 892 if os.path.exists(os.path.realpath(data) + ".crai"): | |
| 893 self.symlink_or_copy( | |
| 894 os.path.realpath(data) + ".crai", dest + ".crai" | |
| 895 ) | |
| 896 else: | |
| 897 log.warn("Could not find a cram index (.crai file) for %s", data) | |
| 845 trackDict = { | 898 trackDict = { |
| 846 "type": "AlignmentsTrack", | 899 "type": "AlignmentsTrack", |
| 847 "trackId": tId, | 900 "trackId": tId, |
| 848 "name": trackData["name"], | 901 "name": trackData["name"], |
| 849 "assemblyNames": [self.genome_name], | 902 "assemblyNames": [self.genome_name], |
| 850 "adapter": { | 903 "adapter": { |
| 851 "type": "CramAdapter", | 904 "type": "CramAdapter", |
| 852 "cramLocation": bloc, | 905 "cramLocation": {"uri": url}, |
| 853 "craiLocation": { | 906 "craiLocation": { |
| 854 "uri": fname + ".crai", | 907 "uri": bindex, |
| 855 }, | 908 }, |
| 856 "sequenceAdapter": self.genome_sequence_adapter, | 909 "sequenceAdapter": self.genome_sequence_adapter, |
| 857 }, | 910 }, |
| 858 "displays": [ | 911 "displays": [ |
| 859 { | 912 { |
| 871 tId = trackData["label"] | 924 tId = trackData["label"] |
| 872 # url = "%s/api/datasets/%s/display" % ( | 925 # url = "%s/api/datasets/%s/display" % ( |
| 873 # self.giURL, | 926 # self.giURL, |
| 874 # trackData["metadata"]["dataset_id"], | 927 # trackData["metadata"]["dataset_id"], |
| 875 # ) | 928 # ) |
| 876 url = "%s.vcf.gz" % tId | 929 |
| 877 dest = "%s/%s" % (self.outdir, url) | 930 useuri = trackData["useuri"].lower() == "yes" |
| 878 cmd = "bgzip -c %s > %s" % (data, dest) | 931 if useuri: |
| 879 self.subprocess_popen(cmd) | 932 url = data |
| 880 cmd = ["tabix", "-f", "-p", "vcf", dest] | 933 else: |
| 881 self.subprocess_check_call(cmd) | 934 url = "%s.vcf.gz" % tId |
| 935 dest = "%s/%s" % (self.outdir, url) | |
| 936 cmd = "bgzip -c %s > %s" % (data, dest) | |
| 937 self.subprocess_popen(cmd) | |
| 938 cmd = ["tabix", "-f", "-p", "vcf", dest] | |
| 939 self.subprocess_check_call(cmd) | |
| 882 trackDict = { | 940 trackDict = { |
| 883 "type": "VariantTrack", | 941 "type": "VariantTrack", |
| 884 "trackId": tId, | 942 "trackId": tId, |
| 885 "name": trackData["name"], | 943 "name": trackData["name"], |
| 886 "assemblyNames": [self.genome_name], | 944 "assemblyNames": [self.genome_name], |
| 887 "adapter": { | 945 "adapter": { |
| 888 "type": "VcfTabixAdapter", | 946 "type": "VcfTabixAdapter", |
| 889 "vcfGzLocation": { | 947 "vcfGzLocation": { |
| 890 "uri": url, | 948 "uri": url |
| 891 }, | 949 }, |
| 892 "index": { | 950 "index": { |
| 893 "location": { | 951 "location": { |
| 894 "uri": url + ".tbi", | 952 "uri": url + ".tbi", |
| 895 } | 953 } |
| 915 self.tracksToAdd.append(trackDict) | 973 self.tracksToAdd.append(trackDict) |
| 916 self.trackIdlist.append(tId) | 974 self.trackIdlist.append(tId) |
| 917 | 975 |
| 918 def _sort_gff(self, data, dest): | 976 def _sort_gff(self, data, dest): |
| 919 # Only index if not already done | 977 # Only index if not already done |
| 920 if not os.path.exists(dest + ".gz"): | 978 if not os.path.exists(dest): |
| 921 cmd = "jbrowse sort-gff '%s' | bgzip -c > '%s.gz'" % ( | 979 cmd = "jbrowse sort-gff '%s' | bgzip -c > '%s'" % ( |
| 922 data, | 980 data, |
| 923 dest, | 981 dest, |
| 924 ) # "gff3sort.pl --precise '%s' | grep -v \"^$\" > '%s'" | 982 ) # "gff3sort.pl --precise '%s' | grep -v \"^$\" > '%s'" |
| 925 self.subprocess_popen(cmd) | 983 self.subprocess_popen(cmd) |
| 926 self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest + ".gz"]) | 984 self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest]) |
| 927 | 985 |
| 928 def _sort_bed(self, data, dest): | 986 def _sort_bed(self, data, dest): |
| 929 # Only index if not already done | 987 # Only index if not already done |
| 930 if not os.path.exists(dest): | 988 if not os.path.exists(dest): |
| 931 cmd = "sort -k1,1 -k2,2n '%s' | bgzip -c > '%s'" % (data, dest) | 989 cmd = "sort -k1,1 -k2,2n '%s' | bgzip -c > '%s'" % (data, dest) |
| 932 self.subprocess_popen(cmd) | 990 self.subprocess_popen(cmd) |
| 933 cmd = ["tabix", "-f", "-p", "bed", dest] | 991 cmd = ["tabix", "-f", "-p", "bed", dest] |
| 934 self.subprocess_check_call(cmd) | 992 self.subprocess_check_call(cmd) |
| 935 | 993 |
| 936 def add_gff(self, data, ext, trackData): | 994 def add_gff(self, data, ext, trackData): |
| 937 url = "%s.%s" % (trackData["label"], ext) | 995 useuri = trackData["useuri"].lower() == "yes" |
| 938 dest = "%s/%s" % (self.outdir, url) | 996 if useuri: |
| 939 self._sort_gff(data, dest) | 997 url = trackData["path"] |
| 940 url = url + ".gz" | 998 else: |
| 999 url = "%s.%s.gz" % (trackData["label"], ext) | |
| 1000 dest = "%s/%s" % (self.outdir, url) | |
| 1001 self._sort_gff(data, dest) | |
| 941 tId = trackData["label"] | 1002 tId = trackData["label"] |
| 942 trackDict = { | 1003 trackDict = { |
| 943 "type": "FeatureTrack", | 1004 "type": "FeatureTrack", |
| 944 "trackId": tId, | 1005 "trackId": tId, |
| 945 "name": trackData["name"], | 1006 "name": trackData["name"], |
| 970 trackDict["style"] = style_json | 1031 trackDict["style"] = style_json |
| 971 self.tracksToAdd.append(trackDict) | 1032 self.tracksToAdd.append(trackDict) |
| 972 self.trackIdlist.append(tId) | 1033 self.trackIdlist.append(tId) |
| 973 | 1034 |
| 974 def add_bed(self, data, ext, trackData): | 1035 def add_bed(self, data, ext, trackData): |
| 975 url = "%s.%s" % (trackData["label"], ext) | |
| 976 dest = "%s/%s.gz" % (self.outdir, url) | |
| 977 self._sort_bed(data, dest) | |
| 978 tId = trackData["label"] | 1036 tId = trackData["label"] |
| 979 url = url + ".gz" | 1037 useuri = trackData["useuri"].lower() == "yes" |
| 1038 if useuri: | |
| 1039 url = data | |
| 1040 else: | |
| 1041 url = "%s.%s.gz" % (trackData["label"], ext) | |
| 1042 dest = "%s/%s" % (self.outdir, url) | |
| 1043 self._sort_bed(data, dest) | |
| 980 trackDict = { | 1044 trackDict = { |
| 981 "type": "FeatureTrack", | 1045 "type": "FeatureTrack", |
| 982 "trackId": tId, | 1046 "trackId": tId, |
| 983 "name": trackData["name"], | 1047 "name": trackData["name"], |
| 984 "assemblyNames": [self.genome_name], | 1048 "assemblyNames": [self.genome_name], |
| 1066 def process_annotations(self, track): | 1130 def process_annotations(self, track): |
| 1067 category = track["category"].replace("__pd__date__pd__", TODAY) | 1131 category = track["category"].replace("__pd__date__pd__", TODAY) |
| 1068 for i, ( | 1132 for i, ( |
| 1069 dataset_path, | 1133 dataset_path, |
| 1070 dataset_ext, | 1134 dataset_ext, |
| 1135 useuri, | |
| 1071 track_human_label, | 1136 track_human_label, |
| 1072 extra_metadata, | 1137 extra_metadata, |
| 1073 ) in enumerate(track["trackfiles"]): | 1138 ) in enumerate(track["trackfiles"]): |
| 1074 # Unsanitize labels (element_identifiers are always sanitized by Galaxy) | 1139 if not dataset_path.strip().startswith("http"): |
| 1075 for key, value in mapped_chars.items(): | 1140 # Unsanitize labels (element_identifiers are always sanitized by Galaxy) |
| 1076 track_human_label = track_human_label.replace(value, key) | 1141 for key, value in mapped_chars.items(): |
| 1077 track_human_label = track_human_label.replace(" ", "_") | 1142 track_human_label = track_human_label.replace(value, key) |
| 1143 track_human_label = track_human_label.replace(" ", "_") | |
| 1078 outputTrackConfig = { | 1144 outputTrackConfig = { |
| 1079 "category": category, | 1145 "category": category, |
| 1080 "style": {}, | 1146 "style": {}, |
| 1081 } | 1147 } |
| 1082 | 1148 |
| 1083 outputTrackConfig["key"] = track_human_label | 1149 outputTrackConfig["key"] = track_human_label |
| 1150 outputTrackConfig["useuri"] = useuri | |
| 1084 | 1151 |
| 1085 outputTrackConfig["trackset"] = track.get("trackset", {}) | 1152 outputTrackConfig["trackset"] = track.get("trackset", {}) |
| 1086 outputTrackConfig["label"] = "%s_%i_%s" % ( | 1153 outputTrackConfig["label"] = "%s_%i_%s" % ( |
| 1087 dataset_ext, | 1154 dataset_ext, |
| 1088 i, | 1155 i, |
| 1137 dataset_path, | 1204 dataset_path, |
| 1138 outputTrackConfig, | 1205 outputTrackConfig, |
| 1139 ) | 1206 ) |
| 1140 elif dataset_ext == "bam": | 1207 elif dataset_ext == "bam": |
| 1141 real_indexes = track["conf"]["options"]["bam"]["bam_index"] | 1208 real_indexes = track["conf"]["options"]["bam"]["bam_index"] |
| 1142 if not isinstance(real_indexes, list): | |
| 1143 real_indexes = [real_indexes] | |
| 1144 | |
| 1145 self.add_bam( | 1209 self.add_bam( |
| 1146 dataset_path, | 1210 dataset_path, |
| 1147 outputTrackConfig, | 1211 outputTrackConfig, |
| 1148 track["conf"]["options"]["bam"], | 1212 bam_index=real_indexes, |
| 1149 bam_index=real_indexes[i], | |
| 1150 ) | 1213 ) |
| 1151 elif dataset_ext == "cram": | 1214 elif dataset_ext == "cram": |
| 1152 real_indexes = track["conf"]["options"]["cram"][ "cram_index"] | 1215 real_indexes = track["conf"]["options"]["cram"]["cram_index"] |
| 1153 if not isinstance(real_indexes, list): | |
| 1154 real_indexes = [real_indexes] | |
| 1155 | |
| 1156 self.add_cram( | 1216 self.add_cram( |
| 1157 dataset_path, | 1217 dataset_path, |
| 1158 outputTrackConfig, | 1218 outputTrackConfig, |
| 1159 track["conf"]["options"]["cram"], | 1219 cram_index=real_indexes, |
| 1160 cram_index=real_indexes[i], | |
| 1161 ) | 1220 ) |
| 1162 elif dataset_ext == "blastxml": | 1221 elif dataset_ext == "blastxml": |
| 1163 self.add_blastxml( | 1222 self.add_blastxml( |
| 1164 dataset_path, | 1223 dataset_path, |
| 1165 outputTrackConfig, | 1224 outputTrackConfig, |
| 1219 drdict = { | 1278 drdict = { |
| 1220 "reversed": False, | 1279 "reversed": False, |
| 1221 "assemblyName": self.genome_name, | 1280 "assemblyName": self.genome_name, |
| 1222 "start": 0, | 1281 "start": 0, |
| 1223 "end": 100000, | 1282 "end": 100000, |
| 1283 "refName": "x", | |
| 1224 } | 1284 } |
| 1225 | 1285 |
| 1226 if data.get("defaultLocation", ""): | 1286 if data.get("defaultLocation", ""): |
| 1227 ddl = data["defaultLocation"] | 1287 ddl = data["defaultLocation"] |
| 1228 loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", ddl) | 1288 loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", ddl) |
| 1305 self.config_json.update(config_json) | 1365 self.config_json.update(config_json) |
| 1306 with open(config_path, "w") as config_file: | 1366 with open(config_path, "w") as config_file: |
| 1307 json.dump(self.config_json, config_file, indent=2) | 1367 json.dump(self.config_json, config_file, indent=2) |
| 1308 | 1368 |
| 1309 def clone_jbrowse(self): | 1369 def clone_jbrowse(self): |
| 1310 """Clone a JBrowse directory into a destination directory. This also works in Biocontainer testing now """ | 1370 """Clone a JBrowse directory into a destination directory. This also works in Biocontainer testing now""" |
| 1311 dest = self.outdir | 1371 dest = self.outdir |
| 1312 #self.subprocess_check_call(['jbrowse', 'create', dest, '--tag', f"{JB_VER}"]) | 1372 # self.subprocess_check_call(['jbrowse', 'create', dest, '--tag', f"{JB_VER}"]) |
| 1313 shutil.copytree(self.jbrowse2path, dest, dirs_exist_ok=True) | 1373 shutil.copytree(self.jbrowse2path, dest, dirs_exist_ok=True) |
| 1314 for fn in [ | 1374 for fn in [ |
| 1315 "asset-manifest.json", | 1375 "asset-manifest.json", |
| 1316 "favicon.ico", | 1376 "favicon.ico", |
| 1317 "robots.txt", | 1377 "robots.txt", |
| 1339 | 1399 |
| 1340 | 1400 |
| 1341 if __name__ == "__main__": | 1401 if __name__ == "__main__": |
| 1342 parser = argparse.ArgumentParser(description="", epilog="") | 1402 parser = argparse.ArgumentParser(description="", epilog="") |
| 1343 parser.add_argument("--xml", help="Track Configuration") | 1403 parser.add_argument("--xml", help="Track Configuration") |
| 1344 parser.add_argument("--jbrowse2path", help="Path to JBrowse2 directory in biocontainer or Conda") | 1404 parser.add_argument( |
| 1405 "--jbrowse2path", help="Path to JBrowse2 directory in biocontainer or Conda" | |
| 1406 ) | |
| 1345 parser.add_argument("--outdir", help="Output directory", default="out") | 1407 parser.add_argument("--outdir", help="Output directory", default="out") |
| 1346 parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.0.1") | 1408 parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.0.1") |
| 1347 args = parser.parse_args() | 1409 args = parser.parse_args() |
| 1348 tree = ET.parse(args.xml) | 1410 tree = ET.parse(args.xml) |
| 1349 root = tree.getroot() | 1411 root = tree.getroot() |
| 1358 jc = JbrowseConnector( | 1420 jc = JbrowseConnector( |
| 1359 outdir=args.outdir, | 1421 outdir=args.outdir, |
| 1360 jbrowse2path=args.jbrowse2path, | 1422 jbrowse2path=args.jbrowse2path, |
| 1361 genomes=[ | 1423 genomes=[ |
| 1362 { | 1424 { |
| 1363 "path": os.path.realpath(x.attrib["path"]), | 1425 "path": x.attrib["path"], |
| 1426 "label": x.attrib["label"], | |
| 1427 "useuri": x.attrib["useuri"], | |
| 1364 "meta": metadata_from_node(x.find("metadata")), | 1428 "meta": metadata_from_node(x.find("metadata")), |
| 1365 } | 1429 } |
| 1366 for x in root.findall("metadata/genomes/genome") | 1430 for x in root.findall("metadata/genomes/genome") |
| 1367 ], | 1431 ], |
| 1368 ) | 1432 ) |
| 1393 pass | 1457 pass |
| 1394 | 1458 |
| 1395 trackfiles = track.findall("files/trackFile") | 1459 trackfiles = track.findall("files/trackFile") |
| 1396 if trackfiles: | 1460 if trackfiles: |
| 1397 for x in track.findall("files/trackFile"): | 1461 for x in track.findall("files/trackFile"): |
| 1462 track_conf["useuri"] = x.attrib["useuri"] | |
| 1398 if is_multi_bigwig: | 1463 if is_multi_bigwig: |
| 1399 multi_bigwig_paths.append( | 1464 multi_bigwig_paths.append( |
| 1400 ( | 1465 ( |
| 1401 x.attrib["label"], | 1466 x.attrib["label"], |
| 1467 x.attrib["useuri"], | |
| 1402 os.path.realpath(x.attrib["path"]), | 1468 os.path.realpath(x.attrib["path"]), |
| 1403 ) | 1469 ) |
| 1404 ) | 1470 ) |
| 1405 else: | 1471 else: |
| 1406 if trackfiles: | 1472 if trackfiles: |
| 1407 metadata = metadata_from_node(x.find("metadata")) | 1473 metadata = metadata_from_node(x.find("metadata")) |
| 1408 track_conf["dataset_id"] = metadata["dataset_id"] | 1474 track_conf["dataset_id"] = metadata["dataset_id"] |
| 1409 track_conf["trackfiles"].append( | 1475 if x.attrib["useuri"].lower() == "yes": |
| 1410 ( | 1476 tfa = ( |
| 1411 os.path.realpath(x.attrib["path"]), | 1477 x.attrib["path"], |
| 1412 x.attrib["ext"], | 1478 x.attrib["ext"], |
| 1479 x.attrib["useuri"], | |
| 1413 x.attrib["label"], | 1480 x.attrib["label"], |
| 1414 metadata, | 1481 metadata, |
| 1415 ) | 1482 ) |
| 1416 ) | 1483 else: |
| 1484 tfa = ( | |
| 1485 os.path.realpath(x.attrib["path"]), | |
| 1486 x.attrib["ext"], | |
| 1487 x.attrib["useuri"], | |
| 1488 x.attrib["label"], | |
| 1489 metadata, | |
| 1490 ) | |
| 1491 track_conf["trackfiles"].append(tfa) | |
| 1417 | 1492 |
| 1418 if is_multi_bigwig: | 1493 if is_multi_bigwig: |
| 1419 metadata = metadata_from_node(x.find("metadata")) | 1494 metadata = metadata_from_node(x.find("metadata")) |
| 1420 | 1495 |
| 1421 track_conf["trackfiles"].append( | 1496 track_conf["trackfiles"].append( |
| 1445 # Only pertains to gff3 + blastxml. TODO? | 1520 # Only pertains to gff3 + blastxml. TODO? |
| 1446 track_conf["style"] = {t.tag: t.text for t in track.find("options/style")} | 1521 track_conf["style"] = {t.tag: t.text for t in track.find("options/style")} |
| 1447 except TypeError: | 1522 except TypeError: |
| 1448 track_conf["style"] = {} | 1523 track_conf["style"] = {} |
| 1449 pass | 1524 pass |
| 1450 track_conf["conf"] = etree_to_dict(track.find("options")) | |
| 1451 keys = jc.process_annotations(track_conf) | 1525 keys = jc.process_annotations(track_conf) |
| 1452 | 1526 |
| 1453 if keys: | 1527 if keys: |
| 1454 for key in keys: | 1528 for key in keys: |
| 1455 default_session_data["visibility"][ | 1529 default_session_data["visibility"][ |
