Mercurial > repos > fubar > jbrowse2
comparison jbrowse2.py @ 56:c0097a584a8a draft
planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit 4b5df41484f6bdf316edaf95b53c92d328ec1674
| field | value |
|---|---|
| author | fubar |
| date | Tue, 19 Mar 2024 02:33:40 +0000 |
| parents | 469c0f6d87d7 |
| children | 94264fe60478 |
Comparison legend: equal, deleted, inserted, replaced
| 55:469c0f6d87d7 | 56:c0097a584a8a |
|---|---|
| 17 from collections import defaultdict | 17 from collections import defaultdict |
| 18 | 18 |
| 19 logging.basicConfig(level=logging.INFO) | 19 logging.basicConfig(level=logging.INFO) |
| 20 log = logging.getLogger("jbrowse") | 20 log = logging.getLogger("jbrowse") |
| 21 | 21 |
| 22 JB2VER = "v2.10.2" | 22 JB2VER = "v2.10.3" |
| 23 # version pinned for cloning | 23 # version pinned for cloning |
| 24 | 24 |
| 25 TODAY = datetime.datetime.now().strftime("%Y-%m-%d") | 25 TODAY = datetime.datetime.now().strftime("%Y-%m-%d") |
| 26 GALAXY_INFRASTRUCTURE_URL = None | 26 GALAXY_INFRASTRUCTURE_URL = None |
| 27 | 27 |
| 469 genome_name # first one for all tracks - other than paf | 469 genome_name # first one for all tracks - other than paf |
| 470 ) | 470 ) |
| 471 self.genome_sequence_adapter = assem["sequence"]["adapter"] | 471 self.genome_sequence_adapter = assem["sequence"]["adapter"] |
| 472 self.genome_firstcontig = None | 472 self.genome_firstcontig = None |
| 473 if not useuri: | 473 if not useuri: |
| 474 # https://lazarus.name/jbrowse/fish/bigwig_0_coverage_bedgraph_cov_count_count_bw.bigwig | |
| 475 # https://lazarus.name/jbrowse/fish/klBraLanc5.haps_combined.decontam.20230620.fasta.fa.gz | |
| 476 fl = open(fapath, "r").readline() | 474 fl = open(fapath, "r").readline() |
| 477 fls = fl.strip().split(">") | 475 fls = fl.strip().split(">") |
| 478 if len(fls) > 1: | 476 if len(fls) > 1: |
| 479 fl = fls[1] | 477 fl = fls[1] |
| 480 if len(fl.split()) > 1: | 478 if len(fl.split()) > 1: |
| 604 useuri = trackData["useuri"].lower() == "yes" | 602 useuri = trackData["useuri"].lower() == "yes" |
| 605 if useuri: | 603 if useuri: |
| 606 uri = data | 604 uri = data |
| 607 else: | 605 else: |
| 608 uri = trackData["hic_url"] | 606 uri = trackData["hic_url"] |
| | 607 categ = trackData['category'] |
| 609 trackDict = { | 608 trackDict = { |
| 610 "type": "HicTrack", | 609 "type": "HicTrack", |
| 611 "trackId": tId, | 610 "trackId": tId, |
| 612 "name": uri, | 611 "name": uri, |
| 613 "assemblyNames": [self.genome_name], | 612 "assemblyNames": [self.genome_name], |
| | 613 "category": [categ,], |
| 614 "adapter": { | 614 "adapter": { |
| 615 "type": "HicAdapter", | 615 "type": "HicAdapter", |
| 616 "hicLocation": uri, | 616 "hicLocation": uri, |
| 617 }, | 617 }, |
| 618 "displays": [ | 618 "displays": [ |
| 641 "name": "MafViewer", | 641 "name": "MafViewer", |
| 642 "url": "https://unpkg.com/jbrowse-plugin-mafviewer/dist/jbrowse-plugin-mafviewer.umd.production.min.js", | 642 "url": "https://unpkg.com/jbrowse-plugin-mafviewer/dist/jbrowse-plugin-mafviewer.umd.production.min.js", |
| 643 } | 643 } |
| 644 ] | 644 ] |
| 645 } | 645 } |
| 646 | 646 categ = trackData['category'] |
| 647 fname = "%s.bed" % tId | 647 fname = "%s.bed" % tId |
| 648 dest = "%s/%s" % (self.outdir, fname) | 648 dest = "%s/%s" % (self.outdir, fname) |
| 649 gname = self.genome_name | 649 gname = self.genome_name |
| 650 cmd = [ | 650 cmd = [ |
| 651 "bash", | 651 "bash", |
| 667 samples = [x.split(".")[0] for x in samp] | 667 samples = [x.split(".")[0] for x in samp] |
| 668 trackDict = { | 668 trackDict = { |
| 669 "type": "MafTrack", | 669 "type": "MafTrack", |
| 670 "trackId": tId, | 670 "trackId": tId, |
| 671 "name": trackData["name"], | 671 "name": trackData["name"], |
| | 672 "category": [categ,], |
| 672 "adapter": { | 673 "adapter": { |
| 673 "type": "MafTabixAdapter", | 674 "type": "MafTabixAdapter", |
| 674 "samples": samples, | 675 "samples": samples, |
| 675 "bedGzLocation": { | 676 "bedGzLocation": { |
| 676 "uri": fname + ".sorted.bed.gz", | 677 "uri": fname + ".sorted.bed.gz", |
| 736 url = "%s.gff3" % trackData["label"] | 737 url = "%s.gff3" % trackData["label"] |
| 737 dest = "%s/%s" % (self.outdir, url) | 738 dest = "%s/%s" % (self.outdir, url) |
| 738 self._sort_gff(gff3, dest) | 739 self._sort_gff(gff3, dest) |
| 739 url = url + ".gz" | 740 url = url + ".gz" |
| 740 tId = trackData["label"] | 741 tId = trackData["label"] |
| | 742 categ = trackData['category'] |
| 741 trackDict = { | 743 trackDict = { |
| 742 "type": "FeatureTrack", | 744 "type": "FeatureTrack", |
| 743 "trackId": tId, | 745 "trackId": tId, |
| 744 "name": trackData["name"], | 746 "name": trackData["name"], |
| 745 "assemblyNames": [self.genome_name], | 747 "assemblyNames": [self.genome_name], |
| | 748 "category": [categ,], |
| 746 "adapter": { | 749 "adapter": { |
| 747 "type": "Gff3TabixAdapter", | 750 "type": "Gff3TabixAdapter", |
| 748 "gffGzLocation": { | 751 "gffGzLocation": { |
| 749 "uri": url, | 752 "uri": url, |
| 750 }, | 753 }, |
| 789 dest = os.path.join(self.outdir, url) | 792 dest = os.path.join(self.outdir, url) |
| 790 cmd = ["cp", data, dest] | 793 cmd = ["cp", data, dest] |
| 791 self.subprocess_check_call(cmd) | 794 self.subprocess_check_call(cmd) |
| 792 bwloc = {"uri": url} | 795 bwloc = {"uri": url} |
| 793 tId = trackData["label"] | 796 tId = trackData["label"] |
| | 797 categ = trackData['category'] |
| 794 trackDict = { | 798 trackDict = { |
| 795 "type": "QuantitativeTrack", | 799 "type": "QuantitativeTrack", |
| 796 "trackId": tId, | 800 "trackId": tId, |
| 797 "name": trackData["name"], | 801 "name": trackData["name"], |
| | 802 "category": [categ,], |
| 798 "assemblyNames": [ | 803 "assemblyNames": [ |
| 799 self.genome_name, | 804 self.genome_name, |
| 800 ], | 805 ], |
| 801 "adapter": { | 806 "adapter": { |
| 802 "type": "BigWigAdapter", | 807 "type": "BigWigAdapter", |
| 816 | 821 |
| 817 def add_bam(self, data, trackData, bam_index=None, **kwargs): | 822 def add_bam(self, data, trackData, bam_index=None, **kwargs): |
| 818 tId = trackData["label"] | 823 tId = trackData["label"] |
| 819 useuri = trackData["useuri"].lower() == "yes" | 824 useuri = trackData["useuri"].lower() == "yes" |
| 820 bindex = bam_index | 825 bindex = bam_index |
| | 826 categ = trackData['category'] |
| 821 if useuri: | 827 if useuri: |
| 822 url = data | 828 url = data |
| 823 else: | 829 else: |
| 824 fname = "%s.bam" % trackData["label"] | 830 fname = "%s.bam" % trackData["label"] |
| 825 dest = "%s/%s" % (self.outdir, fname) | 831 dest = "%s/%s" % (self.outdir, fname) |
| 843 log.warn("Could not find a bam index (.bai file) for %s", data) | 849 log.warn("Could not find a bam index (.bai file) for %s", data) |
| 844 trackDict = { | 850 trackDict = { |
| 845 "type": "AlignmentsTrack", | 851 "type": "AlignmentsTrack", |
| 846 "trackId": tId, | 852 "trackId": tId, |
| 847 "name": trackData["name"], | 853 "name": trackData["name"], |
| | 854 "category": [categ,], |
| 848 "assemblyNames": [self.genome_name], | 855 "assemblyNames": [self.genome_name], |
| 849 "adapter": { | 856 "adapter": { |
| 850 "type": "BamAdapter", | 857 "type": "BamAdapter", |
| 851 "bamLocation": {"uri": url}, | 858 "bamLocation": {"uri": url}, |
| 852 "index": { | 859 "index": { |
| 867 self.tracksToAdd.append(trackDict) | 874 self.tracksToAdd.append(trackDict) |
| 868 self.trackIdlist.append(tId) | 875 self.trackIdlist.append(tId) |
| 869 | 876 |
| 870 def add_cram(self, data, trackData, cram_index=None, **kwargs): | 877 def add_cram(self, data, trackData, cram_index=None, **kwargs): |
| 871 tId = trackData["label"] | 878 tId = trackData["label"] |
| | 879 categ = trackData['category'] |
| 872 useuri = trackData["useuri"].lower() == "yes" | 880 useuri = trackData["useuri"].lower() == "yes" |
| 873 if useuri: | 881 if useuri: |
| 874 url = data | 882 url = data |
| 875 else: | 883 else: |
| 876 fname = "%s.cram" % trackData["label"] | 884 fname = "%s.cram" % trackData["label"] |
| 890 self.subprocess_check_call(cmd) | 898 self.subprocess_check_call(cmd) |
| 891 trackDict = { | 899 trackDict = { |
| 892 "type": "AlignmentsTrack", | 900 "type": "AlignmentsTrack", |
| 893 "trackId": tId, | 901 "trackId": tId, |
| 894 "name": trackData["name"], | 902 "name": trackData["name"], |
| | 903 "category": [categ,], |
| 895 "assemblyNames": [self.genome_name], | 904 "assemblyNames": [self.genome_name], |
| 896 "adapter": { | 905 "adapter": { |
| 897 "type": "CramAdapter", | 906 "type": "CramAdapter", |
| 898 "cramLocation": {"uri": url}, | 907 "cramLocation": {"uri": url}, |
| 899 "craiLocation": { | 908 "craiLocation": { |
| 917 tId = trackData["label"] | 926 tId = trackData["label"] |
| 918 # url = "%s/api/datasets/%s/display" % ( | 927 # url = "%s/api/datasets/%s/display" % ( |
| 919 # self.giURL, | 928 # self.giURL, |
| 920 # trackData["metadata"]["dataset_id"], | 929 # trackData["metadata"]["dataset_id"], |
| 921 # ) | 930 # ) |
| 922 | 931 categ = trackData['category'] |
| 923 useuri = trackData["useuri"].lower() == "yes" | 932 useuri = trackData["useuri"].lower() == "yes" |
| 924 if useuri: | 933 if useuri: |
| 925 url = data | 934 url = data |
| 926 else: | 935 else: |
| 927 url = "%s.vcf.gz" % tId | 936 url = "%s.vcf.gz" % tId |
| 933 trackDict = { | 942 trackDict = { |
| 934 "type": "VariantTrack", | 943 "type": "VariantTrack", |
| 935 "trackId": tId, | 944 "trackId": tId, |
| 936 "name": trackData["name"], | 945 "name": trackData["name"], |
| 937 "assemblyNames": [self.genome_name], | 946 "assemblyNames": [self.genome_name], |
| | 947 "category": [categ,], |
| 938 "adapter": { | 948 "adapter": { |
| 939 "type": "VcfTabixAdapter", | 949 "type": "VcfTabixAdapter", |
| 940 "vcfGzLocation": { | 950 "vcfGzLocation": { |
| 941 "uri": url | 951 "uri": url |
| 942 }, | 952 }, |
| 991 else: | 1001 else: |
| 992 url = "%s.%s.gz" % (trackData["label"], ext) | 1002 url = "%s.%s.gz" % (trackData["label"], ext) |
| 993 dest = "%s/%s" % (self.outdir, url) | 1003 dest = "%s/%s" % (self.outdir, url) |
| 994 self._sort_gff(data, dest) | 1004 self._sort_gff(data, dest) |
| 995 tId = trackData["label"] | 1005 tId = trackData["label"] |
| | 1006 categ = trackData['category'] |
| 996 trackDict = { | 1007 trackDict = { |
| 997 "type": "FeatureTrack", | 1008 "type": "FeatureTrack", |
| 998 "trackId": tId, | 1009 "trackId": tId, |
| 999 "name": trackData["name"], | 1010 "name": trackData["name"], |
| 1000 "assemblyNames": [self.genome_name], | 1011 "assemblyNames": [self.genome_name], |
| | 1012 "category": [categ,], |
| 1001 "adapter": { | 1013 "adapter": { |
| 1002 "type": "Gff3TabixAdapter", | 1014 "type": "Gff3TabixAdapter", |
| 1003 "gffGzLocation": { | 1015 "gffGzLocation": { |
| 1004 "uri": url, | 1016 "uri": url, |
| 1005 }, | 1017 }, |
| 1025 self.tracksToAdd.append(trackDict) | 1037 self.tracksToAdd.append(trackDict) |
| 1026 self.trackIdlist.append(tId) | 1038 self.trackIdlist.append(tId) |
| 1027 | 1039 |
| 1028 def add_bed(self, data, ext, trackData): | 1040 def add_bed(self, data, ext, trackData): |
| 1029 tId = trackData["label"] | 1041 tId = trackData["label"] |
| | 1042 categ = trackData['category'] |
| 1030 useuri = trackData["useuri"].lower() == "yes" | 1043 useuri = trackData["useuri"].lower() == "yes" |
| 1031 if useuri: | 1044 if useuri: |
| 1032 url = data | 1045 url = data |
| 1033 else: | 1046 else: |
| 1034 url = "%s.%s.gz" % (trackData["label"], ext) | 1047 url = "%s.%s.gz" % (trackData["label"], ext) |
| 1038 "type": "FeatureTrack", | 1051 "type": "FeatureTrack", |
| 1039 "trackId": tId, | 1052 "trackId": tId, |
| 1040 "name": trackData["name"], | 1053 "name": trackData["name"], |
| 1041 "assemblyNames": [self.genome_name], | 1054 "assemblyNames": [self.genome_name], |
| 1042 "adapter": { | 1055 "adapter": { |
| | 1056 "category": [categ,], |
| 1043 "type": "BedTabixAdapter", | 1057 "type": "BedTabixAdapter", |
| 1044 "bedGzLocation": { | 1058 "bedGzLocation": { |
| 1045 "uri": url, | 1059 "uri": url, |
| 1046 }, | 1060 }, |
| 1047 "index": { | 1061 "index": { |
| 1071 self.trackIdlist.append(tId) | 1085 self.trackIdlist.append(tId) |
| 1072 | 1086 |
| 1073 def add_paf(self, data, trackData, pafOpts, **kwargs): | 1087 def add_paf(self, data, trackData, pafOpts, **kwargs): |
| 1074 tname = trackData["name"] | 1088 tname = trackData["name"] |
| 1075 tId = trackData["label"] | 1089 tId = trackData["label"] |
| | 1090 categ = trackData['category'] |
| 1076 pgnames = [x.strip() for x in pafOpts["genome_label"].split(",")] | 1091 pgnames = [x.strip() for x in pafOpts["genome_label"].split(",")] |
| 1077 pgpaths = [x.strip() for x in pafOpts["genome"].split(",")] | 1092 pgpaths = [x.strip() for x in pafOpts["genome"].split(",")] |
| 1078 passnames = [self.genome_name] # always first | 1093 passnames = [self.genome_name] # always first |
| 1079 for i, gname in enumerate(pgnames): | 1094 for i, gname in enumerate(pgnames): |
| 1080 if len(gname.split()) > 1: | 1095 if len(gname.split()) > 1: |
| 1097 self.symlink_or_copy(os.path.realpath(data), dest) | 1112 self.symlink_or_copy(os.path.realpath(data), dest) |
| 1098 trackDict = { | 1113 trackDict = { |
| 1099 "type": "SyntenyTrack", | 1114 "type": "SyntenyTrack", |
| 1100 "trackId": tId, | 1115 "trackId": tId, |
| 1101 "assemblyNames": passnames, | 1116 "assemblyNames": passnames, |
| | 1117 "category": [categ,], |
| 1102 "name": tname, | 1118 "name": tname, |
| 1103 "adapter": { | 1119 "adapter": { |
| 1104 "type": "PAFAdapter", | 1120 "type": "PAFAdapter", |
| 1105 "pafLocation": {"uri": url}, | 1121 "pafLocation": {"uri": url}, |
| 1106 "assemblyNames": passnames, | 1122 "assemblyNames": passnames, |
| 1107 }, | 1123 }, |
| 1108 # "displays": [ | 1124 "displays": [ |
| 1109 # { | 1125 { |
| 1110 # "type": "LinearSyntenyDisplay", | 1126 "type": "LinearSyntenyDisplay", |
| 1111 # "displayId": "%s-LinearSyntenyDisplay" % tId, | 1127 "displayId": "%s-LinearSyntenyDisplay" % tId, |
| 1112 # }, | 1128 }, |
| 1113 # { | 1129 { |
| 1114 # "type": "DotPlotDisplay", | 1130 "type": "DotPlotDisplay", |
| 1115 # "displayId": "%s-DotPlotDisplay" % tId, | 1131 "displayId": "%s-DotPlotDisplay" % tId, |
| 1116 # }, | 1132 }, |
| 1117 # ], | 1133 ], |
| 1118 } | 1134 } |
| 1119 style_json = self._prepare_track_style(trackDict) | 1135 style_json = self._prepare_track_style(trackDict) |
| 1120 trackDict["style"] = style_json | 1136 trackDict["style"] = style_json |
| 1121 self.tracksToAdd.append(trackDict) | 1137 self.tracksToAdd.append(trackDict) |
| 1122 self.trackIdlist.append(tId) | 1138 self.trackIdlist.append(tId) |
| 1360 config_json["configuration"].update(config_data) | 1376 config_json["configuration"].update(config_data) |
| 1361 self.config_json.update(config_json) | 1377 self.config_json.update(config_json) |
| 1362 with open(config_path, "w") as config_file: | 1378 with open(config_path, "w") as config_file: |
| 1363 json.dump(self.config_json, config_file, indent=2) | 1379 json.dump(self.config_json, config_file, indent=2) |
| 1364 | 1380 |
| 1365 def clone_jbrowse(self): | 1381 def clone_jbrowse(self, realclone=True): |
| 1366 """Clone a JBrowse directory into a destination directory. This also works in Biocontainer testing now""" | 1382 """Clone a JBrowse directory into a destination directory. This also works in Biocontainer testing now""" |
| 1367 dest = self.outdir | 1383 dest = self.outdir |
| 1368 # self.subprocess_check_call(['jbrowse', 'create', dest, '--tag', f"{JB_VER}"]) | 1384 if realclone: |
| 1369 shutil.copytree(self.jbrowse2path, dest, dirs_exist_ok=True) | 1385 self.subprocess_check_call(['jbrowse', 'create', dest,"-f", '--tag', f"{JB2VER}"]) |
| | 1386 else: |
| | 1387 shutil.copytree(self.jbrowse2path, dest, dirs_exist_ok=True) |
| 1370 for fn in [ | 1388 for fn in [ |
| 1371 "asset-manifest.json", | 1389 "asset-manifest.json", |
| 1372 "favicon.ico", | 1390 "favicon.ico", |
| 1373 "robots.txt", | 1391 "robots.txt", |
| 1374 "umd_plugin.js", | 1392 "umd_plugin.js", |
