Mercurial > repos > fubar > jbrowse2
comparison jbrowse2.py @ 134:ed3a21033188 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit bb6736899ac3029f73455637a04a006fcd857fc2
| author | bgruening |
|---|---|
| date | Sun, 20 Oct 2024 07:11:16 +0000 |
| parents | cec274db51c0 |
| children | 21bb464c1d53 |
comparison
equal
deleted
inserted
replaced
| 133:cec274db51c0 | 134:ed3a21033188 |
|---|---|
| 11 import shutil | 11 import shutil |
| 12 import ssl | 12 import ssl |
| 13 import string | 13 import string |
| 14 import struct | 14 import struct |
| 15 import subprocess | 15 import subprocess |
| 16 import tempfile | |
| 17 import urllib.request | 16 import urllib.request |
| 18 import xml.etree.ElementTree as ET | 17 import xml.etree.ElementTree as ET |
| 19 from collections import defaultdict | 18 from collections import defaultdict |
| 20 | 19 |
| 21 logging.basicConfig(level=logging.DEBUG) | 20 logging.basicConfig(level=logging.DEBUG) |
| 444 if logCommands: | 443 if logCommands: |
| 445 log.debug(command) | 444 log.debug(command) |
| 446 p = subprocess.Popen( | 445 p = subprocess.Popen( |
| 447 command, | 446 command, |
| 448 cwd=self.outdir, | 447 cwd=self.outdir, |
| 449 shell=False, | 448 shell=True, |
| 450 stdin=subprocess.PIPE, | 449 stdin=subprocess.PIPE, |
| 451 stdout=subprocess.PIPE, | 450 stdout=subprocess.PIPE, |
| 452 stderr=subprocess.PIPE, | 451 stderr=subprocess.PIPE, |
| 453 ) | 452 ) |
| 454 output, err = p.communicate() | 453 output, err = p.communicate() |
| 552 contig = fl.decode("utf8").strip() | 551 contig = fl.decode("utf8").strip() |
| 553 # Merlin 172788 8 60 61 | 552 # Merlin 172788 8 60 61 |
| 554 else: | 553 else: |
| 555 faname = gname + ".fa.gz" | 554 faname = gname + ".fa.gz" |
| 556 fadest = os.path.realpath(os.path.join(self.outdir, faname)) | 555 fadest = os.path.realpath(os.path.join(self.outdir, faname)) |
| 557 cmd = ["bgzip", "-i", "-c", fapath, "-I", fadest + ".gzi"] | 556 cmd = "bgzip -k -i -c -I '%s.gzi' '%s' > '%s'" % (fadest, fapath, fadest) |
| 558 with open(fadest, "wb") as fout: | 557 subprocess.run(cmd, shell=True) |
| 559 self.subprocess_check_call(cmd, output=fout) | |
| 560 cmd = ["samtools", "faidx", fadest] | 558 cmd = ["samtools", "faidx", fadest] |
| 561 self.subprocess_check_call(cmd) | 559 self.subprocess_check_call(cmd) |
| 562 contig = open(fadest + ".fai", "r").readline().strip() | 560 contig = open(fadest + ".fai", "r").readline().strip() |
| 563 adapter = { | 561 adapter = { |
| 564 "type": "BgzipFastaAdapter", | 562 "type": "BgzipFastaAdapter", |
| 751 if self.config_json.get("plugins", None): | 749 if self.config_json.get("plugins", None): |
| 752 self.config_json["plugins"].append(mafPlugin["plugins"][0]) | 750 self.config_json["plugins"].append(mafPlugin["plugins"][0]) |
| 753 else: | 751 else: |
| 754 self.config_json.update(mafPlugin) | 752 self.config_json.update(mafPlugin) |
| 755 | 753 |
| 756 def _blastxml_to_gff3(self, xml, min_gap=10): | 754 def _sort_gff(self, data, dest): |
| 757 gff3_unrebased = tempfile.NamedTemporaryFile(delete=False) | 755 # Only index if not already done |
| 758 cmd = [ | 756 if not os.path.exists(dest): |
| 759 "python", | 757 e = os.environ |
| 760 os.path.join(INSTALLED_TO, "blastxml_to_gapped_gff3.py"), | 758 e["SHELL"] = "/bin/sh" |
| 761 "--trim", | 759 cmd = "jbrowse sort-gff %s | bgzip -c > %s" % (data, dest) |
| 762 "--trim_end", | 760 subprocess.run(cmd, env=e, shell=True) |
| 763 "--include_seq", | 761 self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest]) |
| 764 "--min_gap", | 762 |
| 765 str(min_gap), | 763 def add_gff(self, data, trackData): |
| 766 xml, | 764 tId = trackData["label"] |
| 767 ] | 765 useuri = trackData["useuri"].lower() == "yes" |
| 768 subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased) | 766 if useuri: |
| 769 gff3_unrebased.close() | 767 url = trackData["path"] |
| 770 logging.debug("### blastxml to gff3 cmd = %s" % " ".join(cmd)) | 768 else: |
| 771 return gff3_unrebased.name | 769 url = tId + ".gz" |
| 772 | 770 dest = os.path.join(self.outdir, url) |
| 773 def add_blastxml(self, data, trackData, blastOpts, **kwargs): | 771 self._sort_gff(data, dest) |
| 774 gff3 = self._blastxml_to_gff3(data, min_gap=blastOpts["min_gap"]) | 772 categ = trackData["category"] |
| 775 if "parent" in blastOpts and blastOpts["parent"] != "None": | 773 trackDict = { |
| 776 gff3_rebased = tempfile.NamedTemporaryFile(delete=False) | 774 "type": "FeatureTrack", |
| 777 cmd = ["python", os.path.join(INSTALLED_TO, "gff3_rebase.py")] | 775 "trackId": tId, |
| 778 if blastOpts.get("protein", "false") == "true": | 776 "name": trackData["name"], |
| 779 cmd.append("--protein2dna") | 777 "assemblyNames": [trackData["assemblyNames"]], |
| 780 cmd.extend([os.path.realpath(blastOpts["parent"]), gff3]) | 778 "category": [ |
| 781 subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_rebased) | 779 categ, |
| 782 logging.debug("### gff3rebase cmd = %s" % " ".join(cmd)) | 780 ], |
| 783 gff3_rebased.close() | 781 "adapter": { |
| 784 # Replace original gff3 file | 782 "type": "Gff3TabixAdapter", |
| 785 shutil.copy(gff3_rebased.name, gff3) | 783 "gffGzLocation": { |
| 786 os.unlink(gff3_rebased.name) | 784 "uri": url, |
| 787 self.add_gff(gff3, trackData, **kwargs) | 785 }, |
| 786 "index": { | |
| 787 "location": { | |
| 788 "uri": url + ".tbi", | |
| 789 } | |
| 790 }, | |
| 791 }, | |
| 792 "displays": [ | |
| 793 { | |
| 794 "type": "LinearBasicDisplay", | |
| 795 "displayId": "%s-LinearBasicDisplay" % tId, | |
| 796 }, | |
| 797 { | |
| 798 "type": "LinearArcDisplay", | |
| 799 "displayId": "%s-LinearArcDisplay" % tId, | |
| 800 }, | |
| 801 ], | |
| 802 } | |
| 803 style_json = self._prepare_track_style(trackDict) | |
| 804 trackDict["style"] = style_json | |
| 805 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict)) | |
| 806 self.trackIdlist.append(tId) | |
| 788 | 807 |
| 789 def add_bigwig(self, data, trackData): | 808 def add_bigwig(self, data, trackData): |
| 790 tId = trackData["label"] | 809 tId = trackData["label"] |
| 791 useuri = trackData["useuri"].lower() == "yes" | 810 useuri = trackData["useuri"].lower() == "yes" |
| 792 if useuri: | 811 if useuri: |
| 836 self.subprocess_check_call(["cp", data, dest]) | 855 self.subprocess_check_call(["cp", data, dest]) |
| 837 url = fname | 856 url = fname |
| 838 bindex = fname + ".bai" | 857 bindex = fname + ".bai" |
| 839 bi = bam_indexes.split(",") | 858 bi = bam_indexes.split(",") |
| 840 bam_index = [ | 859 bam_index = [ |
| 841 x.split(" ~ ")[1].strip() | 860 x.split("~~~")[1].strip() |
| 842 for x in bi | 861 for x in bi |
| 843 if " ~ " in x and x.split(" ~ ")[0].strip() == realFName | 862 if "~~~" in x and x.split("~~~")[0].strip() == realFName |
| 844 ] | 863 ] |
| 845 logging.debug( | 864 logging.debug( |
| 846 "===realFName=%s got %s as bam_indexes %s as bi, %s for bam_index" | 865 "===realFName=%s got %s as bam_indexes %s as bi, %s for bam_index" |
| 847 % (realFName, bam_indexes, bi, bam_index) | 866 % (realFName, bam_indexes, bi, bam_index) |
| 848 ) | 867 ) |
| 898 dest = os.path.join(self.outdir, fname) | 917 dest = os.path.join(self.outdir, fname) |
| 899 url = fname | 918 url = fname |
| 900 self.subprocess_check_call(["cp", data, dest]) | 919 self.subprocess_check_call(["cp", data, dest]) |
| 901 ci = cram_indexes.split(",") | 920 ci = cram_indexes.split(",") |
| 902 cram_index = [ | 921 cram_index = [ |
| 903 x.split(" ~ ")[1].strip() | 922 x.split("~~~")[1].strip() |
| 904 for x in ci | 923 for x in ci |
| 905 if " ~ " in x and x.split(" ~ ")[0].strip() == realFName | 924 if "~~~" in x and x.split("~~~")[0].strip() == realFName |
| 906 ] | 925 ] |
| 907 logging.debug( | 926 logging.debug( |
| 908 "===realFName=%s got %s as cram_indexes %s as ci, %s for cram_index" | 927 "===realFName=%s got %s as cram_indexes %s as ci, %s for cram_index" |
| 909 % (realFName, cram_indexes, ci, cram_index) | 928 % (realFName, cram_indexes, ci, cram_index) |
| 910 ) | 929 ) |
| 995 style_json = self._prepare_track_style(trackDict) | 1014 style_json = self._prepare_track_style(trackDict) |
| 996 trackDict["style"] = style_json | 1015 trackDict["style"] = style_json |
| 997 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict)) | 1016 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict)) |
| 998 self.trackIdlist.append(tId) | 1017 self.trackIdlist.append(tId) |
| 999 | 1018 |
| 1000 def _sort_gff(self, data, dest): | |
| 1001 # Only index if not already done | |
| 1002 if not os.path.exists(dest): | |
| 1003 e = os.environ | |
| 1004 e['SHELL'] = '/bin/sh' | |
| 1005 cmd = ['/bin/sh', '-c', "jbrowse sort-gff %s | bgzip -c > %s" % (data, dest)] | |
| 1006 subprocess.run(cmd, env=e) | |
| 1007 self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest]) | |
| 1008 | |
| 1009 def _sort_bed(self, data, dest): | 1019 def _sort_bed(self, data, dest): |
| 1010 # Only index if not already done | 1020 # Only index if not already done |
| 1011 if not os.path.exists(dest): | 1021 if not os.path.exists(dest): |
| 1012 cmd = ["sort", "-k1,1", "-k2,2n", data] | 1022 cmd = ["sort", "-k1,1", "-k2,2n", data] |
| 1013 ps = subprocess.run(cmd, check=True, capture_output=True) | 1023 ps = subprocess.run(cmd, check=True, capture_output=True) |
| 1014 cmd = ["bgzip", "-c"] | 1024 cmd = ["bgzip", "-c"] |
| 1015 with open(dest, "wb") as fout: | 1025 with open(dest, "wb") as fout: |
| 1016 subprocess.run(cmd, input=ps.stdout, stdout=fout) | 1026 subprocess.run(cmd, input=ps.stdout, stdout=fout) |
| 1017 cmd = ["tabix", "-f", "-p", "bed", dest] | 1027 cmd = ["tabix", "-f", "-p", "bed", dest] |
| 1018 self.subprocess_check_call(cmd) | 1028 self.subprocess_check_call(cmd) |
| 1019 | |
| 1020 def add_gff(self, data, trackData): | |
| 1021 tId = trackData["label"] | |
| 1022 useuri = trackData["useuri"].lower() == "yes" | |
| 1023 if useuri: | |
| 1024 url = trackData["path"] | |
| 1025 else: | |
| 1026 url = tId + ".gz" | |
| 1027 dest = os.path.join(self.outdir, url) | |
| 1028 self._sort_gff(data, dest) | |
| 1029 categ = trackData["category"] | |
| 1030 trackDict = { | |
| 1031 "type": "FeatureTrack", | |
| 1032 "trackId": tId, | |
| 1033 "name": trackData["name"], | |
| 1034 "assemblyNames": [trackData["assemblyNames"]], | |
| 1035 "category": [ | |
| 1036 categ, | |
| 1037 ], | |
| 1038 "adapter": { | |
| 1039 "type": "Gff3TabixAdapter", | |
| 1040 "gffGzLocation": { | |
| 1041 "uri": url, | |
| 1042 }, | |
| 1043 "index": { | |
| 1044 "location": { | |
| 1045 "uri": url + ".tbi", | |
| 1046 } | |
| 1047 }, | |
| 1048 }, | |
| 1049 "displays": [ | |
| 1050 { | |
| 1051 "type": "LinearBasicDisplay", | |
| 1052 "displayId": "%s-LinearBasicDisplay" % tId, | |
| 1053 }, | |
| 1054 { | |
| 1055 "type": "LinearArcDisplay", | |
| 1056 "displayId": "%s-LinearArcDisplay" % tId, | |
| 1057 }, | |
| 1058 ], | |
| 1059 } | |
| 1060 style_json = self._prepare_track_style(trackDict) | |
| 1061 trackDict["style"] = style_json | |
| 1062 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict)) | |
| 1063 self.trackIdlist.append(tId) | |
| 1064 | 1029 |
| 1065 def add_bed(self, data, ext, trackData): | 1030 def add_bed(self, data, ext, trackData): |
| 1066 bedPlugin = {"name": "BedScorePlugin", "umdLoc": {"uri": "bedscoreplugin.js"}} | 1031 bedPlugin = {"name": "BedScorePlugin", "umdLoc": {"uri": "bedscoreplugin.js"}} |
| 1067 tId = trackData["label"] | 1032 tId = trackData["label"] |
| 1068 categ = trackData["category"] | 1033 categ = trackData["category"] |
| 1137 url = tId | 1102 url = tId |
| 1138 usePIF = False # much faster if indexed remotely or locally but broken in biocontainer. | 1103 usePIF = False # much faster if indexed remotely or locally but broken in biocontainer. |
| 1139 useuri = data.startswith("http://") or data.startswith("https://") | 1104 useuri = data.startswith("http://") or data.startswith("https://") |
| 1140 if not useuri: | 1105 if not useuri: |
| 1141 if canPIF: | 1106 if canPIF: |
| 1142 fakeName = "%s.paf" % tId | 1107 fakeName = os.path.join(self.outdir, "%s.paf" % tId) |
| 1143 url = "%s.pif.gz" % tId | 1108 url = "%s.pif.gz" % tId |
| 1144 cmd = ["cp", data, fakeName] | 1109 cmd = ["cp", data, fakeName] |
| 1145 self.subprocess_check_call(cmd) | 1110 self.subprocess_check_call(cmd) |
| 1146 cmd = [ | 1111 cmd = [ |
| 1147 "jbrowse", | 1112 "jbrowse", |
| 1148 "make-pif", | 1113 "make-pif", |
| 1149 fakeName, | 1114 fakeName, |
| 1150 ] # jbrowse pif input.paf --out output.pif.gz # specify output file, creates output.pif.gz.tbi also | 1115 ] |
| 1151 self.subprocess_check_call(cmd) | 1116 self.subprocess_check_call(cmd) |
| 1152 usePIF = True | 1117 usePIF = True |
| 1153 else: | 1118 else: |
| 1154 dest = os.path.join(self.outdir, url) | 1119 dest = os.path.join(self.outdir, url) |
| 1155 self.symlink_or_copy(os.path.realpath(data), dest) | 1120 self.symlink_or_copy(os.path.realpath(data), dest) |
| 1158 if data.endswith(".pif.gz") or data.endswith(".paf.gz"): # is tabix | 1123 if data.endswith(".pif.gz") or data.endswith(".paf.gz"): # is tabix |
| 1159 usePIF = True | 1124 usePIF = True |
| 1160 categ = trackData["category"] | 1125 categ = trackData["category"] |
| 1161 pg = pafOpts["genome"].split(",") | 1126 pg = pafOpts["genome"].split(",") |
| 1162 pgc = [x.strip() for x in pg if x.strip() > ""] | 1127 pgc = [x.strip() for x in pg if x.strip() > ""] |
| 1163 gnomes = [x.split(" ~ ") for x in pgc] | 1128 gnomes = [x.split("~~~") for x in pgc] |
| 1164 logging.debug("pg=%s, gnomes=%s" % (pg, gnomes)) | 1129 logging.debug("pg=%s, gnomes=%s" % (pg, gnomes)) |
| 1165 passnames = [trackData["assemblyNames"]] # always first | 1130 passnames = [trackData["assemblyNames"]] # always first |
| 1166 for i, (gpath, gname) in enumerate(gnomes): | 1131 for i, (gpath, gname) in enumerate(gnomes): |
| 1167 # may have been forgotten by user for uri | 1132 # may have been forgotten by user for uri |
| 1168 if len(gname) == 0: | 1133 if len(gname) == 0: |
| 1333 real_indexes = track["conf"]["options"]["cram"]["cram_index"] | 1298 real_indexes = track["conf"]["options"]["cram"]["cram_index"] |
| 1334 self.add_cram( | 1299 self.add_cram( |
| 1335 dataset_path, | 1300 dataset_path, |
| 1336 outputTrackConfig, | 1301 outputTrackConfig, |
| 1337 cram_indexes=real_indexes, | 1302 cram_indexes=real_indexes, |
| 1338 ) | |
| 1339 elif dataset_ext == "blastxml": | |
| 1340 self.add_blastxml( | |
| 1341 dataset_path, | |
| 1342 outputTrackConfig, | |
| 1343 track["conf"]["options"]["blast"], | |
| 1344 ) | 1303 ) |
| 1345 elif dataset_ext == "vcf": | 1304 elif dataset_ext == "vcf": |
| 1346 self.add_vcf(dataset_path, outputTrackConfig) | 1305 self.add_vcf(dataset_path, outputTrackConfig) |
| 1347 elif dataset_ext == "paf": | 1306 elif dataset_ext == "paf": |
| 1348 self.add_paf( | 1307 self.add_paf( |
