Mercurial > repos > fubar > jbrowse2
comparison jbrowse2.py @ 134:ed3a21033188 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit bb6736899ac3029f73455637a04a006fcd857fc2
author | bgruening |
---|---|
date | Sun, 20 Oct 2024 07:11:16 +0000 |
parents | cec274db51c0 |
children | 21bb464c1d53 |
comparison (legend: equal, deleted, inserted, replaced)
133:cec274db51c0 | 134:ed3a21033188 |
---|---|
11 import shutil | 11 import shutil |
12 import ssl | 12 import ssl |
13 import string | 13 import string |
14 import struct | 14 import struct |
15 import subprocess | 15 import subprocess |
16 import tempfile | |
17 import urllib.request | 16 import urllib.request |
18 import xml.etree.ElementTree as ET | 17 import xml.etree.ElementTree as ET |
19 from collections import defaultdict | 18 from collections import defaultdict |
20 | 19 |
21 logging.basicConfig(level=logging.DEBUG) | 20 logging.basicConfig(level=logging.DEBUG) |
444 if logCommands: | 443 if logCommands: |
445 log.debug(command) | 444 log.debug(command) |
446 p = subprocess.Popen( | 445 p = subprocess.Popen( |
447 command, | 446 command, |
448 cwd=self.outdir, | 447 cwd=self.outdir, |
449 shell=False, | 448 shell=True, |
450 stdin=subprocess.PIPE, | 449 stdin=subprocess.PIPE, |
451 stdout=subprocess.PIPE, | 450 stdout=subprocess.PIPE, |
452 stderr=subprocess.PIPE, | 451 stderr=subprocess.PIPE, |
453 ) | 452 ) |
454 output, err = p.communicate() | 453 output, err = p.communicate() |
552 contig = fl.decode("utf8").strip() | 551 contig = fl.decode("utf8").strip() |
553 # Merlin 172788 8 60 61 | 552 # Merlin 172788 8 60 61 |
554 else: | 553 else: |
555 faname = gname + ".fa.gz" | 554 faname = gname + ".fa.gz" |
556 fadest = os.path.realpath(os.path.join(self.outdir, faname)) | 555 fadest = os.path.realpath(os.path.join(self.outdir, faname)) |
557 cmd = ["bgzip", "-i", "-c", fapath, "-I", fadest + ".gzi"] | 556 cmd = "bgzip -k -i -c -I '%s.gzi' '%s' > '%s'" % (fadest, fapath, fadest) |
558 with open(fadest, "wb") as fout: | 557 subprocess.run(cmd, shell=True) |
559 self.subprocess_check_call(cmd, output=fout) | |
560 cmd = ["samtools", "faidx", fadest] | 558 cmd = ["samtools", "faidx", fadest] |
561 self.subprocess_check_call(cmd) | 559 self.subprocess_check_call(cmd) |
562 contig = open(fadest + ".fai", "r").readline().strip() | 560 contig = open(fadest + ".fai", "r").readline().strip() |
563 adapter = { | 561 adapter = { |
564 "type": "BgzipFastaAdapter", | 562 "type": "BgzipFastaAdapter", |
751 if self.config_json.get("plugins", None): | 749 if self.config_json.get("plugins", None): |
752 self.config_json["plugins"].append(mafPlugin["plugins"][0]) | 750 self.config_json["plugins"].append(mafPlugin["plugins"][0]) |
753 else: | 751 else: |
754 self.config_json.update(mafPlugin) | 752 self.config_json.update(mafPlugin) |
755 | 753 |
756 def _blastxml_to_gff3(self, xml, min_gap=10): | 754 def _sort_gff(self, data, dest): |
757 gff3_unrebased = tempfile.NamedTemporaryFile(delete=False) | 755 # Only index if not already done |
758 cmd = [ | 756 if not os.path.exists(dest): |
759 "python", | 757 e = os.environ |
760 os.path.join(INSTALLED_TO, "blastxml_to_gapped_gff3.py"), | 758 e["SHELL"] = "/bin/sh" |
761 "--trim", | 759 cmd = "jbrowse sort-gff %s | bgzip -c > %s" % (data, dest) |
762 "--trim_end", | 760 subprocess.run(cmd, env=e, shell=True) |
763 "--include_seq", | 761 self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest]) |
764 "--min_gap", | 762 |
765 str(min_gap), | 763 def add_gff(self, data, trackData): |
766 xml, | 764 tId = trackData["label"] |
767 ] | 765 useuri = trackData["useuri"].lower() == "yes" |
768 subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased) | 766 if useuri: |
769 gff3_unrebased.close() | 767 url = trackData["path"] |
770 logging.debug("### blastxml to gff3 cmd = %s" % " ".join(cmd)) | 768 else: |
771 return gff3_unrebased.name | 769 url = tId + ".gz" |
772 | 770 dest = os.path.join(self.outdir, url) |
773 def add_blastxml(self, data, trackData, blastOpts, **kwargs): | 771 self._sort_gff(data, dest) |
774 gff3 = self._blastxml_to_gff3(data, min_gap=blastOpts["min_gap"]) | 772 categ = trackData["category"] |
775 if "parent" in blastOpts and blastOpts["parent"] != "None": | 773 trackDict = { |
776 gff3_rebased = tempfile.NamedTemporaryFile(delete=False) | 774 "type": "FeatureTrack", |
777 cmd = ["python", os.path.join(INSTALLED_TO, "gff3_rebase.py")] | 775 "trackId": tId, |
778 if blastOpts.get("protein", "false") == "true": | 776 "name": trackData["name"], |
779 cmd.append("--protein2dna") | 777 "assemblyNames": [trackData["assemblyNames"]], |
780 cmd.extend([os.path.realpath(blastOpts["parent"]), gff3]) | 778 "category": [ |
781 subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_rebased) | 779 categ, |
782 logging.debug("### gff3rebase cmd = %s" % " ".join(cmd)) | 780 ], |
783 gff3_rebased.close() | 781 "adapter": { |
784 # Replace original gff3 file | 782 "type": "Gff3TabixAdapter", |
785 shutil.copy(gff3_rebased.name, gff3) | 783 "gffGzLocation": { |
786 os.unlink(gff3_rebased.name) | 784 "uri": url, |
787 self.add_gff(gff3, trackData, **kwargs) | 785 }, |
786 "index": { | |
787 "location": { | |
788 "uri": url + ".tbi", | |
789 } | |
790 }, | |
791 }, | |
792 "displays": [ | |
793 { | |
794 "type": "LinearBasicDisplay", | |
795 "displayId": "%s-LinearBasicDisplay" % tId, | |
796 }, | |
797 { | |
798 "type": "LinearArcDisplay", | |
799 "displayId": "%s-LinearArcDisplay" % tId, | |
800 }, | |
801 ], | |
802 } | |
803 style_json = self._prepare_track_style(trackDict) | |
804 trackDict["style"] = style_json | |
805 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict)) | |
806 self.trackIdlist.append(tId) | |
788 | 807 |
789 def add_bigwig(self, data, trackData): | 808 def add_bigwig(self, data, trackData): |
790 tId = trackData["label"] | 809 tId = trackData["label"] |
791 useuri = trackData["useuri"].lower() == "yes" | 810 useuri = trackData["useuri"].lower() == "yes" |
792 if useuri: | 811 if useuri: |
836 self.subprocess_check_call(["cp", data, dest]) | 855 self.subprocess_check_call(["cp", data, dest]) |
837 url = fname | 856 url = fname |
838 bindex = fname + ".bai" | 857 bindex = fname + ".bai" |
839 bi = bam_indexes.split(",") | 858 bi = bam_indexes.split(",") |
840 bam_index = [ | 859 bam_index = [ |
841 x.split(" ~ ")[1].strip() | 860 x.split("~~~")[1].strip() |
842 for x in bi | 861 for x in bi |
843 if " ~ " in x and x.split(" ~ ")[0].strip() == realFName | 862 if "~~~" in x and x.split("~~~")[0].strip() == realFName |
844 ] | 863 ] |
845 logging.debug( | 864 logging.debug( |
846 "===realFName=%s got %s as bam_indexes %s as bi, %s for bam_index" | 865 "===realFName=%s got %s as bam_indexes %s as bi, %s for bam_index" |
847 % (realFName, bam_indexes, bi, bam_index) | 866 % (realFName, bam_indexes, bi, bam_index) |
848 ) | 867 ) |
898 dest = os.path.join(self.outdir, fname) | 917 dest = os.path.join(self.outdir, fname) |
899 url = fname | 918 url = fname |
900 self.subprocess_check_call(["cp", data, dest]) | 919 self.subprocess_check_call(["cp", data, dest]) |
901 ci = cram_indexes.split(",") | 920 ci = cram_indexes.split(",") |
902 cram_index = [ | 921 cram_index = [ |
903 x.split(" ~ ")[1].strip() | 922 x.split("~~~")[1].strip() |
904 for x in ci | 923 for x in ci |
905 if " ~ " in x and x.split(" ~ ")[0].strip() == realFName | 924 if "~~~" in x and x.split("~~~")[0].strip() == realFName |
906 ] | 925 ] |
907 logging.debug( | 926 logging.debug( |
908 "===realFName=%s got %s as cram_indexes %s as ci, %s for cram_index" | 927 "===realFName=%s got %s as cram_indexes %s as ci, %s for cram_index" |
909 % (realFName, cram_indexes, ci, cram_index) | 928 % (realFName, cram_indexes, ci, cram_index) |
910 ) | 929 ) |
995 style_json = self._prepare_track_style(trackDict) | 1014 style_json = self._prepare_track_style(trackDict) |
996 trackDict["style"] = style_json | 1015 trackDict["style"] = style_json |
997 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict)) | 1016 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict)) |
998 self.trackIdlist.append(tId) | 1017 self.trackIdlist.append(tId) |
999 | 1018 |
1000 def _sort_gff(self, data, dest): | |
1001 # Only index if not already done | |
1002 if not os.path.exists(dest): | |
1003 e = os.environ | |
1004 e['SHELL'] = '/bin/sh' | |
1005 cmd = ['/bin/sh', '-c', "jbrowse sort-gff %s | bgzip -c > %s" % (data, dest)] | |
1006 subprocess.run(cmd, env=e) | |
1007 self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest]) | |
1008 | |
1009 def _sort_bed(self, data, dest): | 1019 def _sort_bed(self, data, dest): |
1010 # Only index if not already done | 1020 # Only index if not already done |
1011 if not os.path.exists(dest): | 1021 if not os.path.exists(dest): |
1012 cmd = ["sort", "-k1,1", "-k2,2n", data] | 1022 cmd = ["sort", "-k1,1", "-k2,2n", data] |
1013 ps = subprocess.run(cmd, check=True, capture_output=True) | 1023 ps = subprocess.run(cmd, check=True, capture_output=True) |
1014 cmd = ["bgzip", "-c"] | 1024 cmd = ["bgzip", "-c"] |
1015 with open(dest, "wb") as fout: | 1025 with open(dest, "wb") as fout: |
1016 subprocess.run(cmd, input=ps.stdout, stdout=fout) | 1026 subprocess.run(cmd, input=ps.stdout, stdout=fout) |
1017 cmd = ["tabix", "-f", "-p", "bed", dest] | 1027 cmd = ["tabix", "-f", "-p", "bed", dest] |
1018 self.subprocess_check_call(cmd) | 1028 self.subprocess_check_call(cmd) |
1019 | |
1020 def add_gff(self, data, trackData): | |
1021 tId = trackData["label"] | |
1022 useuri = trackData["useuri"].lower() == "yes" | |
1023 if useuri: | |
1024 url = trackData["path"] | |
1025 else: | |
1026 url = tId + ".gz" | |
1027 dest = os.path.join(self.outdir, url) | |
1028 self._sort_gff(data, dest) | |
1029 categ = trackData["category"] | |
1030 trackDict = { | |
1031 "type": "FeatureTrack", | |
1032 "trackId": tId, | |
1033 "name": trackData["name"], | |
1034 "assemblyNames": [trackData["assemblyNames"]], | |
1035 "category": [ | |
1036 categ, | |
1037 ], | |
1038 "adapter": { | |
1039 "type": "Gff3TabixAdapter", | |
1040 "gffGzLocation": { | |
1041 "uri": url, | |
1042 }, | |
1043 "index": { | |
1044 "location": { | |
1045 "uri": url + ".tbi", | |
1046 } | |
1047 }, | |
1048 }, | |
1049 "displays": [ | |
1050 { | |
1051 "type": "LinearBasicDisplay", | |
1052 "displayId": "%s-LinearBasicDisplay" % tId, | |
1053 }, | |
1054 { | |
1055 "type": "LinearArcDisplay", | |
1056 "displayId": "%s-LinearArcDisplay" % tId, | |
1057 }, | |
1058 ], | |
1059 } | |
1060 style_json = self._prepare_track_style(trackDict) | |
1061 trackDict["style"] = style_json | |
1062 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict)) | |
1063 self.trackIdlist.append(tId) | |
1064 | 1029 |
1065 def add_bed(self, data, ext, trackData): | 1030 def add_bed(self, data, ext, trackData): |
1066 bedPlugin = {"name": "BedScorePlugin", "umdLoc": {"uri": "bedscoreplugin.js"}} | 1031 bedPlugin = {"name": "BedScorePlugin", "umdLoc": {"uri": "bedscoreplugin.js"}} |
1067 tId = trackData["label"] | 1032 tId = trackData["label"] |
1068 categ = trackData["category"] | 1033 categ = trackData["category"] |
1137 url = tId | 1102 url = tId |
1138 usePIF = False # much faster if indexed remotely or locally but broken in biocontainer. | 1103 usePIF = False # much faster if indexed remotely or locally but broken in biocontainer. |
1139 useuri = data.startswith("http://") or data.startswith("https://") | 1104 useuri = data.startswith("http://") or data.startswith("https://") |
1140 if not useuri: | 1105 if not useuri: |
1141 if canPIF: | 1106 if canPIF: |
1142 fakeName = "%s.paf" % tId | 1107 fakeName = os.path.join(self.outdir, "%s.paf" % tId) |
1143 url = "%s.pif.gz" % tId | 1108 url = "%s.pif.gz" % tId |
1144 cmd = ["cp", data, fakeName] | 1109 cmd = ["cp", data, fakeName] |
1145 self.subprocess_check_call(cmd) | 1110 self.subprocess_check_call(cmd) |
1146 cmd = [ | 1111 cmd = [ |
1147 "jbrowse", | 1112 "jbrowse", |
1148 "make-pif", | 1113 "make-pif", |
1149 fakeName, | 1114 fakeName, |
1150 ] # jbrowse pif input.paf --out output.pif.gz # specify output file, creates output.pif.gz.tbi also | 1115 ] |
1151 self.subprocess_check_call(cmd) | 1116 self.subprocess_check_call(cmd) |
1152 usePIF = True | 1117 usePIF = True |
1153 else: | 1118 else: |
1154 dest = os.path.join(self.outdir, url) | 1119 dest = os.path.join(self.outdir, url) |
1155 self.symlink_or_copy(os.path.realpath(data), dest) | 1120 self.symlink_or_copy(os.path.realpath(data), dest) |
1158 if data.endswith(".pif.gz") or data.endswith(".paf.gz"): # is tabix | 1123 if data.endswith(".pif.gz") or data.endswith(".paf.gz"): # is tabix |
1159 usePIF = True | 1124 usePIF = True |
1160 categ = trackData["category"] | 1125 categ = trackData["category"] |
1161 pg = pafOpts["genome"].split(",") | 1126 pg = pafOpts["genome"].split(",") |
1162 pgc = [x.strip() for x in pg if x.strip() > ""] | 1127 pgc = [x.strip() for x in pg if x.strip() > ""] |
1163 gnomes = [x.split(" ~ ") for x in pgc] | 1128 gnomes = [x.split("~~~") for x in pgc] |
1164 logging.debug("pg=%s, gnomes=%s" % (pg, gnomes)) | 1129 logging.debug("pg=%s, gnomes=%s" % (pg, gnomes)) |
1165 passnames = [trackData["assemblyNames"]] # always first | 1130 passnames = [trackData["assemblyNames"]] # always first |
1166 for i, (gpath, gname) in enumerate(gnomes): | 1131 for i, (gpath, gname) in enumerate(gnomes): |
1167 # may have been forgotten by user for uri | 1132 # may have been forgotten by user for uri |
1168 if len(gname) == 0: | 1133 if len(gname) == 0: |
1333 real_indexes = track["conf"]["options"]["cram"]["cram_index"] | 1298 real_indexes = track["conf"]["options"]["cram"]["cram_index"] |
1334 self.add_cram( | 1299 self.add_cram( |
1335 dataset_path, | 1300 dataset_path, |
1336 outputTrackConfig, | 1301 outputTrackConfig, |
1337 cram_indexes=real_indexes, | 1302 cram_indexes=real_indexes, |
1338 ) | |
1339 elif dataset_ext == "blastxml": | |
1340 self.add_blastxml( | |
1341 dataset_path, | |
1342 outputTrackConfig, | |
1343 track["conf"]["options"]["blast"], | |
1344 ) | 1303 ) |
1345 elif dataset_ext == "vcf": | 1304 elif dataset_ext == "vcf": |
1346 self.add_vcf(dataset_path, outputTrackConfig) | 1305 self.add_vcf(dataset_path, outputTrackConfig) |
1347 elif dataset_ext == "paf": | 1306 elif dataset_ext == "paf": |
1348 self.add_paf( | 1307 self.add_paf( |