Mercurial > repos > fubar > jbrowse2
diff jbrowse2.py @ 134:ed3a21033188 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit bb6736899ac3029f73455637a04a006fcd857fc2
author | bgruening |
---|---|
date | Sun, 20 Oct 2024 07:11:16 +0000 |
parents | cec274db51c0 |
children | 21bb464c1d53 |
line wrap: on
line diff
--- a/jbrowse2.py Fri Oct 18 11:57:55 2024 +0000 +++ b/jbrowse2.py Sun Oct 20 07:11:16 2024 +0000 @@ -13,7 +13,6 @@ import string import struct import subprocess -import tempfile import urllib.request import xml.etree.ElementTree as ET from collections import defaultdict @@ -446,7 +445,7 @@ p = subprocess.Popen( command, cwd=self.outdir, - shell=False, + shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, @@ -554,9 +553,8 @@ else: faname = gname + ".fa.gz" fadest = os.path.realpath(os.path.join(self.outdir, faname)) - cmd = ["bgzip", "-i", "-c", fapath, "-I", fadest + ".gzi"] - with open(fadest, "wb") as fout: - self.subprocess_check_call(cmd, output=fout) + cmd = "bgzip -k -i -c -I '%s.gzi' '%s' > '%s'" % (fadest, fapath, fadest) + subprocess.run(cmd, shell=True) cmd = ["samtools", "faidx", fadest] self.subprocess_check_call(cmd) contig = open(fadest + ".fai", "r").readline().strip() @@ -753,38 +751,59 @@ else: self.config_json.update(mafPlugin) - def _blastxml_to_gff3(self, xml, min_gap=10): - gff3_unrebased = tempfile.NamedTemporaryFile(delete=False) - cmd = [ - "python", - os.path.join(INSTALLED_TO, "blastxml_to_gapped_gff3.py"), - "--trim", - "--trim_end", - "--include_seq", - "--min_gap", - str(min_gap), - xml, - ] - subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased) - gff3_unrebased.close() - logging.debug("### blastxml to gff3 cmd = %s" % " ".join(cmd)) - return gff3_unrebased.name + def _sort_gff(self, data, dest): + # Only index if not already done + if not os.path.exists(dest): + e = os.environ + e["SHELL"] = "/bin/sh" + cmd = "jbrowse sort-gff %s | bgzip -c > %s" % (data, dest) + subprocess.run(cmd, env=e, shell=True) + self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest]) - def add_blastxml(self, data, trackData, blastOpts, **kwargs): - gff3 = self._blastxml_to_gff3(data, min_gap=blastOpts["min_gap"]) - if "parent" in blastOpts and blastOpts["parent"] != "None": - gff3_rebased = tempfile.NamedTemporaryFile(delete=False) - cmd = ["python", os.path.join(INSTALLED_TO, "gff3_rebase.py")] - if blastOpts.get("protein", "false") == "true": - cmd.append("--protein2dna") - cmd.extend([os.path.realpath(blastOpts["parent"]), gff3]) - subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_rebased) - logging.debug("### gff3rebase cmd = %s" % " ".join(cmd)) - gff3_rebased.close() - # Replace original gff3 file - shutil.copy(gff3_rebased.name, gff3) - os.unlink(gff3_rebased.name) - self.add_gff(gff3, trackData, **kwargs) + def add_gff(self, data, trackData): + tId = trackData["label"] + useuri = trackData["useuri"].lower() == "yes" + if useuri: + url = trackData["path"] + else: + url = tId + ".gz" + dest = os.path.join(self.outdir, url) + self._sort_gff(data, dest) + categ = trackData["category"] + trackDict = { + "type": "FeatureTrack", + "trackId": tId, + "name": trackData["name"], + "assemblyNames": [trackData["assemblyNames"]], + "category": [ + categ, + ], + "adapter": { + "type": "Gff3TabixAdapter", + "gffGzLocation": { + "uri": url, + }, + "index": { + "location": { + "uri": url + ".tbi", + } + }, + }, + "displays": [ + { + "type": "LinearBasicDisplay", + "displayId": "%s-LinearBasicDisplay" % tId, + }, + { + "type": "LinearArcDisplay", + "displayId": "%s-LinearArcDisplay" % tId, + }, + ], + } + style_json = self._prepare_track_style(trackDict) + trackDict["style"] = style_json + self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict)) + self.trackIdlist.append(tId) def add_bigwig(self, data, trackData): tId = trackData["label"] @@ -838,9 +857,9 @@ bindex = fname + ".bai" bi = bam_indexes.split(",") bam_index = [ - x.split(" ~ ")[1].strip() + x.split("~~~")[1].strip() for x in bi - if " ~ " in x and x.split(" ~ ")[0].strip() == realFName + if "~~~" in x and x.split("~~~")[0].strip() == realFName ] logging.debug( "===realFName=%s got %s as bam_indexes %s as bi, %s for bam_index" @@ -900,9 +919,9 @@ self.subprocess_check_call(["cp", data, dest]) ci = cram_indexes.split(",") cram_index = [ - x.split(" ~ ")[1].strip() + x.split("~~~")[1].strip() for x in ci - if " ~ " in x and x.split(" ~ ")[0].strip() == realFName + if "~~~" in x and x.split("~~~")[0].strip() == realFName ] logging.debug( "===realFName=%s got %s as cram_indexes %s as ci, %s for cram_index" @@ -997,15 +1016,6 @@ self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict)) self.trackIdlist.append(tId) - def _sort_gff(self, data, dest): - # Only index if not already done - if not os.path.exists(dest): - e = os.environ - e['SHELL'] = '/bin/sh' - cmd = ['/bin/sh', '-c', "jbrowse sort-gff %s | bgzip -c > %s" % (data, dest)] - subprocess.run(cmd, env=e) - self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest]) - def _sort_bed(self, data, dest): # Only index if not already done if not os.path.exists(dest): @@ -1017,51 +1027,6 @@ cmd = ["tabix", "-f", "-p", "bed", dest] self.subprocess_check_call(cmd) - def add_gff(self, data, trackData): - tId = trackData["label"] - useuri = trackData["useuri"].lower() == "yes" - if useuri: - url = trackData["path"] - else: - url = tId + ".gz" - dest = os.path.join(self.outdir, url) - self._sort_gff(data, dest) - categ = trackData["category"] - trackDict = { - "type": "FeatureTrack", - "trackId": tId, - "name": trackData["name"], - "assemblyNames": [trackData["assemblyNames"]], - "category": [ - categ, - ], - "adapter": { - "type": "Gff3TabixAdapter", - "gffGzLocation": { - "uri": url, - }, - "index": { - "location": { - "uri": url + ".tbi", - } - }, - }, - "displays": [ - { - "type": "LinearBasicDisplay", - "displayId": "%s-LinearBasicDisplay" % tId, - }, - { - "type": "LinearArcDisplay", - "displayId": "%s-LinearArcDisplay" % tId, - }, - ], - } - style_json = self._prepare_track_style(trackDict) - trackDict["style"] = style_json - self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict)) - self.trackIdlist.append(tId) - def add_bed(self, data, ext, trackData): bedPlugin = {"name": "BedScorePlugin", "umdLoc": {"uri": "bedscoreplugin.js"}} tId = trackData["label"] @@ -1139,7 +1104,7 @@ useuri = data.startswith("http://") or data.startswith("https://") if not useuri: if canPIF: - fakeName = "%s.paf" % tId + fakeName = os.path.join(self.outdir, "%s.paf" % tId) url = "%s.pif.gz" % tId cmd = ["cp", data, fakeName] self.subprocess_check_call(cmd) @@ -1147,7 +1112,7 @@ "jbrowse", "make-pif", fakeName, - ] # jbrowse pif input.paf --out output.pif.gz # specify output file, creates output.pif.gz.tbi also + ] self.subprocess_check_call(cmd) usePIF = True else: @@ -1160,7 +1125,7 @@ categ = trackData["category"] pg = pafOpts["genome"].split(",") pgc = [x.strip() for x in pg if x.strip() > ""] - gnomes = [x.split(" ~ ") for x in pgc] + gnomes = [x.split("~~~") for x in pgc] logging.debug("pg=%s, gnomes=%s" % (pg, gnomes)) passnames = [trackData["assemblyNames"]] # always first for i, (gpath, gname) in enumerate(gnomes): @@ -1336,12 +1301,6 @@ outputTrackConfig, cram_indexes=real_indexes, ) - elif dataset_ext == "blastxml": - self.add_blastxml( - dataset_path, - outputTrackConfig, - track["conf"]["options"]["blast"], - ) elif dataset_ext == "vcf": self.add_vcf(dataset_path, outputTrackConfig) elif dataset_ext == "paf":