view autogenJB2.py @ 97:74074746ccd8 draft

planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit 1c04ea76330d0148a7682b3c26846e5a5df21f99
author fubar
date Sat, 01 Jun 2024 05:37:13 +0000
parents 4c517a0041a8
children b1260bca5fdc
line wrap: on
line source

import argparse
import logging
import os
import sys

from jbrowse2 import JbrowseConnector as jbC


# Configure the root logger so that every message from DEBUG upwards is emitted.
logging.basicConfig(level=logging.DEBUG)
# Named logger for this tool; note the script body below logs through the
# module-level ``logging`` functions (root logger) rather than through ``log``.
log = logging.getLogger("jbrowse")

if __name__ == "__main__":
    # Script entry point.
    #
    # Parses the command line, registers every fasta reference and every track
    # with a JbrowseConnector, then writes the JBrowse2 config and a default
    # session. Each --trackmeta / --referencemeta value is a comma separated
    # record read positionally as: path, extension, name and (tracks only) an
    # optional index path used for bam/cram.
    FASTA_EXTS = ("fasta", "fasta.gz")
    URI_PREFIXES = ("http://", "https://")
    # Track extensions whose keys are listed under "default_on"; all other
    # extensions are listed under "default_off".
    DEFAULT_ON_EXTS = ("bigwig", "gff", "gff3", "vcf", "maf", "bed", "hic")
    # Track extensions that get an explicit display style in the session.
    STYLED_EXTS = ("gff", "gff3", "bed", "vcf", "maf", "blastxml")

    parser = argparse.ArgumentParser(description="", epilog="")
    parser.add_argument("--sessName", help="Session name", default="AutoJBrowse")
    parser.add_argument(
        "--trackmeta",
        help="Repeatable 'filename,filext,filepath,[bai/crai path for filesystem bam/cram]' for JBrowse2 tracks",
        default=[],
        action="append",
    )
    parser.add_argument(
        "--referencemeta",
        help="Repeatable 'filename, filext, filepath, ... ,' for JBrowse2 reference tracks - usually only one needed",
        default=[],
        action="append",
    )
    # --pafmeta and --pafreferencemeta are accepted but not read below; they
    # are kept so existing command lines continue to parse.
    parser.add_argument(
        "--pafmeta",
        help="Repeatable. Each is a 'pafname, filext, filepath, ... ,' for a JBrowse2 paf track",
        default=[],
        action="append",
    )
    parser.add_argument(
        "--pafreferencemeta",
        help="Repeatable. Each is a 'pafname,refpath,refname' Every pafname must have one or more",
        default=[],
        action="append",
    )
    parser.add_argument(
        "--jbrowse2path", help="Path to JBrowse2 directory in biocontainer or Conda"
    )
    parser.add_argument("--outdir", help="Output directory", required=True)
    parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.10.2")
    args = parser.parse_args()
    sessName = args.sessName
    default_session_data = {}
    # --trackmeta $jbrowseme[$key],$jbrowseme[$key].ext,'$key'
    # Whitespace-only values are dropped so they cannot produce a record with
    # a single empty field.
    trackList = [x.strip().split(",") for x in args.trackmeta if x.strip()]
    refList = [x.strip().split(",") for x in args.referencemeta if x.strip()]
    # assume no pafs here
    listgenomes = [f for f in refList if f[1] in FASTA_EXTS]
    if not listgenomes:
        # Covers both "no references given" and "references given, but none
        # of them is a fasta" so neither case ends silently.
        sys.stderr.write(
            "!!!! Collection has no suitable trackfiles for autogenJB2 - nothing to process"
        )
    else:
        genome_paths = [x[0] for x in listgenomes]
        genome_names = [x[2] for x in listgenomes]
        jc = jbC(
            outdir=args.outdir,
            jbrowse2path=args.jbrowse2path,
        )
        # A plain list of genome descriptions (previously wrapped in a
        # one-element tuple by a stray trailing comma).
        genomes = [
            {
                "path": gpath,
                "label": gname,
                "useuri": "yes" if gpath.startswith(URI_PREFIXES) else "no",
                "meta": {
                    "name": gname,
                    "dataset_dname": gname,
                },
            }
            for gpath, gname in zip(genome_paths, genome_names)
        ]
        logging.debug("@@@autogenJB2 paths=%s, genomes=%s", genome_paths, genomes)
        assref_name = jc.process_genomes(genomes)
        session = {
            "tracks": [],
            "style": {},
            "style_labels": {},
            "visibility": {
                "default_on": [],
                "default_off": [],
            },
        }
        default_session_data[assref_name] = session
        # Stays None when no tracks were supplied.
        track_conf = None
        for track in trackList:
            track_conf = {
                "trackfiles": [],
                "category": "autogenerated",
                "assemblyNames": assref_name,
            }
            tpath, trext, trackname = track[:3]
            # The fourth field (bam/cram index path) is optional.
            given_index = track[3] if len(track) > 3 else ""
            track_conf["dataset_id"] = trackname
            useuri = "yes" if tpath.startswith(URI_PREFIXES) else "no"
            if trext == "paf":
                refdat = [
                    "%s ~ %s" % (x[0], x[2]) for x in trackList if x[1] in FASTA_EXTS
                ]
                if not refdat:
                    logging.warning(
                        "!! No reference file for %s found. Using main reference",
                        trackname,
                    )
                    refdat = ["%s ~ %s" % (genomes[0]["path"], assref_name)]
                # Applied for both the explicit and the fallback reference.
                track_conf.update(
                    {"conf": {"options": {"paf": {"genome": ",".join(refdat)}}}}
                )
            elif trext == "bam":
                ipath = given_index
                if not os.path.exists(ipath):
                    # No usable index supplied: build one in the output directory.
                    ipath = os.path.realpath(
                        os.path.join(jc.outdir, trackname + ".bai")
                    )
                    cmd = [
                        "samtools",
                        "index",
                        "-b",
                        "-o",
                        ipath,
                        os.path.realpath(track[0]),
                    ]
                    sys.stdout.write("#### calling %s" % " ".join(cmd))
                    jc.subprocess_check_call(cmd)
                track_conf.update(
                    {"conf": {"options": {"bam": {"bam_index": " %s ~ %s," % (tpath, ipath)}}}}
                )
            elif trext == "cram":
                ipath = given_index
                if not os.path.exists(ipath):
                    # No usable index supplied: build one in the working directory.
                    ipath = os.path.realpath(
                        os.path.join("./", trackname + ".crai")
                    )
                    cmd = [
                        "samtools",
                        "index",
                        "-c",
                        "-o",
                        ipath,
                        os.path.realpath(track[0]),
                    ]
                    jc.subprocess_check_call(cmd)
                track_conf.update(
                    {"conf": {"options": {"cram": {"cram_index": "%s ~ %s," % (tpath, ipath)}}}}
                )
            track_conf["path"] = tpath
            track_conf["format"] = trext
            track_conf["name"] = trackname
            track_conf["label"] = trackname
            track_conf["trackfiles"].append((tpath, trext, useuri, trackname, {}))
            keys = jc.process_annotations(track_conf)

            for key in keys or []:
                shown = "default_on" if trext in DEFAULT_ON_EXTS else "default_off"
                session["visibility"][shown].append(key)
                if trext in STYLED_EXTS:
                    session["style"][key] = {
                        "type": (
                            "LinearVariantDisplay"
                            if trext == "vcf"
                            else "LinearBasicDisplay"
                        ),
                        "trackShowLabels": False,
                        "trackShowDescriptions": False,
                    }
                    session["tracks"].append(key)
        # No general settings (analytics, primary/secondary/tertiary/quaternary
        # colours, font size) are supplied by this script, so an empty mapping
        # is passed.
        jc.add_general_configuration({})
        trackconf = jc.config_json.get("tracks", [])
        # Pre-bound so the debug line below is safe when there are no genome names.
        gnome = None
        for gnome in jc.genome_names:
            trackconf += jc.tracksToAdd[gnome]
        logging.debug("++++ adding trackconf=%s for gnome %s", trackconf, gnome)
        jc.config_json["tracks"] = trackconf
        assconf = jc.config_json.get("assemblies", [])
        assconf += jc.assemblies
        jc.config_json["assemblies"] = assconf
        logging.debug("+++assemblies=%s, gnames=%s", assconf, jc.genome_names)
        jc.write_config()
        default_session_data.update({"session_name": sessName})
        if track_conf is not None:
            # NOTE(review): merges the session data into the last track's
            # config, which this script never reads again - presumably
            # vestigial; confirm before removing.
            track_conf.update(default_session_data)
        jc.add_default_session(default_session_data)
        # jc.add_defsess_to_index(default_session_data)
        # jc.text_index() not sure what broke here.