Mercurial > repos > fubar > jbrowse2
diff autogenJB2.py @ 35:15da358c3108 draft
planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit 80b849766a962bac4bd0bb8cb69c118cc42699cd-dirty
author | fubar |
---|---|
date | Wed, 28 Feb 2024 10:08:57 +0000 |
parents | 8f02a84ee278 |
children | 5f39f745682f |
line wrap: on
line diff
--- a/autogenJB2.py Sun Feb 25 04:18:53 2024 +0000 +++ b/autogenJB2.py Wed Feb 28 10:08:57 2024 +0000 @@ -1,65 +1,66 @@ import argparse -import re +import logging import sys -from jbrowse2 import jbrowseConnector as jbC +from jbrowse2 import JbrowseConnector as jbC + +logging.basicConfig(level=logging.debug) +log = logging.getLogger("jbrowse") -def makeDefaultLocation(jc, defLoc=None): - - refName = None - drdict = { - "reversed": False, - "assemblyName": jc.genome_name, - "start": 0, - "end": 100000, - } +def makeDefaultLocation(): - if defLoc: - loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", defLoc) - # allow commas like 100,000 but ignore as integer - if loc_match: - refName = loc_match.group(1) - drdict["refName"] = refName - if loc_match.group(2) > "": - drdict["start"] = int(loc_match.group(2).replace(",", "")) - if loc_match.group(3) > "": - drdict["end"] = int(loc_match.group(3).replace(",", "")) - else: - jc.logging.info( - "@@@ regexp could not match contig:start..end in the supplied location %s - please fix" - % defLoc - ) - else: - drdict["refName"] = jc.genome_firstcontig - if drdict.get("refName", None): - jc.logging.info("@@@ defaultlocation %s for default session" % drdict) - return drdict - else: - jc.logging.info("@@@ no contig name found for default session - please add one!") - return None + refName = jc.genome_firstcontig + defloc = "%s:100..10000" % refName + print ('defloc',defloc) + return defloc if __name__ == "__main__": parser = argparse.ArgumentParser(description="", epilog="") - parser.add_argument("--sessname", help="Session name", default="AutoJBrowse") + parser.add_argument("--sessName", help="Session name", default="AutoJBrowse") + parser.add_argument( + "--trackmeta", + help="Repeatable of 'filename, filext,filepath, ... ,' for JBrowse2 tracks", + default=[], + action="append", + ) parser.add_argument( - "--collection", - help="Collection of 'filepath, filename, filext' for JBrowse2", + "--referencemeta", + help="Repeatable 'filename, filext, filepath, ... ,' for JBrowse2 reference tracks", default=[], - action="extend", + action="append", + ) + parser.add_argument( + "--pafmeta", + help="Repeatable. Each is a 'filename, filext, filepath, ... ,' for a JBrowse2 paf track", + default=[], + action="append", ) - parser.add_argument("--version", "-V", action="version", version="%(prog)s 0.0.1") + parser.add_argument( + "--pafreferencemeta", + help="Repeatable. Each is a 'pafname,filepath,refname,filepath,refname....'. Every paf must have a corresponding one ", + default=[], + action="append", + ) + + parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.10.2") + parser.add_argument("--outdir", help="Output directory", required=True) args = parser.parse_args() - sessName = args.sessname - flistList = [x.split(",") for x in args.collection] - if flistList: - listgenomes = [f for f in flistList if f[0].startswith("REFERENCE_")] + sessName = args.sessName + # --trackmeta $jbrowseme[$key],$jbrowseme[$key].ext,'$key' + trackList = [x.strip().split(",") for x in args.trackmeta if x > ''] + refList = [x.strip().split(",") for x in args.referencemeta if x > ''] + print("tracklist = %s\nreflist = %s" % (trackList,refList)) + if len(refList) > 0: + listgenomes = [f for f in refList if f[1] in ['fasta', 'fasta.gz']] + # assume no pafs here + print('genomes=%s' % listgenomes) if len(listgenomes) > 0: genome_paths = [ - x[1] for x in listgenomes + x[0] for x in listgenomes ] # expect genome_1_genomename.fasta etc - genome_names = [x[0].split("REFERENCE_")[1] for x in listgenomes] + genome_names = [x[2] for x in listgenomes] jc = jbC( outdir=args.outdir, genomes=[ @@ -67,11 +68,13 @@ "path": x, "meta": { "name": genome_names[i], + "dataset_dname": genome_names[i], }, } for i, x in enumerate(genome_paths) ], ) + sys.stdout.write('$$$ genome_paths:%s genome_names: %s' % (genome_paths,genome_names)) jc.process_genomes() default_session_data = { "visibility": { @@ -81,42 +84,49 @@ "style": {}, "style_labels": {}, } - defLoc = makeDefaultLocation(jc) - listtracks = [f for f in flistList if not f[0].startswith("REFERENCE_")] + + listtracks = trackList # foo.paf must have a foo_paf.fasta or fasta.gz to match - tnames = [x[0] for x in listtracks] - texts = [x[2] for x in listtracks] + tnames = [x[2] for x in listtracks] + texts = [x[1] for x in listtracks] for i, track in enumerate(listtracks): - if track[2] == "paf": - refname = track[0] + "_paf.fasta" - refdat = [x[1] for x in listtracks if x[0] == refname] + tpath, trext, trackname = track[:3] + if trext == "paf": + refname = trackname + "_paf.fasta" + refdat = [x[2] for x in listtracks if x[2] == refname] if not refdat: jc.logging.warn( "!! No reference file %s corresponding to paf file %s found. Not building - there must be a corresponding fasta for each paf" - % (refname, tnames[i]) + % (refname, trackname) ) sys.exit(3) else: track_conf = { "conf": { "options": { - "paf": {"genome": refdat, "genome_label": track[0]} + "paf": {"genome": refdat, "genome_label": trackname} } } } + elif trext == 'bam': + track_conf["conf"] = {"options": {"bam": {"bam_indices": {"bam_index": track[3]}}}} + elif trext == 'cram': + track_conf["conf"] = {"options": {"cram": {"cram_indices": {"cram_index": track[3]}}}} else: track_conf = {} - track_conf["format"] = track[2] - track_conf["name"] = track[0] - track_conf["label"] = track[0] - track_conf["trackfiles"] = [] + track_conf["format"] = trext + track_conf["name"] = trackname + track_conf["label"] = trackname + track_conf["trackfiles"] = [(tpath, trext, trackname,{}),] + track_conf["category"] = "Autogenerated" keys = jc.process_annotations(track_conf) if keys: for key in keys: - default_session_data["visibility"][ - track.attrib.get("visibility", "default_off") - ].append(key) + if trext in ["bigwig", "gff3", "gff", "vcf", "maf",]: + default_session_data["visibility"]["default_on"].append(key) + else: + default_session_data["visibility"]["default_off"].append(key) # if track_conf.get("style", None): # default_session_data["style"][key] = track_conf[ # "style" @@ -140,8 +150,10 @@ else: jc.config_json["tracks"] = jc.tracksToAdd jc.write_config() - defaultData = {"defaultLocation": defLoc, "session_name": sessName} - jc.add_default_session(defaultData) + defLoc = makeDefaultLocation() + default_session_data.update({"defaultLocation": defLoc, "session_name": sessName}) + track_conf.update(default_session_data) + jc.add_default_session(default_session_data) # jc.text_index() not sure what broke here. else: - sys.stderr.write("!! empty collection supplied - nothing to process") + print("!! empty collection supplied - nothing to process")