comparison autogenJB2.py @ 35:15da358c3108 draft

planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit 80b849766a962bac4bd0bb8cb69c118cc42699cd-dirty
author fubar
date Wed, 28 Feb 2024 10:08:57 +0000 (11 months ago)
parents 8f02a84ee278
children 5f39f745682f
comparison
equal deleted inserted replaced
34:2893ef33fba9 35:15da358c3108
1 import argparse 1 import argparse
2 import re 2 import logging
3 import sys 3 import sys
4 4
5 from jbrowse2 import jbrowseConnector as jbC 5 from jbrowse2 import JbrowseConnector as jbC
6
7 logging.basicConfig(level=logging.debug)
8 log = logging.getLogger("jbrowse")
6 9
7 10
8 def makeDefaultLocation(jc, defLoc=None): 11 def makeDefaultLocation():
9 12
10 refName = None 13 refName = jc.genome_firstcontig
11 drdict = { 14 defloc = "%s:100..10000" % refName
12 "reversed": False, 15 print ('defloc',defloc)
13 "assemblyName": jc.genome_name, 16 return defloc
14 "start": 0,
15 "end": 100000,
16 }
17
18 if defLoc:
19 loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", defLoc)
20 # allow commas like 100,000 but ignore as integer
21 if loc_match:
22 refName = loc_match.group(1)
23 drdict["refName"] = refName
24 if loc_match.group(2) > "":
25 drdict["start"] = int(loc_match.group(2).replace(",", ""))
26 if loc_match.group(3) > "":
27 drdict["end"] = int(loc_match.group(3).replace(",", ""))
28 else:
29 jc.logging.info(
30 "@@@ regexp could not match contig:start..end in the supplied location %s - please fix"
31 % defLoc
32 )
33 else:
34 drdict["refName"] = jc.genome_firstcontig
35 if drdict.get("refName", None):
36 jc.logging.info("@@@ defaultlocation %s for default session" % drdict)
37 return drdict
38 else:
39 jc.logging.info("@@@ no contig name found for default session - please add one!")
40 return None
41 17
42 18
43 if __name__ == "__main__": 19 if __name__ == "__main__":
44 parser = argparse.ArgumentParser(description="", epilog="") 20 parser = argparse.ArgumentParser(description="", epilog="")
45 parser.add_argument("--sessname", help="Session name", default="AutoJBrowse") 21 parser.add_argument("--sessName", help="Session name", default="AutoJBrowse")
46 parser.add_argument( 22 parser.add_argument(
47 "--collection", 23 "--trackmeta",
48 help="Collection of 'filepath, filename, filext' for JBrowse2", 24 help="Repeatable of 'filename, filext,filepath, ... ,' for JBrowse2 tracks",
49 default=[], 25 default=[],
50 action="extend", 26 action="append",
51 ) 27 )
52 parser.add_argument("--version", "-V", action="version", version="%(prog)s 0.0.1") 28 parser.add_argument(
29 "--referencemeta",
30 help="Repeatable 'filename, filext, filepath, ... ,' for JBrowse2 reference tracks",
31 default=[],
32 action="append",
33 )
34 parser.add_argument(
35 "--pafmeta",
36 help="Repeatable. Each is a 'filename, filext, filepath, ... ,' for a JBrowse2 paf track",
37 default=[],
38 action="append",
39 )
40 parser.add_argument(
41 "--pafreferencemeta",
42 help="Repeatable. Each is a 'pafname,filepath,refname,filepath,refname....'. Every paf must have a corresponding one ",
43 default=[],
44 action="append",
45 )
46
47 parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.10.2")
48 parser.add_argument("--outdir", help="Output directory", required=True)
53 args = parser.parse_args() 49 args = parser.parse_args()
54 sessName = args.sessname 50 sessName = args.sessName
55 flistList = [x.split(",") for x in args.collection] 51 # --trackmeta $jbrowseme[$key],$jbrowseme[$key].ext,'$key'
56 if flistList: 52 trackList = [x.strip().split(",") for x in args.trackmeta if x > '']
57 listgenomes = [f for f in flistList if f[0].startswith("REFERENCE_")] 53 refList = [x.strip().split(",") for x in args.referencemeta if x > '']
54 print("tracklist = %s\nreflist = %s" % (trackList,refList))
55 if len(refList) > 0:
56 listgenomes = [f for f in refList if f[1] in ['fasta', 'fasta.gz']]
57 # assume no pafs here
58 print('genomes=%s' % listgenomes)
58 if len(listgenomes) > 0: 59 if len(listgenomes) > 0:
59 genome_paths = [ 60 genome_paths = [
60 x[1] for x in listgenomes 61 x[0] for x in listgenomes
61 ] # expect genome_1_genomename.fasta etc 62 ] # expect genome_1_genomename.fasta etc
62 genome_names = [x[0].split("REFERENCE_")[1] for x in listgenomes] 63 genome_names = [x[2] for x in listgenomes]
63 jc = jbC( 64 jc = jbC(
64 outdir=args.outdir, 65 outdir=args.outdir,
65 genomes=[ 66 genomes=[
66 { 67 {
67 "path": x, 68 "path": x,
68 "meta": { 69 "meta": {
69 "name": genome_names[i], 70 "name": genome_names[i],
71 "dataset_dname": genome_names[i],
70 }, 72 },
71 } 73 }
72 for i, x in enumerate(genome_paths) 74 for i, x in enumerate(genome_paths)
73 ], 75 ],
74 ) 76 )
77 sys.stdout.write('$$$ genome_paths:%s genome_names: %s' % (genome_paths,genome_names))
75 jc.process_genomes() 78 jc.process_genomes()
76 default_session_data = { 79 default_session_data = {
77 "visibility": { 80 "visibility": {
78 "default_on": [], 81 "default_on": [],
79 "default_off": [], 82 "default_off": [],
80 }, 83 },
81 "style": {}, 84 "style": {},
82 "style_labels": {}, 85 "style_labels": {},
83 } 86 }
84 defLoc = makeDefaultLocation(jc) 87
85 listtracks = [f for f in flistList if not f[0].startswith("REFERENCE_")] 88 listtracks = trackList
86 # foo.paf must have a foo_paf.fasta or fasta.gz to match 89 # foo.paf must have a foo_paf.fasta or fasta.gz to match
87 tnames = [x[0] for x in listtracks] 90 tnames = [x[2] for x in listtracks]
88 texts = [x[2] for x in listtracks] 91 texts = [x[1] for x in listtracks]
89 for i, track in enumerate(listtracks): 92 for i, track in enumerate(listtracks):
90 if track[2] == "paf": 93 tpath, trext, trackname = track[:3]
91 refname = track[0] + "_paf.fasta" 94 if trext == "paf":
92 refdat = [x[1] for x in listtracks if x[0] == refname] 95 refname = trackname + "_paf.fasta"
96 refdat = [x[2] for x in listtracks if x[2] == refname]
93 if not refdat: 97 if not refdat:
94 jc.logging.warn( 98 jc.logging.warn(
95 "!! No reference file %s corresponding to paf file %s found. Not building - there must be a corresponding fasta for each paf" 99 "!! No reference file %s corresponding to paf file %s found. Not building - there must be a corresponding fasta for each paf"
96 % (refname, tnames[i]) 100 % (refname, trackname)
97 ) 101 )
98 sys.exit(3) 102 sys.exit(3)
99 else: 103 else:
100 track_conf = { 104 track_conf = {
101 "conf": { 105 "conf": {
102 "options": { 106 "options": {
103 "paf": {"genome": refdat, "genome_label": track[0]} 107 "paf": {"genome": refdat, "genome_label": trackname}
104 } 108 }
105 } 109 }
106 } 110 }
111 elif trext == 'bam':
112 track_conf["conf"] = {"options": {"bam": {"bam_indices": {"bam_index": track[3]}}}}
113 elif trext == 'cram':
114 track_conf["conf"] = {"options": {"cram": {"cram_indices": {"cram_index": track[3]}}}}
107 else: 115 else:
108 track_conf = {} 116 track_conf = {}
109 track_conf["format"] = track[2] 117 track_conf["format"] = trext
110 track_conf["name"] = track[0] 118 track_conf["name"] = trackname
111 track_conf["label"] = track[0] 119 track_conf["label"] = trackname
112 track_conf["trackfiles"] = [] 120 track_conf["trackfiles"] = [(tpath, trext, trackname,{}),]
121 track_conf["category"] = "Autogenerated"
113 keys = jc.process_annotations(track_conf) 122 keys = jc.process_annotations(track_conf)
114 123
115 if keys: 124 if keys:
116 for key in keys: 125 for key in keys:
117 default_session_data["visibility"][ 126 if trext in ["bigwig", "gff3", "gff", "vcf", "maf",]:
118 track.attrib.get("visibility", "default_off") 127 default_session_data["visibility"]["default_on"].append(key)
119 ].append(key) 128 else:
129 default_session_data["visibility"]["default_off"].append(key)
120 # if track_conf.get("style", None): 130 # if track_conf.get("style", None):
121 # default_session_data["style"][key] = track_conf[ 131 # default_session_data["style"][key] = track_conf[
122 # "style" 132 # "style"
123 # ] # TODO do we need this anymore? 133 # ] # TODO do we need this anymore?
124 # if track_conf.get("style_lables", None): 134 # if track_conf.get("style_lables", None):
138 if trackconf: 148 if trackconf:
139 jc.config_json["tracks"].update(jc.tracksToAdd) 149 jc.config_json["tracks"].update(jc.tracksToAdd)
140 else: 150 else:
141 jc.config_json["tracks"] = jc.tracksToAdd 151 jc.config_json["tracks"] = jc.tracksToAdd
142 jc.write_config() 152 jc.write_config()
143 defaultData = {"defaultLocation": defLoc, "session_name": sessName} 153 defLoc = makeDefaultLocation()
144 jc.add_default_session(defaultData) 154 default_session_data.update({"defaultLocation": defLoc, "session_name": sessName})
155 track_conf.update(default_session_data)
156 jc.add_default_session(default_session_data)
145 # jc.text_index() not sure what broke here. 157 # jc.text_index() not sure what broke here.
146 else: 158 else:
147 sys.stderr.write("!! empty collection supplied - nothing to process") 159 print("!! empty collection supplied - nothing to process")