Mercurial > repos > fubar > jbrowse2
comparison autogenJB2.py @ 35:15da358c3108 draft
planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit 80b849766a962bac4bd0bb8cb69c118cc42699cd-dirty
author | fubar |
---|---|
date | Wed, 28 Feb 2024 10:08:57 +0000 (11 months ago) |
parents | 8f02a84ee278 |
children | 5f39f745682f |
comparison
equal
deleted
inserted
replaced
34:2893ef33fba9 | 35:15da358c3108 |
---|---|
1 import argparse | 1 import argparse |
2 import re | 2 import logging |
3 import sys | 3 import sys |
4 | 4 |
5 from jbrowse2 import jbrowseConnector as jbC | 5 from jbrowse2 import JbrowseConnector as jbC |
| | 6 |
| | 7 logging.basicConfig(level=logging.debug) |
| | 8 log = logging.getLogger("jbrowse") |
6 | 9 |
7 | 10 |
8 def makeDefaultLocation(jc, defLoc=None): | 11 def makeDefaultLocation(): |
9 | 12 |
10 refName = None | 13 refName = jc.genome_firstcontig |
11 drdict = { | 14 defloc = "%s:100..10000" % refName |
12 "reversed": False, | 15 print ('defloc',defloc) |
13 "assemblyName": jc.genome_name, | 16 return defloc |
14 "start": 0, | |
15 "end": 100000, | |
16 } | |
17 | |
18 if defLoc: | |
19 loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", defLoc) | |
20 # allow commas like 100,000 but ignore as integer | |
21 if loc_match: | |
22 refName = loc_match.group(1) | |
23 drdict["refName"] = refName | |
24 if loc_match.group(2) > "": | |
25 drdict["start"] = int(loc_match.group(2).replace(",", "")) | |
26 if loc_match.group(3) > "": | |
27 drdict["end"] = int(loc_match.group(3).replace(",", "")) | |
28 else: | |
29 jc.logging.info( | |
30 "@@@ regexp could not match contig:start..end in the supplied location %s - please fix" | |
31 % defLoc | |
32 ) | |
33 else: | |
34 drdict["refName"] = jc.genome_firstcontig | |
35 if drdict.get("refName", None): | |
36 jc.logging.info("@@@ defaultlocation %s for default session" % drdict) | |
37 return drdict | |
38 else: | |
39 jc.logging.info("@@@ no contig name found for default session - please add one!") | |
40 return None | |
41 | 17 |
42 | 18 |
43 if __name__ == "__main__": | 19 if __name__ == "__main__": |
44 parser = argparse.ArgumentParser(description="", epilog="") | 20 parser = argparse.ArgumentParser(description="", epilog="") |
45 parser.add_argument("--sessname", help="Session name", default="AutoJBrowse") | 21 parser.add_argument("--sessName", help="Session name", default="AutoJBrowse") |
46 parser.add_argument( | 22 parser.add_argument( |
47 "--collection", | 23 "--trackmeta", |
48 help="Collection of 'filepath, filename, filext' for JBrowse2", | 24 help="Repeatable of 'filename, filext,filepath, ... ,' for JBrowse2 tracks", |
49 default=[], | 25 default=[], |
50 action="extend", | 26 action="append", |
51 ) | 27 ) |
52 parser.add_argument("--version", "-V", action="version", version="%(prog)s 0.0.1") | 28 parser.add_argument( |
| | 29 "--referencemeta", |
| | 30 help="Repeatable 'filename, filext, filepath, ... ,' for JBrowse2 reference tracks", |
| | 31 default=[], |
| | 32 action="append", |
| | 33 ) |
| | 34 parser.add_argument( |
| | 35 "--pafmeta", |
| | 36 help="Repeatable. Each is a 'filename, filext, filepath, ... ,' for a JBrowse2 paf track", |
| | 37 default=[], |
| | 38 action="append", |
| | 39 ) |
| | 40 parser.add_argument( |
| | 41 "--pafreferencemeta", |
| | 42 help="Repeatable. Each is a 'pafname,filepath,refname,filepath,refname....'. Every paf must have a corresponding one ", |
| | 43 default=[], |
| | 44 action="append", |
| | 45 ) |
| | 46 |
| | 47 parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.10.2") |
| | 48 parser.add_argument("--outdir", help="Output directory", required=True) |
53 args = parser.parse_args() | 49 args = parser.parse_args() |
54 sessName = args.sessname | 50 sessName = args.sessName |
55 flistList = [x.split(",") for x in args.collection] | 51 # --trackmeta $jbrowseme[$key],$jbrowseme[$key].ext,'$key' |
56 if flistList: | 52 trackList = [x.strip().split(",") for x in args.trackmeta if x > ''] |
57 listgenomes = [f for f in flistList if f[0].startswith("REFERENCE_")] | 53 refList = [x.strip().split(",") for x in args.referencemeta if x > ''] |
| | 54 print("tracklist = %s\nreflist = %s" % (trackList,refList)) |
| | 55 if len(refList) > 0: |
| | 56 listgenomes = [f for f in refList if f[1] in ['fasta', 'fasta.gz']] |
| | 57 # assume no pafs here |
| | 58 print('genomes=%s' % listgenomes) |
58 if len(listgenomes) > 0: | 59 if len(listgenomes) > 0: |
59 genome_paths = [ | 60 genome_paths = [ |
60 x[1] for x in listgenomes | 61 x[0] for x in listgenomes |
61 ] # expect genome_1_genomename.fasta etc | 62 ] # expect genome_1_genomename.fasta etc |
62 genome_names = [x[0].split("REFERENCE_")[1] for x in listgenomes] | 63 genome_names = [x[2] for x in listgenomes] |
63 jc = jbC( | 64 jc = jbC( |
64 outdir=args.outdir, | 65 outdir=args.outdir, |
65 genomes=[ | 66 genomes=[ |
66 { | 67 { |
67 "path": x, | 68 "path": x, |
68 "meta": { | 69 "meta": { |
69 "name": genome_names[i], | 70 "name": genome_names[i], |
| | 71 "dataset_dname": genome_names[i], |
70 }, | 72 }, |
71 } | 73 } |
72 for i, x in enumerate(genome_paths) | 74 for i, x in enumerate(genome_paths) |
73 ], | 75 ], |
74 ) | 76 ) |
| | 77 sys.stdout.write('$$$ genome_paths:%s genome_names: %s' % (genome_paths,genome_names)) |
75 jc.process_genomes() | 78 jc.process_genomes() |
76 default_session_data = { | 79 default_session_data = { |
77 "visibility": { | 80 "visibility": { |
78 "default_on": [], | 81 "default_on": [], |
79 "default_off": [], | 82 "default_off": [], |
80 }, | 83 }, |
81 "style": {}, | 84 "style": {}, |
82 "style_labels": {}, | 85 "style_labels": {}, |
83 } | 86 } |
84 defLoc = makeDefaultLocation(jc) | 87 |
85 listtracks = [f for f in flistList if not f[0].startswith("REFERENCE_")] | 88 listtracks = trackList |
86 # foo.paf must have a foo_paf.fasta or fasta.gz to match | 89 # foo.paf must have a foo_paf.fasta or fasta.gz to match |
87 tnames = [x[0] for x in listtracks] | 90 tnames = [x[2] for x in listtracks] |
88 texts = [x[2] for x in listtracks] | 91 texts = [x[1] for x in listtracks] |
89 for i, track in enumerate(listtracks): | 92 for i, track in enumerate(listtracks): |
90 if track[2] == "paf": | 93 tpath, trext, trackname = track[:3] |
91 refname = track[0] + "_paf.fasta" | 94 if trext == "paf": |
92 refdat = [x[1] for x in listtracks if x[0] == refname] | 95 refname = trackname + "_paf.fasta" |
| | 96 refdat = [x[2] for x in listtracks if x[2] == refname] |
93 if not refdat: | 97 if not refdat: |
94 jc.logging.warn( | 98 jc.logging.warn( |
95 "!! No reference file %s corresponding to paf file %s found. Not building - there must be a corresponding fasta for each paf" | 99 "!! No reference file %s corresponding to paf file %s found. Not building - there must be a corresponding fasta for each paf" |
96 % (refname, tnames[i]) | 100 % (refname, trackname) |
97 ) | 101 ) |
98 sys.exit(3) | 102 sys.exit(3) |
99 else: | 103 else: |
100 track_conf = { | 104 track_conf = { |
101 "conf": { | 105 "conf": { |
102 "options": { | 106 "options": { |
103 "paf": {"genome": refdat, "genome_label": track[0]} | 107 "paf": {"genome": refdat, "genome_label": trackname} |
104 } | 108 } |
105 } | 109 } |
106 } | 110 } |
| | 111 elif trext == 'bam': |
| | 112 track_conf["conf"] = {"options": {"bam": {"bam_indices": {"bam_index": track[3]}}}} |
| | 113 elif trext == 'cram': |
| | 114 track_conf["conf"] = {"options": {"cram": {"cram_indices": {"cram_index": track[3]}}}} |
107 else: | 115 else: |
108 track_conf = {} | 116 track_conf = {} |
109 track_conf["format"] = track[2] | 117 track_conf["format"] = trext |
110 track_conf["name"] = track[0] | 118 track_conf["name"] = trackname |
111 track_conf["label"] = track[0] | 119 track_conf["label"] = trackname |
112 track_conf["trackfiles"] = [] | 120 track_conf["trackfiles"] = [(tpath, trext, trackname,{}),] |
| | 121 track_conf["category"] = "Autogenerated" |
113 keys = jc.process_annotations(track_conf) | 122 keys = jc.process_annotations(track_conf) |
114 | 123 |
115 if keys: | 124 if keys: |
116 for key in keys: | 125 for key in keys: |
117 default_session_data["visibility"][ | 126 if trext in ["bigwig", "gff3", "gff", "vcf", "maf",]: |
118 track.attrib.get("visibility", "default_off") | 127 default_session_data["visibility"]["default_on"].append(key) |
119 ].append(key) | 128 else: |
| | 129 default_session_data["visibility"]["default_off"].append(key) |
120 # if track_conf.get("style", None): | 130 # if track_conf.get("style", None): |
121 # default_session_data["style"][key] = track_conf[ | 131 # default_session_data["style"][key] = track_conf[ |
122 # "style" | 132 # "style" |
123 # ] # TODO do we need this anymore? | 133 # ] # TODO do we need this anymore? |
124 # if track_conf.get("style_lables", None): | 134 # if track_conf.get("style_lables", None): |
138 if trackconf: | 148 if trackconf: |
139 jc.config_json["tracks"].update(jc.tracksToAdd) | 149 jc.config_json["tracks"].update(jc.tracksToAdd) |
140 else: | 150 else: |
141 jc.config_json["tracks"] = jc.tracksToAdd | 151 jc.config_json["tracks"] = jc.tracksToAdd |
142 jc.write_config() | 152 jc.write_config() |
143 defaultData = {"defaultLocation": defLoc, "session_name": sessName} | 153 defLoc = makeDefaultLocation() |
144 jc.add_default_session(defaultData) | 154 default_session_data.update({"defaultLocation": defLoc, "session_name": sessName}) |
| | 155 track_conf.update(default_session_data) |
| | 156 jc.add_default_session(default_session_data) |
145 # jc.text_index() not sure what broke here. | 157 # jc.text_index() not sure what broke here. |
146 else: | 158 else: |
147 sys.stderr.write("!! empty collection supplied - nothing to process") | 159 print("!! empty collection supplied - nothing to process") |