Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/cwltool/pack.py @ 2:6af9afd405e9 draft
"planemo upload commit 0a63dd5f4d38a1f6944587f52a8cd79874177fc1"
author | shellac |
---|---|
date | Thu, 14 May 2020 14:56:58 -0400 (2020-05-14) |
parents | 26e78fe6e8c4 |
children |
comparison
equal
deleted
inserted
replaced
1:75ca89e9b81c | 2:6af9afd405e9 |
---|---|
1 """Reformat a CWL document and all its references to be a single stream.""" | |
2 from __future__ import absolute_import | |
3 | |
4 import copy | |
5 from typing import (Any, Callable, Dict, List, MutableMapping, MutableSequence, | |
6 Optional, Set, Union, cast) | |
7 | |
8 from ruamel.yaml.comments import CommentedMap, CommentedSeq | |
9 from schema_salad.ref_resolver import Loader # pylint: disable=unused-import | |
10 from schema_salad.ref_resolver import SubLoader | |
11 from schema_salad.sourceline import cmap | |
12 from six import iteritems, string_types | |
13 from six.moves import urllib | |
14 from typing_extensions import Text # pylint: disable=unused-import | |
15 # move to a regular typing import when Python 3.3-3.6 is no longer supported | |
16 | |
17 from .process import shortname, uniquename | |
18 | |
19 | |
def flatten_deps(d, files):  # type: (Any, Set[Text]) -> None
    """Collect the ``location`` of every File found in *d* into *files*.

    *d* may be a mutable sequence, a mutable mapping, or anything else
    (anything else is ignored).  Sequences are walked element by element.
    For a mapping, its ``location`` is recorded when its ``class`` is
    ``"File"``, and its ``secondaryFiles`` and ``listing`` entries, when
    present, are walked recursively.

    :param d: the structure to walk.
    :param files: set that receives the locations; it is mutated in place.
    """
    if isinstance(d, MutableSequence):
        for item in d:
            flatten_deps(item, files)
    elif isinstance(d, MutableMapping):
        # Look the keys up defensively: a mapping without "class", or a File
        # without "location", must not abort the whole walk with a KeyError.
        if d.get("class") == "File" and "location" in d:
            files.add(d["location"])
        for key in ("secondaryFiles", "listing"):
            if key in d:
                flatten_deps(d[key], files)
31 | |
# Type of a reference-loading callback: it takes an optional base URL and a
# reference, and returns a mapping, a list of mappings, a string, or None.
LoadRefType = Callable[[Optional[Text], Text], Union[Dict[Text, Any], List[Dict[Text, Any]], Text, None]]
33 | |
34 | |
def find_run(d,        # type: Any
             loadref,  # type: LoadRefType
             runs      # type: Set[Text]
            ):  # type: (...) -> None
    """Gather every string-valued ``run`` reference reachable from *d*.

    Sequences and mappings are walked recursively.  When a mapping has a
    ``run`` entry that is a string not yet in *runs*, it is added to *runs*
    and the object returned by ``loadref(None, <that string>)`` is walked
    as well.

    :param d: the structure to walk; other types are ignored.
    :param loadref: callback used to load a referenced document.
    :param runs: set of references found so far; mutated in place.
    """
    if isinstance(d, MutableSequence):
        for element in d:
            find_run(element, loadref, runs)
        return
    if not isinstance(d, MutableMapping):
        return

    if "run" in d:
        target = d["run"]
        # Record first, then follow: a reference already in ``runs`` is
        # never loaded a second time.
        if isinstance(target, string_types) and target not in runs:
            runs.add(target)
            find_run(loadref(None, target), loadref, runs)

    for child in d.values():
        find_run(child, loadref, runs)
49 | |
50 | |
def find_ids(d, ids):  # type: (Any, Set[Text]) -> None
    """Add every string ``id`` and ``name`` value found in *d* to *ids*.

    Sequences and mappings are walked recursively; any other value is
    ignored.

    :param d: the structure to walk.
    :param ids: set that receives the identifiers; mutated in place.
    """
    if isinstance(d, MutableSequence):
        for element in d:
            find_ids(element, ids)
        return
    if not isinstance(d, MutableMapping):
        return

    for field in ("id", "name"):
        if field not in d:
            continue
        candidate = d[field]
        if isinstance(candidate, string_types):
            ids.add(candidate)

    for child in d.values():
        find_ids(child, ids)
61 | |
62 | |
def replace_refs(d, rewrite, stem, newstem):
    # type: (Any, Dict[Text, Text], Text, Text) -> None
    """Rewrite string references inside *d*, in place.

    A string found as a sequence element or a mapping value is replaced by
    ``rewrite[string]`` when it is already a key of *rewrite*.  Otherwise,
    if it starts with *stem*, that prefix is swapped for *newstem* and the
    new mapping is recorded in *rewrite*.  Non-string elements and values
    are walked recursively.

    :param d: the structure to update; other types are ignored.
    :param rewrite: known old-to-new reference mapping; extended in place.
    :param stem: prefix to look for on references not yet in *rewrite*.
    :param newstem: prefix that replaces *stem*.
    """
    if isinstance(d, MutableSequence):
        for index, item in enumerate(d):
            if not isinstance(item, string_types):
                replace_refs(item, rewrite, stem, newstem)
            elif item in rewrite:
                d[index] = rewrite[item]
            elif item.startswith(stem):
                replacement = newstem + item[len(stem):]
                d[index] = replacement
                rewrite[item] = replacement
    elif isinstance(d, MutableMapping):
        # Only values of existing keys are reassigned, so the mapping keeps
        # its size while it is being iterated.
        for key, value in d.items():
            if not isinstance(value, string_types):
                replace_refs(value, rewrite, stem, newstem)
                continue
            if value in rewrite:
                d[key] = rewrite[value]
            elif value.startswith(stem):
                suffix = value[len(stem):]
                # prevent appending newstems if tool is already packed
                if suffix.startswith(newstem.strip("#")):
                    replacement = "#" + suffix
                else:
                    replacement = newstem + suffix
                d[key] = replacement
                rewrite[value] = replacement
89 | |
def import_embed(d, seen):
    # type: (Any, Set[Text]) -> None
    """Replace repeated identified objects in *d* with ``$import`` stubs.

    The structure is walked depth-first, visiting mapping keys in sorted
    order.  The first time a mapping's ``id`` (or, failing that, ``name``)
    is met, the value is recorded in *seen*.  Any later mapping carrying an
    already-seen value is emptied and reduced to ``{"$import": <value>}``.

    :param d: the structure to rewrite in place; other types are ignored.
    :param seen: identifiers already encountered; mutated in place.
    """
    if isinstance(d, MutableSequence):
        for item in d:
            import_embed(item, seen)
        return
    if not isinstance(d, MutableMapping):
        return

    for key in ("id", "name"):
        if key not in d:
            continue
        ident = d[key]
        if ident not in seen:
            seen.add(ident)
            break
        # Seen before: collapse this mapping into a reference.  The loop
        # carries on, but the cleared mapping no longer has "name".
        d.clear()
        d["$import"] = ident

    for key in sorted(d):
        import_embed(d[key], seen)
109 | |
110 | |
def pack(document_loader,  # type: Loader
         processobj,       # type: Union[Dict[Text, Any], List[Dict[Text, Any]]]
         uri,              # type: Text
         metadata,         # type: Dict[Text, Text]
         rewrite_out=None  # type: Optional[Dict[Text, Text]]
        ):  # type: (...) -> Dict[Text, Any]
    """Build one packed document from *processobj* and everything it runs.

    The result is a ``CommentedMap`` with a ``$graph`` list holding a deep
    copy of every referenced Workflow, CommandLineTool and ExpressionTool,
    a ``cwlVersion`` taken from *metadata*, and, when any were found, a
    ``$schemas`` list.  Identifiers are rewritten to short ``#...``
    fragments, with *uri* itself becoming ``#main``.

    :param document_loader: loader used to resolve references; it is
        wrapped in a ``SubLoader`` whose index is reset and re-seeded here.
    :param processobj: the process object, or a list of process objects.
    :param uri: identifier of the main process.  When *processobj* is a
        list and *uri* has no fragment, the entry whose id ends in
        ``#main`` is used instead.
    :param metadata: document metadata; ``cwlVersion`` is required,
        ``$namespaces`` and ``$schemas`` are optional, and ``id`` is
        required when *processobj* is a list.
    :param rewrite_out: optional dict that receives the old-to-new
        identifier mapping; it is filled in place.
    :returns: the packed document.
    """

    # Wrap the loader, start from an empty index, and seed it with the
    # objects we were handed.
    document_loader = SubLoader(document_loader)
    document_loader.idx = {}
    if isinstance(processobj, MutableMapping):
        document_loader.idx[processobj["id"]] = CommentedMap(iteritems(processobj))
    elif isinstance(processobj, MutableSequence):
        _, frag = urllib.parse.urldefrag(uri)
        for po in processobj:
            if not frag:
                if po["id"].endswith("#main"):
                    uri = po["id"]
            document_loader.idx[po["id"]] = CommentedMap(iteritems(po))
        document_loader.idx[metadata["id"]] = CommentedMap(iteritems(metadata))

    def loadref(base, uri):
        # type: (Optional[Text], Text) -> Union[Dict[Text, Any], List[Dict[Text, Any]], Text, None]
        """Resolve *uri* against *base* and return only the loaded object."""
        return document_loader.resolve_ref(uri, base_url=base)[0]

    # Collect the identifiers of the main object and of every document
    # reachable through string-valued "run" references.
    ids = set()  # type: Set[Text]
    find_ids(processobj, ids)

    runs = {uri}
    find_run(processobj, loadref, runs)

    for f in runs:
        find_ids(document_loader.resolve_ref(f)[0], ids)

    names = set()  # type: Set[Text]
    if rewrite_out is None:
        rewrite = {}  # type: Dict[Text, Text]
    else:
        rewrite = rewrite_out

    mainpath, _ = urllib.parse.urldefrag(uri)

    def rewrite_id(r, mainuri):
        # type: (Text, Text) -> None
        """Record in ``rewrite`` the packed identifier for *r*."""
        if r == mainuri:
            rewrite[r] = "#main"
        elif r.startswith(mainuri) and r[len(mainuri)] in ("#", "/"):
            # r is strictly longer than mainuri here, so the index is valid.
            if r[len(mainuri):].startswith("#main/"):
                rewrite[r] = "#" + uniquename(r[len(mainuri)+1:], names)
            else:
                rewrite[r] = "#" + uniquename("main/"+r[len(mainuri)+1:], names)
        else:
            path, frag = urllib.parse.urldefrag(r)
            if path == mainpath:
                rewrite[r] = "#" + uniquename(frag, names)
            else:
                # Identifiers from another file share one entry per file.
                if path not in rewrite:
                    rewrite[path] = "#" + uniquename(shortname(path), names)

    # Sorted, so that unique names are handed out in a stable order.
    sortedids = sorted(ids)

    for r in sortedids:
        rewrite_id(r, uri)

    packed = CommentedMap((("$graph", CommentedSeq()),
                           ("cwlVersion", metadata["cwlVersion"])))
    namespaces = metadata.get('$namespaces', None)

    schemas = set()  # type: Set[Text]
    if '$schemas' in metadata:
        schemas.update(metadata["$schemas"])
    for r in sorted(runs):
        # Use a separate name so the ``metadata`` argument is not rebound.
        dcr, run_metadata = document_loader.resolve_ref(r)
        if isinstance(dcr, CommentedSeq):
            dcr = dcr[0]
            dcr = cast(CommentedMap, dcr)
        if not isinstance(dcr, MutableMapping):
            continue
        run_metadata = cast(Dict[Text, Any], run_metadata)
        if "$schemas" in run_metadata:
            schemas.update(run_metadata["$schemas"])
        if dcr.get("class") not in ("Workflow", "CommandLineTool", "ExpressionTool"):
            continue
        dc = cast(Dict[Text, Any], copy.deepcopy(dcr))
        v = rewrite[r]
        dc["id"] = v
        # These are document-level fields; drop them from the copy.
        for n in ("name", "cwlVersion", "$namespaces", "$schemas"):
            if n in dc:
                del dc[n]
        packed["$graph"].append(dc)

    if schemas:
        # Sorted so that the output does not depend on set iteration order.
        packed["$schemas"] = sorted(schemas)

    # replace_refs adds entries to ``rewrite`` as it goes, so iterate over
    # a snapshot of the keys.
    for r in list(rewrite.keys()):
        v = rewrite[r]
        replace_refs(packed, rewrite, r + "/" if "#" in r else r + "#", v + "/")

    import_embed(packed, set())

    if len(packed["$graph"]) == 1:
        # duplicate 'cwlVersion' and $schemas inside $graph when there is only
        # a single item because we will print the contents inside '$graph'
        # rather than whole dict
        packed["$graph"][0]["cwlVersion"] = packed["cwlVersion"]
        if schemas:
            packed["$graph"][0]["$schemas"] = sorted(schemas)
    # always include $namespaces in the #main
    # (skipped when nothing was packed, which would otherwise raise
    # IndexError on the empty $graph)
    if namespaces and packed["$graph"]:
        packed["$graph"][0]["$namespaces"] = namespaces

    return packed