comparison env/lib/python3.7/site-packages/cwltool/pack.py @ 2:6af9afd405e9 draft

"planemo upload commit 0a63dd5f4d38a1f6944587f52a8cd79874177fc1"
author shellac
date Thu, 14 May 2020 14:56:58 -0400 (2020-05-14)
parents 26e78fe6e8c4
children
comparison
equal deleted inserted replaced
1:75ca89e9b81c 2:6af9afd405e9
1 """Reformat a CWL document and all its references to be a single stream."""
2 from __future__ import absolute_import
3
4 import copy
5 from typing import (Any, Callable, Dict, List, MutableMapping, MutableSequence,
6 Optional, Set, Union, cast)
7
8 from ruamel.yaml.comments import CommentedMap, CommentedSeq
9 from schema_salad.ref_resolver import Loader # pylint: disable=unused-import
10 from schema_salad.ref_resolver import SubLoader
11 from schema_salad.sourceline import cmap
12 from six import iteritems, string_types
13 from six.moves import urllib
14 from typing_extensions import Text # pylint: disable=unused-import
15 # move to a regular typing import when Python 3.3-3.6 is no longer supported
16
17 from .process import shortname, uniquename
18
19
def flatten_deps(d, files):  # type: (Any, Set[Text]) -> None
    """Collect the ``location`` of every ``File`` object reachable from *d*.

    Sequences are walked element by element.  For a mapping, its
    ``location`` is added to *files* when its ``class`` equals ``"File"``,
    and the walk continues into its ``secondaryFiles`` and ``listing``
    entries when present.  Any other value is ignored.

    ``class`` (and ``location`` for files) are indexed directly, so a mapping
    lacking them raises from the lookup.

    :param d: a dependency object, a sequence of them, or anything else.
    :param files: set updated in place with the locations found.
    """
    if isinstance(d, MutableSequence):
        for entry in d:
            flatten_deps(entry, files)
        return
    if not isinstance(d, MutableMapping):
        return

    if d["class"] == "File":
        files.add(d["location"])
    # Both keys hold nested dependency objects; visit them in this order.
    for nested_key in ("secondaryFiles", "listing"):
        if nested_key in d:
            flatten_deps(d[nested_key], files)
31
# Type of the reference-loading callback accepted by find_run(): it is called
# with an optional base URL and a reference URI, and yields a mapping, a list
# of mappings, a plain string, or None.
LoadRefType = Callable[
    [Optional[Text], Text],
    Union[Dict[Text, Any], List[Dict[Text, Any]], Text, None],
]
33
34
def find_run(d,        # type: Any
             loadref,  # type: LoadRefType
             runs      # type: Set[Text]
            ):  # type: (...) -> None
    """Record every string-valued ``run`` reference reachable from *d*.

    Sequences and mapping values are walked recursively.  When a mapping has
    a ``run`` entry that is a string not yet in *runs*, it is added to *runs*
    and the document returned by ``loadref(None, <that string>)`` is walked
    as well.  Checking *runs* before loading means each reference is loaded
    at most once.

    :param d: document fragment to scan.
    :param loadref: callback used to load a referenced document.
    :param runs: set of references seen so far, updated in place.
    """
    if isinstance(d, MutableSequence):
        for item in d:
            find_run(item, loadref, runs)
        return
    if not isinstance(d, MutableMapping):
        return

    if "run" in d:
        target = d["run"]
        if isinstance(target, string_types) and target not in runs:
            runs.add(target)
            find_run(loadref(None, target), loadref, runs)
    # Every value is scanned, including an inline (non-string) ``run``.
    for child in d.values():
        find_run(child, loadref, runs)
49
50
def find_ids(d, ids):  # type: (Any, Set[Text]) -> None
    """Collect every string ``id`` and ``name`` value reachable from *d*.

    Sequences and mapping values are walked recursively; values that are
    neither sequences nor mappings are ignored.

    :param d: document fragment to scan.
    :param ids: set updated in place with the identifiers found.
    """
    if isinstance(d, MutableSequence):
        children = d
    elif isinstance(d, MutableMapping):
        for key in ("id", "name"):
            if key not in d:
                continue
            ident = d[key]
            if isinstance(ident, string_types):
                ids.add(ident)
        children = d.values()
    else:
        return

    for child in children:
        find_ids(child, ids)
61
62
def replace_refs(d, rewrite, stem, newstem):
    # type: (Any, Dict[Text, Text], Text, Text) -> None
    """Rewrite string references inside *d* in place.

    Each string element (of a sequence) or string value (of a mapping) is
    handled as follows:

    * if it is a key of *rewrite*, it is replaced by the mapped value;
    * otherwise, if it starts with *stem*, that prefix is swapped for
      *newstem* and the new pair is recorded in *rewrite*.

    In mappings only, when the part after *stem* already begins with
    *newstem* stripped of ``#`` characters, the result is ``"#"`` plus that
    part instead, so the new stem is not applied twice.  Non-string children
    are processed recursively.

    :param d: document fragment to modify.
    :param rewrite: known old-to-new mappings; extended in place.
    :param stem: prefix identifying references to rewrite.
    :param newstem: replacement prefix.
    """
    if isinstance(d, MutableSequence):
        for index, item in enumerate(d):
            if not isinstance(item, string_types):
                replace_refs(item, rewrite, stem, newstem)
                continue
            if item in rewrite:
                d[index] = rewrite[item]
            elif item.startswith(stem):
                d[index] = newstem + item[len(stem):]
                rewrite[item] = d[index]
    elif isinstance(d, MutableMapping):
        # Only existing keys are reassigned, so the mapping keeps its size
        # while it is being iterated.
        for key, value in d.items():
            if not isinstance(value, string_types):
                replace_refs(value, rewrite, stem, newstem)
                continue
            if value in rewrite:
                d[key] = rewrite[value]
            elif value.startswith(stem):
                suffix = value[len(stem):]
                # prevent appending newstems if tool is already packed
                if suffix.startswith(newstem.strip("#")):
                    d[key] = "#" + suffix
                else:
                    d[key] = newstem + suffix
                rewrite[value] = d[key]
89
def import_embed(d, seen):
    # type: (Any, Set[Text]) -> None
    """Replace repeated embedded objects with ``$import`` stubs, in place.

    Sequences are walked element by element.  For a mapping, the first of
    ``id`` / ``name`` that is present decides its fate: if that identifier is
    already in *seen*, the mapping is emptied and left as
    ``{"$import": <identifier>}``; otherwise the identifier is added to
    *seen*.  The mapping's values are then visited in sorted key order.

    :param d: document fragment to process.
    :param seen: identifiers encountered so far, updated in place.
    """
    if isinstance(d, MutableSequence):
        for element in d:
            import_embed(element, seen)
        return
    if not isinstance(d, MutableMapping):
        return

    for key in ("id", "name"):
        if key not in d:
            continue
        ident = d[key]
        if ident not in seen:
            seen.add(ident)
            break
        # Already emitted elsewhere: keep only a pointer to it.
        d.clear()
        d["$import"] = ident

    for key in sorted(d.keys()):
        import_embed(d[key], seen)
109
110
def pack(document_loader,  # type: Loader
         processobj,       # type: Union[Dict[Text, Any], List[Dict[Text, Any]]]
         uri,              # type: Text
         metadata,         # type: Dict[Text, Text]
         rewrite_out=None  # type: Optional[Dict[Text, Text]]
        ):  # type: (...) -> Dict[Text, Any]
    """Combine a process and everything it runs into one ``$graph`` document.

    The steps are:

    1. Index *processobj* (and, for a list, *metadata*) in a fresh
       ``SubLoader`` built on *document_loader*.
    2. Collect every ``run`` reference and every ``id`` / ``name`` reachable
       from the process and from the documents it runs.
    3. Assign each identifier a new ``#``-prefixed name (the main process
       becomes ``#main``).
    4. Deep-copy each referenced ``Workflow`` / ``CommandLineTool`` /
       ``ExpressionTool`` into ``$graph``, rewrite references to the new
       names, and replace repeated embedded objects with ``$import`` stubs.

    ``$schemas`` is written in sorted order so the result does not depend on
    set iteration order, which varies between interpreter runs.

    :param document_loader: loader used to resolve references.
    :param processobj: the process mapping, or a list of process mappings.
    :param uri: identifier of the main process.  For a list input with no
        fragment in *uri*, an entry whose id ends in ``#main`` replaces it.
    :param metadata: document metadata; ``cwlVersion`` is required, while
        ``$namespaces`` and ``$schemas`` are optional.
    :param rewrite_out: optional dict that receives the old-to-new identifier
        mapping (filled in place).
    :return: mapping with ``$graph`` and ``cwlVersion``, plus ``$schemas``
        when any schema was found.
    """
    document_loader = SubLoader(document_loader)
    document_loader.idx = {}
    if isinstance(processobj, MutableMapping):
        document_loader.idx[processobj["id"]] = CommentedMap(iteritems(processobj))
    elif isinstance(processobj, MutableSequence):
        _, frag = urllib.parse.urldefrag(uri)
        for po in processobj:
            if not frag:
                if po["id"].endswith("#main"):
                    uri = po["id"]
            document_loader.idx[po["id"]] = CommentedMap(iteritems(po))
        document_loader.idx[metadata["id"]] = CommentedMap(iteritems(metadata))

    def loadref(base, uri):
        # type: (Optional[Text], Text) -> Union[Dict[Text, Any], List[Dict[Text, Any]], Text, None]
        return document_loader.resolve_ref(uri, base_url=base)[0]

    ids = set()  # type: Set[Text]
    find_ids(processobj, ids)

    runs = {uri}
    find_run(processobj, loadref, runs)

    for f in runs:
        find_ids(document_loader.resolve_ref(f)[0], ids)

    names = set()  # type: Set[Text]
    if rewrite_out is None:
        rewrite = {}  # type: Dict[Text, Text]
    else:
        # Share the caller's dict so it observes the final mapping.
        rewrite = rewrite_out

    mainpath, _ = urllib.parse.urldefrag(uri)

    def rewrite_id(r, mainuri):
        # type: (Text, Text) -> None
        """Record the packed name for identifier *r* in ``rewrite``."""
        if r == mainuri:
            rewrite[r] = "#main"
        elif r.startswith(mainuri) and r[len(mainuri)] in ("#", "/"):
            # Identifier nested under the main process.
            if r[len(mainuri):].startswith("#main/"):
                rewrite[r] = "#" + uniquename(r[len(mainuri)+1:], names)
            else:
                rewrite[r] = "#" + uniquename("main/"+r[len(mainuri)+1:], names)
        else:
            path, frag = urllib.parse.urldefrag(r)
            if path == mainpath:
                # Same file as the main process, different fragment.
                rewrite[r] = "#" + uniquename(frag, names)
            else:
                # Another file: only the file path itself gets an entry.
                if path not in rewrite:
                    rewrite[path] = "#" + uniquename(shortname(path), names)

    # Sorted so the generated unique names do not depend on set order.
    sortedids = sorted(ids)

    for r in sortedids:
        rewrite_id(r, uri)

    packed = CommentedMap((("$graph", CommentedSeq()),
                           ("cwlVersion", metadata["cwlVersion"])))
    namespaces = metadata.get('$namespaces', None)

    schemas = set()  # type: Set[Text]
    if '$schemas' in metadata:
        schemas.update(metadata["$schemas"])
    for r in sorted(runs):
        # A separate name keeps the ``metadata`` parameter intact.
        dcr, run_metadata = document_loader.resolve_ref(r)
        if isinstance(dcr, CommentedSeq):
            dcr = dcr[0]
            dcr = cast(CommentedMap, dcr)
        if not isinstance(dcr, MutableMapping):
            continue
        run_metadata = cast(Dict[Text, Any], run_metadata)
        if "$schemas" in run_metadata:
            schemas.update(run_metadata["$schemas"])
        if dcr.get("class") not in ("Workflow", "CommandLineTool", "ExpressionTool"):
            continue
        dc = cast(Dict[Text, Any], copy.deepcopy(dcr))
        v = rewrite[r]
        dc["id"] = v
        # These keys are document-level and are dropped from graph entries.
        for n in ("name", "cwlVersion", "$namespaces", "$schemas"):
            if n in dc:
                del dc[n]
        packed["$graph"].append(dc)

    if schemas:
        packed["$schemas"] = sorted(schemas)

    # replace_refs() adds entries to ``rewrite``; iterate over a snapshot.
    for r in list(rewrite.keys()):
        v = rewrite[r]
        replace_refs(packed, rewrite, r + "/" if "#" in r else r + "#", v + "/")

    import_embed(packed, set())

    if len(packed["$graph"]) == 1:
        # duplicate 'cwlVersion' and $schemas inside $graph when there is only
        # a single item because we will print the contents inside '$graph'
        # rather than whole dict
        packed["$graph"][0]["cwlVersion"] = packed["cwlVersion"]
        if schemas:
            # A fresh list, so the two ``$schemas`` values are not aliased.
            packed["$graph"][0]["$schemas"] = sorted(schemas)
    # always include $namespaces in the #main
    if namespaces:
        packed["$graph"][0]["$namespaces"] = namespaces

    return packed