Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/cwltool/pack.py @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
| author | shellac | 
|---|---|
| date | Mon, 01 Jun 2020 08:59:25 -0400 | 
| parents | 79f47841a781 | 
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| 4:79f47841a781 | 5:9b1c78e6ba9c | 
|---|---|
| 1 """Reformat a CWL document and all its references to be a single stream.""" | |
| 2 from __future__ import absolute_import | |
| 3 | |
| 4 import copy | |
| 5 from typing import (Any, Callable, Dict, List, MutableMapping, MutableSequence, | |
| 6 Optional, Set, Union, cast) | |
| 7 | |
| 8 from ruamel.yaml.comments import CommentedMap, CommentedSeq | |
| 9 from schema_salad.ref_resolver import Loader # pylint: disable=unused-import | |
| 10 from schema_salad.ref_resolver import SubLoader | |
| 11 from schema_salad.sourceline import cmap | |
| 12 from six import iteritems, string_types | |
| 13 from six.moves import urllib | |
| 14 from typing_extensions import Text # pylint: disable=unused-import | |
| 15 # move to a regular typing import when Python 3.3-3.6 is no longer supported | |
| 16 | |
| 17 from .process import shortname, uniquename | |
| 18 | |
| 19 | |
def flatten_deps(d, files):  # type: (Any, Set[Text]) -> None
    """Gather the ``location`` of every ``File`` object found in *d*.

    Sequences are walked element by element.  For a mapping, its
    ``location`` is added to *files* when its ``class`` is ``"File"``,
    and any ``secondaryFiles`` / ``listing`` values are walked in turn.
    A mapping without a ``class`` key raises ``KeyError``.  Anything
    that is neither a sequence nor a mapping is ignored.
    """
    if isinstance(d, MutableSequence):
        for entry in d:
            flatten_deps(entry, files)
        return
    if not isinstance(d, MutableMapping):
        return

    if d["class"] == "File":
        files.add(d["location"])
    # Both keys hold nested file/directory objects; visit them in this order.
    for nested_key in ("secondaryFiles", "listing"):
        if nested_key in d:
            flatten_deps(d[nested_key], files)
| 31 | |
# Signature of the reference-loading callback taken by find_run: it is called
# as loadref(base_url, reference), where base_url may be None, and returns the
# loaded document (a mapping, a list of mappings, a string, or None).
LoadRefType = Callable[[Optional[Text], Text], Union[Dict[Text, Any], List[Dict[Text, Any]], Text, None]]
| 33 | |
| 34 | |
def find_run(d,        # type: Any
             loadref,  # type: LoadRefType
             runs      # type: Set[Text]
            ):  # type: (...) -> None
    """Collect every string-valued ``run`` reference reachable from *d*.

    Each reference not yet in *runs* is added and then loaded with
    ``loadref(None, reference)`` so that the loaded document is searched
    as well.  References already in *runs* are not loaded again.  All
    values of a mapping and all elements of a sequence are searched.
    """
    if isinstance(d, MutableSequence):
        for item in d:
            find_run(item, loadref, runs)
        return
    if not isinstance(d, MutableMapping):
        return

    if "run" in d:
        target = d["run"]
        if isinstance(target, string_types) and target not in runs:
            # Record before loading so a cyclic reference cannot recurse forever.
            runs.add(target)
            find_run(loadref(None, target), loadref, runs)
    for child in d.values():
        find_run(child, loadref, runs)
| 49 | |
| 50 | |
def find_ids(d, ids):  # type: (Any, Set[Text]) -> None
    """Add every string ``id`` or ``name`` value found anywhere in *d* to *ids*.

    Sequences and mappings are searched recursively; any other value is
    ignored.
    """
    if isinstance(d, MutableSequence):
        children = d
    elif isinstance(d, MutableMapping):
        ids.update(
            d[field]
            for field in ("id", "name")
            if field in d and isinstance(d[field], string_types)
        )
        children = d.values()
    else:
        return

    for child in children:
        find_ids(child, ids)
| 61 | |
| 62 | |
def replace_refs(d, rewrite, stem, newstem):
    # type: (Any, Dict[Text, Text], Text, Text) -> None
    """Rewrite string references inside *d* in place.

    A string that is a key of *rewrite* is replaced by its mapped value.
    Otherwise, a string starting with *stem* has that prefix swapped for
    *newstem*, and the old-to-new pair is recorded in *rewrite*.
    Non-string elements and values are processed recursively.

    For mapping values only: when the text after *stem* already starts
    with *newstem* stripped of ``#`` characters, the result is ``"#"``
    plus that text instead of *newstem* plus that text.
    """
    if isinstance(d, MutableSequence):
        for index, item in enumerate(d):
            if not isinstance(item, string_types):
                replace_refs(item, rewrite, stem, newstem)
                continue
            if item in rewrite:
                d[index] = rewrite[item]
            elif item.startswith(stem):
                d[index] = newstem + item[len(stem):]
                rewrite[item] = d[index]
    elif isinstance(d, MutableMapping):
        for key, value in d.items():
            if not isinstance(value, string_types):
                replace_refs(value, rewrite, stem, newstem)
                continue
            if value in rewrite:
                d[key] = rewrite[value]
            elif value.startswith(stem):
                suffix = value[len(stem):]
                # prevent appending newstems if tool is already packed
                already_packed = suffix.startswith(newstem.strip("#"))
                d[key] = "#" + suffix if already_packed else newstem + suffix
                rewrite[value] = d[key]
| 89 | |
def import_embed(d, seen):
    # type: (Any, Set[Text]) -> None
    """Replace repeated identified objects in *d* with ``$import`` stubs.

    For each mapping, the first of ``id`` / ``name`` that is present is
    taken as its identifier.  If that identifier is already in *seen*,
    the mapping is emptied and left holding only ``{"$import": identifier}``;
    otherwise the identifier is added to *seen*.  The mapping's values are
    then visited in sorted key order, and sequences element by element.
    """
    if isinstance(d, MutableSequence):
        for element in d:
            import_embed(element, seen)
        return
    if not isinstance(d, MutableMapping):
        return

    for field in ("id", "name"):
        if field not in d:
            continue
        ident = d[field]
        if ident in seen:
            # Already emitted elsewhere: keep only a pointer to it.
            d.clear()
            d["$import"] = ident
        else:
            seen.add(ident)
        # After a clear() no further identifier key can be present, so
        # stopping here is the same as examining the remaining field.
        break

    for key in sorted(d):
        import_embed(d[key], seen)
| 109 | |
| 110 | |
def pack(document_loader,  # type: Loader
         processobj,       # type: Union[Dict[Text, Any], List[Dict[Text, Any]]]
         uri,              # type: Text
         metadata,         # type: Dict[Text, Text]
         rewrite_out=None  # type: Optional[Dict[Text, Text]]
        ):  # type: (...) -> Dict[Text, Any]
    """Combine a process and everything it runs into one ``$graph`` document.

    :param document_loader: loader used to resolve references; it is wrapped
        in a ``SubLoader`` whose ``idx`` is reset to an empty dict.
    :param processobj: a single process mapping, or a sequence of them.
    :param uri: identifier of the main process.  When *processobj* is a
        sequence and *uri* has no fragment, an entry whose ``id`` ends with
        ``#main`` replaces it.
    :param metadata: document metadata; ``cwlVersion`` is required, ``id`` is
        read when *processobj* is a sequence, and ``$namespaces`` /
        ``$schemas`` are used when present.
    :param rewrite_out: optional dict that receives the old-to-new identifier
        mapping; it is filled in place.
    :returns: a ``CommentedMap`` with ``$graph`` and ``cwlVersion`` keys,
        plus ``$schemas`` when any were collected.
    """

    # Wrap the loader and start from an empty index, then register the
    # process object(s) under their own ids.
    document_loader = SubLoader(document_loader)
    document_loader.idx = {}
    if isinstance(processobj, MutableMapping):
        document_loader.idx[processobj["id"]] = CommentedMap(iteritems(processobj))
    elif isinstance(processobj, MutableSequence):
        _, frag = urllib.parse.urldefrag(uri)
        for po in processobj:
            if not frag:
                # No fragment requested: the "#main" entry becomes the main uri.
                if po["id"].endswith("#main"):
                    uri = po["id"]
            document_loader.idx[po["id"]] = CommentedMap(iteritems(po))
        document_loader.idx[metadata["id"]] = CommentedMap(iteritems(metadata))

    def loadref(base, uri):
        # type: (Optional[Text], Text) -> Union[Dict[Text, Any], List[Dict[Text, Any]], Text, None]
        # Return only the first element of the resolve_ref() result.
        return document_loader.resolve_ref(uri, base_url=base)[0]

    # Identifiers appearing in the process object itself.
    ids = set()  # type: Set[Text]
    find_ids(processobj, ids)

    # Every document reachable through "run" references, seeded with the main uri.
    runs = {uri}
    find_run(processobj, loadref, runs)

    # Identifiers appearing in each of the referenced documents.
    for f in runs:
        find_ids(document_loader.resolve_ref(f)[0], ids)

    # Names already handed out, passed to uniquename() for every new name.
    names = set()  # type: Set[Text]
    if rewrite_out is None:
        rewrite = {}  # type: Dict[Text, Text]
    else:
        # Share the caller's dict so it observes the final mapping.
        rewrite = rewrite_out

    mainpath, _ = urllib.parse.urldefrag(uri)

    def rewrite_id(r, mainuri):
        # type: (Text, Text) -> None
        # Record in `rewrite` the packed, fragment-only identifier for `r`.
        if r == mainuri:
            rewrite[r] = "#main"
        elif r.startswith(mainuri) and r[len(mainuri)] in ("#", "/"):
            # Identifier nested under the main uri: place it under "main/"
            # unless its fragment already starts with "main/".
            if r[len(mainuri):].startswith("#main/"):
                rewrite[r] = "#" + uniquename(r[len(mainuri)+1:], names)
            else:
                rewrite[r] = "#" + uniquename("main/"+r[len(mainuri)+1:], names)
        else:
            path, frag = urllib.parse.urldefrag(r)
            if path == mainpath:
                # Same file as the main process: reuse the fragment.
                rewrite[r] = "#" + uniquename(frag, names)
            else:
                # Different file: only the file path gets an entry here
                # (once); `r` itself is not added by this branch.
                if path not in rewrite:
                    rewrite[path] = "#" + uniquename(shortname(path), names)

    # Sorted so names are assigned in a stable order.
    sortedids = sorted(ids)

    for r in sortedids:
        rewrite_id(r, uri)

    packed = CommentedMap((("$graph", CommentedSeq()),
                           ("cwlVersion", metadata["cwlVersion"])))
    namespaces = metadata.get('$namespaces', None)

    schemas = set()  # type: Set[Text]
    if '$schemas' in metadata:
        for each_schema in metadata["$schemas"]:
            schemas.add(each_schema)
    for r in sorted(runs):
        # NOTE: this rebinds the `metadata` parameter to the metadata of each
        # resolved document; cwlVersion and $namespaces were read above.
        dcr, metadata = document_loader.resolve_ref(r)
        if isinstance(dcr, CommentedSeq):
            dcr = dcr[0]
            dcr = cast(CommentedMap, dcr)
        if not isinstance(dcr, MutableMapping):
            continue
        metadata = cast(Dict[Text, Any], metadata)
        if "$schemas" in metadata:
            for s in metadata["$schemas"]:
                schemas.add(s)
        # Only these process classes are added to $graph.
        if dcr.get("class") not in ("Workflow", "CommandLineTool", "ExpressionTool"):
            continue
        # Deep copy so the resolved document itself is not modified.
        dc = cast(Dict[Text, Any], copy.deepcopy(dcr))
        v = rewrite[r]
        dc["id"] = v
        # Remove document-level keys from the graph entry.
        for n in ("name", "cwlVersion", "$namespaces", "$schemas"):
            if n in dc:
                del dc[n]
        packed["$graph"].append(dc)

    if schemas:
        packed["$schemas"] = list(schemas)

    # Iterate over a snapshot of the keys: replace_refs() adds entries to
    # `rewrite` while it runs.
    for r in list(rewrite.keys()):
        v = rewrite[r]
        replace_refs(packed, rewrite, r + "/" if "#" in r else r + "#", v + "/")

    # Replace repeated identified objects with {"$import": id} stubs.
    import_embed(packed, set())

    if len(packed["$graph"]) == 1:
        # duplicate 'cwlVersion' and $schemas inside $graph when there is only
        # a single item because we will print the contents inside '$graph'
        # rather than whole dict
        packed["$graph"][0]["cwlVersion"] = packed["cwlVersion"]
        if schemas:
            packed["$graph"][0]["$schemas"] = list(schemas)
    # always include $namespaces in the #main
    # NOTE(review): this indexes $graph[0] without checking that $graph is
    # non-empty or that its first entry is "#main" -- confirm with callers.
    if namespaces:
        packed["$graph"][0]["$namespaces"] = namespaces

    return packed
