Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/schema_salad/ref_resolver.py @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author | shellac |
---|---|
date | Mon, 01 Jun 2020 08:59:25 -0400 |
parents | 79f47841a781 |
children |
comparison
equal
deleted
inserted
replaced
4:79f47841a781 | 5:9b1c78e6ba9c |
---|---|
1 from __future__ import absolute_import | |
2 | |
3 import copy | |
4 import logging | |
5 import os | |
6 import re | |
7 import sys | |
8 import xml.sax | |
9 from io import open | |
10 from typing import Callable # pylint: disable=unused-import | |
11 from typing import ( | |
12 Any, | |
13 Dict, | |
14 Iterable, | |
15 List, | |
16 MutableMapping, | |
17 MutableSequence, | |
18 Optional, | |
19 Set, | |
20 Tuple, | |
21 TypeVar, | |
22 Union, | |
23 cast, | |
24 ) | |
25 | |
26 import requests | |
27 from cachecontrol.caches import FileCache | |
28 from cachecontrol.wrapper import CacheControl | |
29 from future.utils import raise_from | |
30 from rdflib.graph import Graph | |
31 from rdflib.namespace import OWL, RDF, RDFS | |
32 from rdflib.plugins.parsers.notation3 import BadSyntax | |
33 from six import StringIO, iteritems, string_types | |
34 from six.moves import range, urllib | |
35 from typing_extensions import Text # pylint: disable=unused-import | |
36 | |
37 from ruamel import yaml | |
38 from ruamel.yaml.comments import CommentedMap, CommentedSeq, LineCol | |
39 | |
40 from .exceptions import ValidationException, SchemaSaladException | |
41 from .sourceline import SourceLine, add_lc_filename, relname | |
42 from .utils import aslist, onWindows | |
43 | |
44 # move to a regular typing import when Python 3.3-3.6 is no longer supported | |
45 | |
46 | |
# Logger shared by the whole schema-salad package.
_logger = logging.getLogger("salad")
# A JSON-LD context: field name -> expansion rule (URI string, nested map, or list).
ContextType = Dict[Text, Union[Dict[Text, Any], Text, Iterable[Text]]]
DocumentType = TypeVar("DocumentType", CommentedSeq, CommentedMap)
DocumentOrStrType = TypeVar("DocumentOrStrType", CommentedSeq, CommentedMap, Text)

# Matches a Windows drive designator at the start of a URL path, e.g. "/C:".
_re_drive = re.compile(r"/([a-zA-Z]):")
53 | |
54 | |
def file_uri(path, split_frag=False):  # type: (str, bool) -> str
    """Convert a local filesystem path into a file:// URI.

    Paths already carrying a file:// scheme are returned untouched.  When
    *split_frag* is True a single "#fragment" suffix is kept as a fragment
    (percent-quoted) instead of being encoded into the path.
    """
    if path.startswith("file://"):
        # Already a file URI; nothing to do.
        return path
    frag = ""
    if split_frag:
        parts = path.split("#", 2)
        if len(parts) == 2:
            frag = "#" + urllib.parse.quote(str(parts[1]))
        urlpath = urllib.request.pathname2url(str(parts[0]))
    else:
        urlpath = urllib.request.pathname2url(path)
    # A path that already starts with "//" supplies its own authority slashes.
    prefix = "file:" if urlpath.startswith("//") else "file://"
    return "{}{}{}".format(prefix, urlpath, frag)
68 | |
69 | |
def uri_file_path(url):  # type: (str) -> str
    """Convert a file:// URI back into a local filesystem path.

    A fragment, if present, is re-attached (unquoted) after "#".  Raises
    ValidationException for URLs that are not file:// URIs.
    """
    split = urllib.parse.urlsplit(url)
    if split.scheme != "file":
        raise ValidationException("Not a file URI: {}".format(url))
    path = urllib.request.url2pathname(str(split.path))
    if split.fragment:
        path += "#" + urllib.parse.unquote(str(split.fragment))
    return path
79 | |
80 | |
def to_validation_exception(
    e,
):  # type: (yaml.error.MarkedYAMLError) -> ValidationException
    """Convert a ruamel.yaml MarkedYAMLError into a ValidationException tree.

    The problem mark becomes the leaf exception; when the error carries a
    context mark, that becomes a parent exception wrapping the leaf.  File
    names are made relative to the current working directory.
    """
    cwd_prefix = re.compile(r"^file://" + re.escape(os.getcwd()) + "/")

    def _located(message, mark):
        # Build a ValidationException positioned at *mark*, with the
        # leading file://<cwd>/ prefix stripped from the file name.
        exc = ValidationException(message)
        exc.file = re.sub(cwd_prefix, "", mark.name)
        exc.start = (mark.line + 1, mark.column + 1)
        exc.end = None
        return exc

    leaf = _located(e.problem, e.problem_mark)
    if not e.context:
        return leaf
    parent = _located(e.context, e.context_mark)
    parent.children = [leaf]
    return parent
102 | |
103 | |
class NormDict(CommentedMap):
    """A CommentedMap that canonicalizes every key with *normalize* before use."""

    def __init__(self, normalize=Text):  # type: (Callable[[Text], Text]) -> None
        super(NormDict, self).__init__()
        # Key canonicalization function; defaults to plain text conversion.
        self.normalize = normalize

    def __getitem__(self, key):  # type: (Any) -> Any
        nkey = self.normalize(key)
        return super(NormDict, self).__getitem__(nkey)

    def __setitem__(self, key, value):  # type: (Any, Any) -> Any
        nkey = self.normalize(key)
        return super(NormDict, self).__setitem__(nkey, value)

    def __delitem__(self, key):  # type: (Any) -> Any
        nkey = self.normalize(key)
        return super(NormDict, self).__delitem__(nkey)

    def __contains__(self, key):  # type: (Any) -> Any
        nkey = self.normalize(key)
        return super(NormDict, self).__contains__(nkey)
122 | |
123 | |
def merge_properties(a, b):  # type: (Dict[Any, Any], Dict[Any, Any]) -> Dict[Any, Any]
    """Merge two mappings into a new dict.

    Keys present in only one input are copied through unchanged; keys
    present in both have their values concatenated as lists (scalars are
    wrapped via aslist).  Neither input is modified.

    Note: the previous type comment claimed List inputs, but both
    arguments are indexed by key (``a[i]``/``b[i]``) and must therefore
    be mappings.
    """
    c = {}  # type: Dict[Any, Any]
    for key in a:
        if key not in b:
            c[key] = a[key]
    for key in b:
        if key not in a:
            c[key] = b[key]
    # Shared keys: concatenate both values as lists.
    for key in a:
        if key in b:
            c[key] = aslist(a[key]) + aslist(b[key])  # type: ignore

    return c
137 | |
138 | |
def SubLoader(loader):  # type: (Loader) -> Loader
    """Create a child Loader sharing *loader*'s context, graph, caches and fetcher setup."""
    shared = dict(
        schemagraph=loader.graph,
        foreign_properties=loader.foreign_properties,
        idx=loader.idx,
        cache=loader.cache,
        fetcher_constructor=loader.fetcher_constructor,
        skip_schemas=loader.skip_schemas,
        url_fields=loader.url_fields,
        allow_attachments=loader.allow_attachments,
    )
    return Loader(loader.ctx, **shared)
151 | |
152 | |
class Fetcher(object):
    """Abstract interface for retrieving documents by URL."""

    # URL schemes understood by the default implementation.
    schemes = [u"file", u"http", u"https", u"mailto"]

    def fetch_text(self, url):  # type: (Text) -> Text
        """Return the body of *url* as text."""
        raise NotImplementedError()

    def check_exists(self, url):  # type: (Text) -> bool
        """Return True if *url* is resolvable."""
        raise NotImplementedError()

    def urljoin(self, base_url, url):  # type: (Text, Text) -> Text
        """Resolve *url* relative to *base_url*."""
        raise NotImplementedError()

    def supported_schemes(self):  # type: () -> List[Text]
        """Return the list of URL schemes this fetcher accepts."""
        return self.schemes
167 | |
168 | |
class DefaultFetcher(Fetcher):
    """Fetcher that reads file:// paths directly and http(s):// via a requests session."""

    def __init__(
        self,
        cache,  # type: Dict[Text, Union[Text, bool]]
        session,  # type: Optional[requests.sessions.Session]
    ):  # type: (...) -> None
        # cache maps url -> fetched text, or True as an existence-only marker.
        self.cache = cache
        self.session = session

    def fetch_text(self, url):
        # type: (Text) -> Text
        """Return the text content of *url*, consulting the cache first.

        Raises ValidationException on fetch/read errors or unsupported schemes.
        """
        if url in self.cache and self.cache[url] is not True:
            # treat "True" as a placeholder that indicates something exists but
            # not necessarily what its contents is.
            return cast(Text, self.cache[url])

        split = urllib.parse.urlsplit(url)
        scheme, path = split.scheme, split.path

        if scheme in [u"http", u"https"] and self.session is not None:
            try:
                resp = self.session.get(url)
                resp.raise_for_status()
            except Exception as e:
                raise_from(
                    ValidationException("Error fetching {}: {}".format(url, e)), e
                )
            return resp.text
        if scheme == "file":
            try:
                # On Windows, url.path will be /drive:/path ; on Unix systems,
                # /path. As we want drive:/path instead of /drive:/path on Windows,
                # remove the leading /.
                if os.path.isabs(
                    path[1:]
                ):  # checking if path is valid after removing front / or not
                    path = path[1:]
                with open(
                    urllib.request.url2pathname(str(path)), encoding="utf-8"
                ) as fp:
                    return Text(fp.read())

            except (OSError, IOError) as err:
                # err.filename matching path means the path itself was the
                # problem; otherwise include the full url for context.
                if err.filename == path:
                    raise_from(ValidationException(Text(err)), err)
                else:
                    raise_from(
                        ValidationException("Error reading {}: {}".format(url, err)),
                        err,
                    )
        raise ValidationException("Unsupported scheme in url: {}".format(url))

    def check_exists(self, url):  # type: (Text) -> bool
        """Return True if *url* exists (HEAD for http(s), os.path for file)."""
        if url in self.cache:
            return True

        split = urllib.parse.urlsplit(url)
        scheme, path = split.scheme, split.path

        if scheme in [u"http", u"https"] and self.session is not None:
            try:
                resp = self.session.head(url)
                resp.raise_for_status()
            except Exception:
                return False
            # Record existence only (True), not contents; see fetch_text().
            self.cache[url] = True
            return True
        if scheme == "file":
            return os.path.exists(urllib.request.url2pathname(str(path)))
        if scheme == "mailto":
            # mailto: addresses are accepted without verification.
            return True
        raise ValidationException("Unsupported scheme in url: {}".format(url))

    def urljoin(self, base_url, url):  # type: (Text, Text) -> Text
        """Resolve *url* against *base_url*, refusing remote->local escapes.

        Includes special handling for Windows drive letters, which
        urllib.parse would otherwise treat as one-letter URI schemes.
        """
        if url.startswith("_:"):
            # Blank-node identifiers are never joined.
            return url

        basesplit = urllib.parse.urlsplit(base_url)
        split = urllib.parse.urlsplit(url)
        # A remote document must not be able to reference local files.
        if basesplit.scheme and basesplit.scheme != "file" and split.scheme == "file":
            raise ValidationException(
                "Not resolving potential remote exploit {} from base {}".format(
                    url, base_url
                )
            )

        if sys.platform == "win32":
            if base_url == url:
                return url
            basesplit = urllib.parse.urlsplit(base_url)
            # note that below might split
            # "C:" with "C" as URI scheme
            split = urllib.parse.urlsplit(url)

            # A one-letter "scheme" is really a Windows drive designator.
            has_drive = split.scheme and len(split.scheme) == 1

            if basesplit.scheme == "file":
                # Special handling of relative file references on Windows
                # as urllib seems to not be quite up to the job

                # netloc MIGHT appear in equivalents of UNC Strings
                # \\server1.example.com\path as
                # file:///server1.example.com/path
                # https://tools.ietf.org/html/rfc8089#appendix-E.3.2
                # (TODO: test this)
                netloc = split.netloc or basesplit.netloc

                # Check if url is a local path like "C:/Users/fred"
                # or actually an absolute URI like http://example.com/fred
                if has_drive:
                    # Assume split.scheme is actually a drive, e.g. "C:"
                    # so we'll recombine into a path
                    path_with_drive = urllib.parse.urlunsplit(
                        (split.scheme, "", split.path, "", "")
                    )
                    # Compose new file:/// URI with path_with_drive
                    # .. carrying over any #fragment (?query just in case..)
                    return urllib.parse.urlunsplit(
                        ("file", netloc, path_with_drive, split.query, split.fragment)
                    )
                if (
                    not split.scheme
                    and not netloc
                    and split.path
                    and split.path.startswith("/")
                ):
                    # Relative - but does it have a drive?
                    base_drive = _re_drive.match(basesplit.path)
                    drive = _re_drive.match(split.path)
                    if base_drive and not drive:
                        # Keep drive letter from base_url
                        # https://tools.ietf.org/html/rfc8089#appendix-E.2.1
                        # e.g. urljoin("file:///D:/bar/a.txt", "/foo/b.txt")
                        # == file:///D:/foo/b.txt
                        path_with_drive = "/{}:{}".format(
                            base_drive.group(1), split.path
                        )
                        return urllib.parse.urlunsplit(
                            (
                                "file",
                                netloc,
                                path_with_drive,
                                split.query,
                                split.fragment,
                            )
                        )

                # else: fall-through to resolve as relative URI
            elif has_drive:
                # Base is http://something but url is C:/something - which urllib
                # would wrongly resolve as an absolute path that could later be used
                # to access local files
                raise ValidationException(
                    "Not resolving potential remote exploit {} from base {}".format(
                        url, base_url
                    )
                )

        return urllib.parse.urljoin(base_url, url)
328 | |
329 | |
# Index type: normalized URL -> resolved document (or None placeholder).
idx_type = Dict[Text, Union[CommentedMap, CommentedSeq, Text, None]]
# Signature of a Fetcher factory: (cache, session) -> Fetcher.
fetcher_sig = Callable[
    [Dict[Text, Union[Text, bool]], requests.sessions.Session], Fetcher
]
# Predicate deciding whether a document's attachments may be loaded.
# (Name keeps the historical misspelling; it is part of the module API.)
attachements_sig = Callable[[Union[CommentedMap, CommentedSeq]], bool]
335 | |
336 | |
337 class Loader(object): | |
    def __init__(
        self,
        ctx,  # type: ContextType
        schemagraph=None,  # type: Optional[Graph]
        foreign_properties=None,  # type: Optional[Set[Text]]
        idx=None,  # type: Optional[idx_type]
        cache=None,  # type: Optional[Dict[Text, Any]]
        session=None,  # type: Optional[requests.sessions.Session]
        fetcher_constructor=None,  # type: Optional[fetcher_sig]
        skip_schemas=None,  # type: Optional[bool]
        url_fields=None,  # type: Optional[Set[Text]]
        allow_attachments=None,  # type: Optional[attachements_sig]
    ):
        # type: (...) -> None
        """Set up a document loader for the JSON-LD context *ctx*.

        All keyword arguments default to fresh, empty state; pass existing
        objects (as SubLoader does) to share state with another Loader.
        """

        # Index of already-resolved documents, keyed by normalized URL.
        if idx is not None:
            self.idx = idx
        else:
            self.idx = NormDict(lambda url: urllib.parse.urlsplit(url).geturl())

        self.ctx = {}  # type: ContextType
        # RDF graph of any external schemas loaded via add_schemas().
        if schemagraph is not None:
            self.graph = schemagraph
        else:
            self.graph = Graph()

        if foreign_properties is not None:
            self.foreign_properties = set(foreign_properties)
        else:
            self.foreign_properties = set()

        if cache is not None:
            self.cache = cache
        else:
            self.cache = {}

        if skip_schemas is not None:
            self.skip_schemas = skip_schemas
        else:
            self.skip_schemas = False

        # Build an HTTP session with an on-disk cache, preferring $HOME,
        # then $TMP, then /tmp for the cache directory.
        if session is None:
            if "HOME" in os.environ:
                self.session = CacheControl(
                    requests.Session(),
                    cache=FileCache(
                        os.path.join(os.environ["HOME"], ".cache", "salad")
                    ),
                )
            elif "TMP" in os.environ:
                self.session = CacheControl(
                    requests.Session(),
                    cache=FileCache(os.path.join(os.environ["TMP"], ".cache", "salad")),
                )
            else:
                self.session = CacheControl(
                    requests.Session(),
                    cache=FileCache(os.path.join("/tmp", ".cache", "salad")),
                )
        else:
            self.session = session

        if fetcher_constructor is not None:
            self.fetcher_constructor = fetcher_constructor
        else:
            self.fetcher_constructor = DefaultFetcher
        self.fetcher = self.fetcher_constructor(self.cache, self.session)
        # Convenience aliases so callers can use loader.fetch_text() directly.
        self.fetch_text = self.fetcher.fetch_text
        self.check_exists = self.fetcher.check_exists

        if url_fields is None:
            self.url_fields = set()  # type: Set[Text]
        else:
            self.url_fields = set(url_fields)

        # Context-derived state; populated by add_context() below.
        self.scoped_ref_fields = {}  # type: Dict[Text, int]
        self.vocab_fields = set()  # type: Set[Text]
        self.identifiers = []  # type: List[Text]
        self.identity_links = set()  # type: Set[Text]
        self.standalone = None  # type: Optional[Set[Text]]
        self.nolinkcheck = set()  # type: Set[Text]
        self.vocab = {}  # type: Dict[Text, Text]
        self.rvocab = {}  # type: Dict[Text, Text]
        self.idmap = {}  # type: Dict[Text, Any]
        self.mapPredicate = {}  # type: Dict[Text, Text]
        self.type_dsl_fields = set()  # type: Set[Text]
        self.subscopes = {}  # type: Dict[Text, Text]
        self.secondaryFile_dsl_fields = set()  # type: Set[Text]
        self.allow_attachments = allow_attachments

        self.add_context(ctx)
429 | |
    def expand_url(
        self,
        url,  # type: Text
        base_url,  # type: Text
        scoped_id=False,  # type: bool
        vocab_term=False,  # type: bool
        scoped_ref=None,  # type: Optional[int]
    ):
        # type: (...) -> Text
        """Expand *url* to an absolute URI using the vocabulary and *base_url*.

        scoped_id: treat a fragment-less url as an identifier nested under
        base_url's fragment.  vocab_term: return vocabulary terms unchanged
        and reverse-map absolute URIs back to their short names.
        scoped_ref: leave fragment-less references alone for later scoped
        resolution.
        """
        # JSON-LD keywords and blank-node ids pass through untouched.
        if url in (u"@id", u"@type") or url is None:
            return url

        if vocab_term and url in self.vocab:
            return url

        if url.startswith("_:"):
            return url

        # Expand a known namespace prefix; warn on unknown non-scheme prefixes.
        if bool(self.vocab) and u":" in url:
            prefix = url.split(u":")[0]
            if prefix in self.vocab:
                url = self.vocab[prefix] + url[len(prefix) + 1 :]
            elif prefix not in self.fetcher.supported_schemes():
                _logger.warning(
                    "URI prefix '%s' of '%s' not recognized, are you missing a "
                    "$namespaces section?",
                    prefix,
                    url,
                )

        split = urllib.parse.urlsplit(url)

        if (
            (bool(split.scheme) and split.scheme in [u"http", u"https", u"file"])
            or url.startswith(u"$(")
            or url.startswith(u"${")
        ):
            # Already absolute, or a parameter reference/expression: keep as-is.
            pass
        elif scoped_id and not bool(split.fragment):
            # Nest the new identifier under base_url's fragment (if any).
            splitbase = urllib.parse.urlsplit(base_url)
            frg = u""
            if bool(splitbase.fragment):
                frg = splitbase.fragment + u"/" + split.path
            else:
                frg = split.path
            pt = splitbase.path if splitbase.path != "" else "/"
            url = urllib.parse.urlunsplit(
                (splitbase.scheme, splitbase.netloc, pt, splitbase.query, frg)
            )
        elif scoped_ref is not None and not split.fragment:
            # Deferred: resolved later against the enclosing scopes.
            pass
        else:
            url = self.fetcher.urljoin(base_url, url)

        # Map absolute URIs back to vocabulary short names when requested.
        if vocab_term and url in self.rvocab:
            return self.rvocab[url]
        else:
            return url
488 | |
489 def _add_properties(self, s): # type: (Text) -> None | |
490 for _, _, rng in self.graph.triples((s, RDFS.range, None)): | |
491 literal = ( | |
492 Text(rng).startswith(u"http://www.w3.org/2001/XMLSchema#") | |
493 and not Text(rng) == u"http://www.w3.org/2001/XMLSchema#anyURI" | |
494 ) or Text(rng) == u"http://www.w3.org/2000/01/rdf-schema#Literal" | |
495 if not literal: | |
496 self.url_fields.add(Text(s)) | |
497 self.foreign_properties.add(Text(s)) | |
498 | |
499 def add_namespaces(self, ns): # type: (Dict[Text, Text]) -> None | |
500 self.vocab.update(ns) | |
501 | |
    def add_schemas(self, ns, base_url):
        # type: (Union[List[Text], Text], Text) -> None
        """Fetch external RDF schema(s) *ns* and fold them into self.graph.

        Each schema is tried as xml, turtle, then rdfa; failures are logged
        as warnings rather than raised.  Properties found in the combined
        graph are registered via _add_properties().
        """
        if self.skip_schemas:
            return
        for sch in aslist(ns):
            try:
                fetchurl = self.fetcher.urljoin(base_url, sch)
                if fetchurl not in self.cache or self.cache[fetchurl] is True:
                    _logger.debug("Getting external schema %s", fetchurl)
                    content = self.fetch_text(fetchurl)
                    self.cache[fetchurl] = Graph()
                    # Try each supported serialization until one parses.
                    for fmt in ["xml", "turtle", "rdfa"]:
                        try:
                            self.cache[fetchurl].parse(
                                data=content, format=fmt, publicID=str(fetchurl)
                            )
                            self.graph += self.cache[fetchurl]
                            break
                        except xml.sax.SAXParseException:
                            pass
                        except TypeError:
                            pass
                        except BadSyntax:
                            pass
            except Exception as e:
                # Best-effort: a broken extension schema must not abort loading.
                _logger.warning(
                    "Could not load extension schema %s: %s", fetchurl, Text(e)
                )

        # Register every property mentioned by the combined schema graph.
        for s, _, _ in self.graph.triples((None, RDF.type, RDF.Property)):
            self._add_properties(s)
        for s, _, o in self.graph.triples((None, RDFS.subPropertyOf, None)):
            self._add_properties(s)
            self._add_properties(o)
        for s, _, _ in self.graph.triples((None, RDFS.range, None)):
            self._add_properties(s)
        for s, _, _ in self.graph.triples((None, RDF.type, OWL.ObjectProperty)):
            self._add_properties(s)

        # Mark every subject as known (None = placeholder) in the index.
        for s, _, _ in self.graph.triples((None, None, None)):
            self.idx[Text(s)] = None
543 | |
    def add_context(self, newcontext, baseuri=""):
        # type: (ContextType, Text) -> None
        """Install *newcontext* as the active JSON-LD context.

        Resets all context-derived state (vocab, identifiers, url/scoped
        fields, DSL fields, ...) and rebuilds it from the context's field
        declarations.  May only be called while the vocabulary is empty.
        """
        if bool(self.vocab):
            raise ValidationException("Refreshing context that already has stuff in it")

        # Reset derived state; "$schemas" is always treated as a URL field.
        self.url_fields = set(("$schemas",))
        self.scoped_ref_fields = {}
        self.vocab_fields = set()
        self.identifiers = []
        self.identity_links = set()
        self.standalone = set()
        self.nolinkcheck = set()
        self.idmap = {}
        self.mapPredicate = {}
        self.vocab = {}
        self.rvocab = {}
        self.type_dsl_fields = set()
        self.secondaryFile_dsl_fields = set()
        self.subscopes = {}

        self.ctx.update(_copy_dict_without_key(newcontext, u"@context"))

        _logger.debug("ctx is %s", self.ctx)

        # Interpret each context entry and register the field accordingly.
        for key, value in self.ctx.items():
            if value == u"@id":
                self.identifiers.append(key)
                self.identity_links.add(key)
            elif isinstance(value, MutableMapping):
                if value.get(u"@type") == u"@id":
                    self.url_fields.add(key)
                    if u"refScope" in value:
                        self.scoped_ref_fields[key] = value[u"refScope"]
                    if value.get(u"identity", False):
                        self.identity_links.add(key)

                if value.get(u"@type") == u"@vocab":
                    self.url_fields.add(key)
                    self.vocab_fields.add(key)
                    if u"refScope" in value:
                        self.scoped_ref_fields[key] = value[u"refScope"]
                    if value.get(u"typeDSL"):
                        self.type_dsl_fields.add(key)

                if value.get(u"secondaryFilesDSL"):
                    self.secondaryFile_dsl_fields.add(key)

                if value.get(u"noLinkCheck"):
                    self.nolinkcheck.add(key)

                if value.get(u"mapSubject"):
                    self.idmap[key] = value[u"mapSubject"]

                if value.get(u"mapPredicate"):
                    self.mapPredicate[key] = value[u"mapPredicate"]

                if value.get(u"@id"):
                    self.vocab[key] = value[u"@id"]

                if value.get(u"subscope"):
                    self.subscopes[key] = value[u"subscope"]

            elif isinstance(value, string_types):
                self.vocab[key] = value

        # Build the reverse vocabulary: absolute URI -> short name.
        for k, v in self.vocab.items():
            self.rvocab[self.expand_url(v, u"", scoped_id=False)] = k

        self.identifiers.sort()

        _logger.debug("identifiers is %s", self.identifiers)
        _logger.debug("identity_links is %s", self.identity_links)
        _logger.debug("url_fields is %s", self.url_fields)
        _logger.debug("vocab_fields is %s", self.vocab_fields)
        _logger.debug("vocab is %s", self.vocab)
619 | |
    # Return type of resolve_ref(): (resolved document, metadata).
    resolved_ref_type = Tuple[
        Optional[Union[CommentedMap, CommentedSeq, Text]], CommentedMap
    ]
623 | |
    def resolve_ref(
        self,
        ref,  # type: Union[CommentedMap, CommentedSeq, Text]
        base_url=None,  # type: Optional[Text]
        checklinks=True,  # type: bool
        strict_foreign_properties=False,  # type: bool
    ):
        # type: (...) -> Loader.resolved_ref_type
        """Resolve *ref* against *base_url*, returning (document, metadata).

        *ref* may be a plain URI string, or a mapping carrying one of the
        special directives $import (load and resolve another document),
        $include (load raw text), $mixin (load and overlay sibling fields),
        or an identifier field naming the object itself.
        """

        lref = ref  # type: Union[CommentedMap, CommentedSeq, Text, None]
        obj = None  # type: Optional[CommentedMap]
        resolved_obj = None  # type: Optional[Union[CommentedMap, CommentedSeq, Text]]
        inc = False
        mixin = None  # type: Optional[MutableMapping[Text, Any]]

        if not base_url:
            base_url = file_uri(os.getcwd()) + "/"

        sl = SourceLine(obj, None)
        # If `ref` is a dict, look for special directives.
        if isinstance(lref, CommentedMap):
            obj = lref
            if "$import" in obj:
                sl = SourceLine(obj, "$import")
                if len(obj) == 1:
                    lref = obj[u"$import"]
                    obj = None
                else:
                    raise ValidationException(
                        u"'$import' must be the only field in {}".format(obj), sl
                    )
            elif "$include" in obj:
                sl = SourceLine(obj, "$include")
                if len(obj) == 1:
                    lref = obj[u"$include"]
                    inc = True
                    obj = None
                else:
                    raise ValidationException(
                        u"'$include' must be the only field in {}".format(obj), sl
                    )
            elif "$mixin" in obj:
                sl = SourceLine(obj, "$mixin")
                lref = obj[u"$mixin"]
                mixin = obj
                obj = None
            else:
                # No directive: resolve by the object's own identifier field.
                lref = None
                for identifier in self.identifiers:
                    if identifier in obj:
                        lref = obj[identifier]
                        break
                if not lref:
                    raise ValidationException(
                        u"Object `{}` does not have identifier field in {}".format(
                            obj, self.identifiers
                        ),
                        sl,
                    )

        if not isinstance(lref, string_types):
            raise ValidationException(
                u"Expected CommentedMap or string, got {}: `{}`".format(
                    type(lref), lref
                )
            )

        if isinstance(lref, string_types) and os.sep == "\\":
            # Convert Windows path separator in ref
            lref = lref.replace("\\", "/")

        url = self.expand_url(lref, base_url, scoped_id=(obj is not None))
        # Has this reference been loaded already?
        if url in self.idx and (not mixin):
            resolved_obj = self.idx[url]
            if isinstance(resolved_obj, MutableMapping):
                # Metadata lives at the fragment-less (base document) URL.
                metadata = self.idx.get(urllib.parse.urldefrag(url)[0], CommentedMap())
                if isinstance(metadata, MutableMapping):
                    if u"$graph" in resolved_obj:
                        # Unwrap $graph documents: contents + sibling metadata.
                        metadata = _copy_dict_without_key(resolved_obj, u"$graph")
                        return resolved_obj[u"$graph"], metadata
                    else:
                        return resolved_obj, metadata
                else:
                    raise ValidationException(
                        u"Expected CommentedMap, got {}: `{}`".format(
                            type(metadata), metadata
                        )
                    )
            elif isinstance(resolved_obj, MutableSequence):
                metadata = self.idx.get(urllib.parse.urldefrag(url)[0], CommentedMap())
                if isinstance(metadata, MutableMapping):
                    return resolved_obj, metadata
                else:
                    return resolved_obj, CommentedMap()
            elif isinstance(resolved_obj, string_types):
                return resolved_obj, CommentedMap()
            else:
                raise ValidationException(
                    u"Expected MutableMapping or MutableSequence, got {}: `{}`".format(
                        type(resolved_obj), resolved_obj
                    )
                )

        # "$include" directive means load raw text
        if inc:
            return self.fetch_text(url), CommentedMap()

        doc = None
        if isinstance(obj, MutableMapping):
            # Self-identified object: stamp the expanded id back onto it.
            for identifier in self.identifiers:
                obj[identifier] = url
            doc_url = url
        else:
            # Load structured document
            doc_url, frg = urllib.parse.urldefrag(url)
            if doc_url in self.idx and (not mixin):
                # If the base document is in the index, it was already loaded,
                # so if we didn't find the reference earlier then it must not
                # exist.
                raise ValidationException(
                    u"Reference `#{}` not found in file `{}`.".format(frg, doc_url), sl
                )
            doc = self.fetch(doc_url, inject_ids=(not mixin))

        # Recursively expand urls and resolve directives
        if bool(mixin):
            # $mixin: overlay the directive's sibling fields onto a copy of doc.
            doc = copy.deepcopy(doc)
            if doc is not None and mixin is not None:
                doc.update(mixin)
                del doc["$mixin"]
            resolved_obj, metadata = self.resolve_all(
                doc,
                base_url,
                file_base=doc_url,
                checklinks=checklinks,
                strict_foreign_properties=strict_foreign_properties,
            )
        else:
            if doc:
                resolve_target = doc
            else:
                resolve_target = obj
            resolved_obj, metadata = self.resolve_all(
                resolve_target,
                doc_url,
                checklinks=checklinks,
                strict_foreign_properties=strict_foreign_properties,
            )

        # Requested reference should be in the index now, otherwise it's a bad
        # reference
        if not bool(mixin):
            if url in self.idx:
                resolved_obj = self.idx[url]
            else:
                raise ValidationException(
                    "Reference `{}` is not in the index. Index contains: {}".format(
                        url, ", ".join(self.idx)
                    )
                )

        if isinstance(resolved_obj, CommentedMap):
            if u"$graph" in resolved_obj:
                # Unwrap $graph documents: contents + sibling metadata.
                metadata = _copy_dict_without_key(resolved_obj, u"$graph")
                return resolved_obj[u"$graph"], metadata
            else:
                return resolved_obj, metadata
        else:
            return resolved_obj, metadata
794 | |
795 def _resolve_idmap( | |
796 self, | |
797 document, # type: CommentedMap | |
798 loader, # type: Loader | |
799 ): | |
800 # type: (...) -> None | |
801 # Convert fields with mapSubject into lists | |
802 # use mapPredicate if the mapped value isn't a dict. | |
803 for idmapField in loader.idmap: | |
804 if idmapField in document: | |
805 idmapFieldValue = document[idmapField] | |
806 if ( | |
807 isinstance(idmapFieldValue, MutableMapping) | |
808 and "$import" not in idmapFieldValue | |
809 and "$include" not in idmapFieldValue | |
810 ): | |
811 ls = CommentedSeq() | |
812 for k in sorted(idmapFieldValue.keys()): | |
813 val = idmapFieldValue[k] | |
814 v = None # type: Optional[CommentedMap] | |
815 if not isinstance(val, CommentedMap): | |
816 if idmapField in loader.mapPredicate: | |
817 v = CommentedMap( | |
818 ((loader.mapPredicate[idmapField], val),) | |
819 ) | |
820 v.lc.add_kv_line_col( | |
821 loader.mapPredicate[idmapField], | |
822 document[idmapField].lc.data[k], | |
823 ) | |
824 v.lc.filename = document.lc.filename | |
825 else: | |
826 raise ValidationException( | |
827 "mapSubject '{}' value '{}' is not a dict " | |
828 "and does not have a mapPredicate.".format(k, v) | |
829 ) | |
830 else: | |
831 v = val | |
832 | |
833 v[loader.idmap[idmapField]] = k | |
834 v.lc.add_kv_line_col( | |
835 loader.idmap[idmapField], document[idmapField].lc.data[k] | |
836 ) | |
837 v.lc.filename = document.lc.filename | |
838 | |
839 ls.lc.add_kv_line_col(len(ls), document[idmapField].lc.data[k]) | |
840 | |
841 ls.lc.filename = document.lc.filename | |
842 ls.append(v) | |
843 | |
844 document[idmapField] = ls | |
845 | |
    # Parses type-DSL strings: base type, optional "[]" (array), optional "?" (nullable).
    typeDSLregex = re.compile(Text(r"^([^[?]+)(\[\])?(\?)?$"))
847 | |
848 def _type_dsl( | |
849 self, | |
850 t, # type: Union[Text, Dict[Text, Text], List[Text]] | |
851 lc, # type: LineCol | |
852 filename, # type: Text | |
853 ): # type: (...) -> Union[Text, Dict[Text, Text], List[Text]] | |
854 | |
855 if not isinstance(t, string_types): | |
856 return t | |
857 | |
858 m = Loader.typeDSLregex.match(t) | |
859 if not m: | |
860 return t | |
861 first = m.group(1) | |
862 second = third = None | |
863 if bool(m.group(2)): | |
864 second = CommentedMap((("type", "array"), ("items", first))) | |
865 second.lc.add_kv_line_col("type", lc) | |
866 second.lc.add_kv_line_col("items", lc) | |
867 second.lc.filename = filename | |
868 if bool(m.group(3)): | |
869 third = CommentedSeq([u"null", second or first]) | |
870 third.lc.add_kv_line_col(0, lc) | |
871 third.lc.add_kv_line_col(1, lc) | |
872 third.lc.filename = filename | |
873 return third or second or first | |
874 | |
875 def _secondaryFile_dsl( | |
876 self, | |
877 t, # type: Union[Text, Dict[Text, Text], List[Text]] | |
878 lc, # type: LineCol | |
879 filename, # type: Text | |
880 ): # type: (...) -> Union[Text, Dict[Text, Text], List[Text]] | |
881 | |
882 if not isinstance(t, string_types): | |
883 return t | |
884 pat = t | |
885 req = None | |
886 if t.endswith("?"): | |
887 pat = t[0:-1] | |
888 req = False | |
889 | |
890 second = CommentedMap((("pattern", pat), ("required", req))) | |
891 second.lc.add_kv_line_col("pattern", lc) | |
892 second.lc.add_kv_line_col("required", lc) | |
893 second.lc.filename = filename | |
894 return second | |
895 | |
896 def _apply_dsl( | |
897 self, | |
898 datum, # type: Union[Text, Dict[Any, Any], List[Any]] | |
899 d, # type: Text | |
900 loader, # type: Loader | |
901 lc, # type: LineCol | |
902 filename, # type: Text | |
903 ): | |
904 # type: (...) -> Union[Text, Dict[Any, Any], List[Any]] | |
905 if d in loader.type_dsl_fields: | |
906 return self._type_dsl(datum, lc, filename) | |
907 elif d in loader.secondaryFile_dsl_fields: | |
908 return self._secondaryFile_dsl(datum, lc, filename) | |
909 else: | |
910 return datum | |
911 | |
    def _resolve_dsl(
        self,
        document,  # type: CommentedMap
        loader,  # type: Loader
    ):
        # type: (...) -> None
        """Expand type-DSL and secondaryFiles-DSL shorthand in *document*.

        Scalars are expanded directly; lists are expanded element-wise and
        then flattened one level with duplicates removed (first occurrence
        wins, carrying its line/column info).
        """
        fields = list(loader.type_dsl_fields)
        fields.extend(loader.secondaryFile_dsl_fields)

        for d in fields:
            if d in document:
                datum2 = datum = document[d]
                if isinstance(datum, string_types):
                    datum2 = self._apply_dsl(
                        datum, d, loader, document.lc.data[d], document.lc.filename
                    )
                elif isinstance(datum, CommentedSeq):
                    datum2 = CommentedSeq()
                    for n, t in enumerate(datum):
                        if datum.lc and datum.lc.data:
                            # Carry the original element's position onto the result.
                            datum2.lc.add_kv_line_col(len(datum2), datum.lc.data[n])
                            datum2.append(
                                self._apply_dsl(
                                    t, d, loader, datum.lc.data[n], document.lc.filename
                                )
                            )
                        else:
                            datum2.append(self._apply_dsl(t, d, loader, LineCol(), ""))
                if isinstance(datum2, CommentedSeq):
                    # Flatten nested lists produced by "?"-expansion and drop
                    # duplicate entries, keeping first occurrences.
                    datum3 = CommentedSeq()
                    seen = []  # type: List[Text]
                    for i, item in enumerate(datum2):
                        if isinstance(item, CommentedSeq):
                            for j, v in enumerate(item):
                                if v not in seen:
                                    datum3.lc.add_kv_line_col(
                                        len(datum3), item.lc.data[j]
                                    )
                                    datum3.append(v)
                                    seen.append(v)
                        else:
                            if item not in seen:
                                if datum2.lc and datum2.lc.data:
                                    datum3.lc.add_kv_line_col(
                                        len(datum3), datum2.lc.data[i]
                                    )
                                datum3.append(item)
                                seen.append(item)
                    document[d] = datum3
                else:
                    document[d] = datum2
963 | |
964 def _resolve_identifier(self, document, loader, base_url): | |
965 # type: (CommentedMap, Loader, Text) -> Text | |
966 # Expand identifier field (usually 'id') to resolve scope | |
967 for identifer in loader.identifiers: | |
968 if identifer in document: | |
969 if isinstance(document[identifer], string_types): | |
970 document[identifer] = loader.expand_url( | |
971 document[identifer], base_url, scoped_id=True | |
972 ) | |
973 if document[identifer] not in loader.idx or isinstance( | |
974 loader.idx[document[identifer]], string_types | |
975 ): | |
976 loader.idx[document[identifer]] = document | |
977 base_url = document[identifer] | |
978 else: | |
979 raise ValidationException( | |
980 "identifier field '{}' must be a string".format( | |
981 document[identifer] | |
982 ) | |
983 ) | |
984 return base_url | |
985 | |
986 def _resolve_identity(self, document, loader, base_url): | |
987 # type: (Dict[Text, List[Text]], Loader, Text) -> None | |
988 # Resolve scope for identity fields (fields where the value is the | |
989 # identity of a standalone node, such as enum symbols) | |
990 for identifer in loader.identity_links: | |
991 if identifer in document and isinstance( | |
992 document[identifer], MutableSequence | |
993 ): | |
994 for n, _v in enumerate(document[identifer]): | |
995 if isinstance(document[identifer][n], string_types): | |
996 document[identifer][n] = loader.expand_url( | |
997 document[identifer][n], base_url, scoped_id=True | |
998 ) | |
999 if document[identifer][n] not in loader.idx: | |
1000 loader.idx[document[identifer][n]] = document[identifer][n] | |
1001 | |
1002 def _normalize_fields(self, document, loader): | |
1003 # type: (CommentedMap, Loader) -> None | |
1004 # Normalize fields which are prefixed or full URIn to vocabulary terms | |
1005 for d in list(document.keys()): | |
1006 d2 = loader.expand_url(d, u"", scoped_id=False, vocab_term=True) | |
1007 if d != d2: | |
1008 document[d2] = document[d] | |
1009 document.lc.add_kv_line_col(d2, document.lc.data[d]) | |
1010 del document[d] | |
1011 | |
1012 def _resolve_uris( | |
1013 self, | |
1014 document, # type: Dict[Text, Union[Text, List[Text]]] | |
1015 loader, # type: Loader | |
1016 base_url, # type: Text | |
1017 ): | |
1018 # type: (...) -> None | |
1019 # Resolve remaining URLs based on document base | |
1020 for d in loader.url_fields: | |
1021 if d in document: | |
1022 datum = document[d] | |
1023 if isinstance(datum, string_types): | |
1024 document[d] = loader.expand_url( | |
1025 datum, | |
1026 base_url, | |
1027 scoped_id=False, | |
1028 vocab_term=(d in loader.vocab_fields), | |
1029 scoped_ref=loader.scoped_ref_fields.get(d), | |
1030 ) | |
1031 elif isinstance(datum, MutableSequence): | |
1032 for i, url in enumerate(datum): | |
1033 if isinstance(url, string_types): | |
1034 datum[i] = loader.expand_url( | |
1035 url, | |
1036 base_url, | |
1037 scoped_id=False, | |
1038 vocab_term=(d in loader.vocab_fields), | |
1039 scoped_ref=loader.scoped_ref_fields.get(d), | |
1040 ) | |
1041 | |
    def resolve_all(
        self,
        document,  # type: Union[CommentedMap, CommentedSeq]
        base_url,  # type: Text
        file_base=None,  # type: Optional[Text]
        checklinks=True,  # type: bool
        strict_foreign_properties=False,  # type: bool
    ):
        # type: (...) -> Loader.resolved_ref_type
        """Recursively resolve a parsed document in place.

        Processes $import/$include/$mixin directives, context directives
        ($base, $profile, $namespaces, $schemas, $graph), identifier scoping,
        DSL shorthands and URL expansion, then (optionally) validates links.

        Returns a (document, metadata) tuple; when the input carried a
        ``$graph``, ``metadata`` is the enclosing map and ``document`` the
        graph contents.
        """
        loader = self
        metadata = CommentedMap()  # type: CommentedMap
        if file_base is None:
            file_base = base_url

        if isinstance(document, CommentedMap):
            # Handle $import and $include
            if u"$import" in document or u"$include" in document:
                return self.resolve_ref(
                    document,
                    base_url=file_base,
                    checklinks=checklinks,
                    strict_foreign_properties=strict_foreign_properties,
                )
            elif u"$mixin" in document:
                return self.resolve_ref(
                    document,
                    base_url=base_url,
                    checklinks=checklinks,
                    strict_foreign_properties=strict_foreign_properties,
                )
        elif isinstance(document, CommentedSeq):
            pass
        elif isinstance(document, (list, dict)):
            # Plain containers carry no line/column info; only ruamel
            # round-trip types are accepted here.
            raise ValidationException(
                "Expected CommentedMap or CommentedSeq, got {}: `{}`".format(
                    type(document), document
                )
            )
        else:
            # Scalar values resolve to themselves.
            return (document, metadata)

        newctx = None  # type: Optional[Loader]
        if isinstance(document, CommentedMap):
            # Handle $base, $profile, $namespaces, $schemas and $graph.
            # Any of these directives forks a SubLoader so the extra
            # context does not leak into sibling documents.
            if u"$base" in document:
                base_url = document[u"$base"]

            if u"$profile" in document:
                if newctx is None:
                    newctx = SubLoader(self)
                newctx.add_namespaces(document.get(u"$namespaces", CommentedMap()))
                newctx.add_schemas(document.get(u"$schemas", []), document[u"$profile"])

            if u"$namespaces" in document:
                if newctx is None:
                    newctx = SubLoader(self)
                newctx.add_namespaces(document[u"$namespaces"])

            if u"$schemas" in document:
                if newctx is None:
                    newctx = SubLoader(self)
                newctx.add_schemas(document[u"$schemas"], file_base)

            if newctx is not None:
                loader = newctx

            # Pre-index identity-link fields that are plain strings.
            for identifer in loader.identity_links:
                if identifer in document:
                    if isinstance(document[identifer], string_types):
                        document[identifer] = loader.expand_url(
                            document[identifer], base_url, scoped_id=True
                        )
                        loader.idx[document[identifer]] = document

            metadata = document
            if u"$graph" in document:
                # The enclosing map becomes metadata; resolution continues
                # on the graph contents.
                document = document[u"$graph"]

        if isinstance(document, CommentedMap):
            self._normalize_fields(document, loader)
            self._resolve_idmap(document, loader)
            self._resolve_dsl(document, loader)
            base_url = self._resolve_identifier(document, loader, base_url)
            self._resolve_identity(document, loader, base_url)
            self._resolve_uris(document, loader, base_url)

            try:
                # Recurse into every field; link checking is deferred to the
                # single top-level validate_links pass below.
                for key, val in document.items():
                    subscope = ""  # type: Text
                    if key in loader.subscopes:
                        subscope = "/" + loader.subscopes[key]
                    document[key], _ = loader.resolve_all(
                        val, base_url + subscope, file_base=file_base, checklinks=False
                    )
            except ValidationException as v:
                _logger.warning("loader is %s", id(loader), exc_info=True)
                raise_from(
                    ValidationException(
                        "({}) ({}) Validation error in field {}:".format(
                            id(loader), file_base, key
                        ),
                        None,
                        [v],
                    ),
                    v,
                )

        elif isinstance(document, CommentedSeq):
            i = 0
            try:
                while i < len(document):
                    val = document[i]
                    if isinstance(val, CommentedMap) and (
                        u"$import" in val or u"$mixin" in val
                    ):
                        l, import_metadata = loader.resolve_ref(
                            val, base_url=file_base, checklinks=False
                        )
                        metadata.setdefault("$import_metadata", {})
                        for identifier in loader.identifiers:
                            if identifier in import_metadata:
                                metadata["$import_metadata"][
                                    import_metadata[identifier]
                                ] = import_metadata
                        if isinstance(l, CommentedSeq):
                            # Splice the imported list in place of the
                            # directive, shifting the recorded line/column
                            # data of the trailing entries to match.
                            lc = document.lc.data[i]
                            del document[i]
                            llen = len(l)
                            for j in range(len(document) + llen, i + llen, -1):
                                document.lc.data[j - 1] = document.lc.data[j - llen]
                            for item in l:
                                document.insert(i, item)
                                document.lc.data[i] = lc
                                i += 1
                        else:
                            document[i] = l
                            i += 1
                    else:
                        document[i], _ = loader.resolve_all(
                            val, base_url, file_base=file_base, checklinks=False
                        )
                        i += 1
            except ValidationException as v:
                _logger.warning("failed", exc_info=True)
                raise_from(
                    ValidationException(
                        "({}) ({}) Validation error in position {}:".format(
                            id(loader), file_base, i
                        ),
                        None,
                        [v],
                    ),
                    v,
                )

        if checklinks:
            all_doc_ids = {}  # type: Dict[Text, Text]
            loader.validate_links(
                document,
                u"",
                all_doc_ids,
                strict_foreign_properties=strict_foreign_properties,
            )

        return document, metadata
1207 | |
1208 def fetch(self, url, inject_ids=True): # type: (Text, bool) -> Any | |
1209 if url in self.idx: | |
1210 return self.idx[url] | |
1211 try: | |
1212 text = self.fetch_text(url) | |
1213 if isinstance(text, bytes): | |
1214 textIO = StringIO(text.decode("utf-8")) | |
1215 else: | |
1216 textIO = StringIO(text) | |
1217 textIO.name = str(url) | |
1218 attachments = yaml.round_trip_load_all(textIO, preserve_quotes=True) | |
1219 result = next(attachments) | |
1220 | |
1221 if self.allow_attachments is not None and self.allow_attachments(result): | |
1222 i = 1 | |
1223 for a in attachments: | |
1224 self.idx["{}#attachment-{}".format(url, i)] = a | |
1225 i += 1 | |
1226 add_lc_filename(result, url) | |
1227 except yaml.error.MarkedYAMLError as e: | |
1228 raise_from(to_validation_exception(e), e) | |
1229 if isinstance(result, CommentedMap) and inject_ids and bool(self.identifiers): | |
1230 for identifier in self.identifiers: | |
1231 if identifier not in result: | |
1232 result[identifier] = url | |
1233 self.idx[ | |
1234 self.expand_url(result[identifier], url, scoped_id=True) | |
1235 ] = result | |
1236 self.idx[url] = result | |
1237 return result | |
1238 | |
1239 FieldType = TypeVar("FieldType", Text, CommentedSeq, CommentedMap) | |
1240 | |
1241 def validate_scoped(self, field, link, docid): | |
1242 # type: (Text, Text, Text) -> Text | |
1243 split = urllib.parse.urlsplit(docid) | |
1244 sp = split.fragment.split(u"/") | |
1245 n = self.scoped_ref_fields[field] | |
1246 while n > 0 and len(sp) > 0: | |
1247 sp.pop() | |
1248 n -= 1 | |
1249 tried = [] | |
1250 while True: | |
1251 sp.append(link) | |
1252 url = urllib.parse.urlunsplit( | |
1253 (split.scheme, split.netloc, split.path, split.query, u"/".join(sp)) | |
1254 ) | |
1255 tried.append(url) | |
1256 if url in self.idx: | |
1257 return url | |
1258 sp.pop() | |
1259 if len(sp) == 0: | |
1260 break | |
1261 sp.pop() | |
1262 if onWindows() and link.startswith("file:"): | |
1263 link = link.lower() | |
1264 raise ValidationException( | |
1265 "Field `{}` references unknown identifier `{}`, tried {}".format( | |
1266 field, link, ", ".join(tried) | |
1267 ) | |
1268 ) | |
1269 | |
1270 def validate_link(self, field, link, docid, all_doc_ids): | |
1271 # type: (Text, Loader.FieldType, Text, Dict[Text, Text]) -> Loader.FieldType | |
1272 if field in self.nolinkcheck: | |
1273 return link | |
1274 if isinstance(link, string_types): | |
1275 if field in self.vocab_fields: | |
1276 if ( | |
1277 link not in self.vocab | |
1278 and link not in self.idx | |
1279 and link not in self.rvocab | |
1280 ): | |
1281 if field in self.scoped_ref_fields: | |
1282 return self.validate_scoped(field, link, docid) | |
1283 elif not self.check_exists(link): | |
1284 raise ValidationException( | |
1285 "Field `{}` contains undefined reference to `{}`".format( | |
1286 field, link | |
1287 ) | |
1288 ) | |
1289 elif link not in self.idx and link not in self.rvocab: | |
1290 if field in self.scoped_ref_fields: | |
1291 return self.validate_scoped(field, link, docid) | |
1292 elif not self.check_exists(link): | |
1293 raise ValidationException( | |
1294 "Field `{}` contains undefined reference to `{}`".format( | |
1295 field, link | |
1296 ) | |
1297 ) | |
1298 elif isinstance(link, CommentedSeq): | |
1299 errors = [] | |
1300 for n, i in enumerate(link): | |
1301 try: | |
1302 link[n] = self.validate_link(field, i, docid, all_doc_ids) | |
1303 except ValidationException as v: | |
1304 errors.append(v) | |
1305 if bool(errors): | |
1306 raise ValidationException("", None, errors) | |
1307 elif isinstance(link, CommentedMap): | |
1308 self.validate_links(link, docid, all_doc_ids) | |
1309 else: | |
1310 raise ValidationException( | |
1311 "`{}` field is {}, expected string, list, or a dict.".format( | |
1312 field, type(link).__name__ | |
1313 ) | |
1314 ) | |
1315 return link | |
1316 | |
1317 def getid(self, d): # type: (Any) -> Optional[Text] | |
1318 if isinstance(d, MutableMapping): | |
1319 for i in self.identifiers: | |
1320 if i in d: | |
1321 idd = d[i] | |
1322 if isinstance(idd, string_types): | |
1323 return idd | |
1324 return None | |
1325 | |
    def validate_links(
        self,
        document,  # type: Union[CommentedMap, CommentedSeq, Text, None]
        base_url,  # type: Text
        all_doc_ids,  # type: Dict[Text, Text]
        strict_foreign_properties=False,  # type: bool
    ):  # type: (...) -> None
        """Recursively validate every link field in ``document``.

        Collects all validation failures and raises a single (possibly
        aggregated) ValidationException at the end; link problems in
        $schemas or non-strict foreign properties are only logged.
        ``all_doc_ids`` tracks identifiers seen so far to warn about
        duplicate definitions.
        """
        docid = self.getid(document)
        if not docid:
            docid = base_url

        errors = []  # type: List[SchemaSaladException]
        iterator = None  # type: Any
        if isinstance(document, MutableSequence):
            iterator = enumerate(document)
        elif isinstance(document, MutableMapping):
            # Validate each URL-typed field on this node.
            for d in self.url_fields:
                sl = SourceLine(document, d, Text)
                try:
                    if d in document and d not in self.identity_links:
                        document[d] = self.validate_link(
                            d, document[d], docid, all_doc_ids
                        )
                except SchemaSaladException as v:
                    v = v.with_sourceline(sl)
                    # $schemas and (non-strict) foreign properties only warn.
                    if d == "$schemas" or (
                        d in self.foreign_properties and not strict_foreign_properties
                    ):
                        _logger.warning(v)
                    else:
                        errors.append(v)
            # TODO: Validator should local scope only in which
            # duplicated keys are prohibited.
            # See also https://github.com/common-workflow-language/common-workflow-language/issues/734 # noqa: B950
            # In the future, it should raise
            # ValidationException instead of _logger.warn
            try:
                for (
                    identifier
                ) in self.identifiers:  # validate that each id is defined uniquely
                    if identifier in document:
                        sl = SourceLine(document, identifier, Text)
                        if (
                            document[identifier] in all_doc_ids
                            and sl.makeLead() != all_doc_ids[document[identifier]]
                        ):
                            _logger.warning(
                                "%s object %s `%s` previously defined",
                                all_doc_ids[document[identifier]],
                                identifier,
                                relname(document[identifier]),
                            )
                        else:
                            all_doc_ids[document[identifier]] = sl.makeLead()
                            break
            except ValidationException as v:
                errors.append(v.with_sourceline(sl))

            if hasattr(document, "iteritems"):
                iterator = iteritems(document)
            else:
                iterator = list(document.items())
        else:
            # Scalars and None have no links to validate.
            return

        # Recurse into children (list items or map values).
        for key, val in iterator:
            sl = SourceLine(document, key, Text)
            try:
                self.validate_links(
                    val,
                    docid,
                    all_doc_ids,
                    strict_foreign_properties=strict_foreign_properties,
                )
            except ValidationException as v:
                # Unchecked fields and extension (prefixed) keys only warn.
                if key in self.nolinkcheck or (
                    isinstance(key, string_types) and ":" in key
                ):
                    _logger.warning(v)
                else:
                    docid2 = self.getid(val)
                    if docid2 is not None:
                        errors.append(
                            ValidationException(
                                "checking object `{}`".format(relname(docid2)), sl, [v]
                            )
                        )
                    else:
                        if isinstance(key, string_types):
                            errors.append(
                                ValidationException(
                                    "checking field `{}`".format(key), sl, [v]
                                )
                            )
                        else:
                            errors.append(ValidationException("checking item", sl, [v]))
        if bool(errors):
            if len(errors) > 1:
                raise ValidationException("", None, errors)
            else:
                raise errors[0]
        return
1428 | |
1429 | |
1430 D = TypeVar("D", CommentedMap, ContextType) | |
1431 | |
1432 | |
def _copy_dict_without_key(from_dict, filtered_key):
    # type: (D, Any) -> D
    """Return a shallow CommentedMap copy of ``from_dict`` minus ``filtered_key``."""
    copied = CommentedMap(from_dict.items())
    if filtered_key in copied:
        del copied[filtered_key]
    if isinstance(from_dict, CommentedMap):
        # Preserve line/column provenance so later error messages still
        # point at the original source locations.
        copied.lc.data = copy.copy(from_dict.lc.data)
        copied.lc.filename = from_dict.lc.filename
    return copied