env/lib/python3.9/site-packages/schema_salad/schema.py @ 0:4f3585e2f14b (draft, default, tip)
commit message: "planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author: shellac
date: Mon, 22 Mar 2021 18:12:50 +0000
1 """Functions to process Schema Salad schemas.""" | |
2 | |
3 import copy | |
4 import hashlib | |
5 from typing import ( | |
6 IO, | |
7 Any, | |
8 Dict, | |
9 List, | |
10 Mapping, | |
11 MutableMapping, | |
12 MutableSequence, | |
13 Optional, | |
14 Set, | |
15 Tuple, | |
16 TypeVar, | |
17 Union, | |
18 cast, | |
19 ) | |
20 from urllib.parse import urldefrag, urlparse | |
21 | |
22 from pkg_resources import resource_stream | |
23 from ruamel import yaml | |
24 from ruamel.yaml.comments import CommentedMap, CommentedSeq | |
25 | |
26 from schema_salad.utils import ( | |
27 CacheType, | |
28 ResolveType, | |
29 add_dictlist, | |
30 aslist, | |
31 convert_to_dict, | |
32 flatten, | |
33 json_dumps, | |
34 ) | |
35 | |
36 from . import _logger, jsonld_context, ref_resolver, validate | |
37 from .avro.schema import Names, SchemaParseException, make_avsc_object | |
38 from .exceptions import ( | |
39 ClassValidationException, | |
40 SchemaSaladException, | |
41 ValidationException, | |
42 ) | |
43 from .ref_resolver import Loader | |
44 from .sourceline import SourceLine, add_lc_filename, relname | |
45 | |
46 SALAD_FILES = ( | |
47 "metaschema.yml", | |
48 "metaschema_base.yml", | |
49 "salad.md", | |
50 "field_name.yml", | |
51 "import_include.md", | |
52 "link_res.yml", | |
53 "ident_res.yml", | |
54 "vocab_res.yml", | |
55 "vocab_res.yml", | |
56 "field_name_schema.yml", | |
57 "field_name_src.yml", | |
58 "field_name_proc.yml", | |
59 "ident_res_schema.yml", | |
60 "ident_res_src.yml", | |
61 "ident_res_proc.yml", | |
62 "link_res_schema.yml", | |
63 "link_res_src.yml", | |
64 "link_res_proc.yml", | |
65 "vocab_res_schema.yml", | |
66 "vocab_res_src.yml", | |
67 "vocab_res_proc.yml", | |
68 "map_res.yml", | |
69 "map_res_schema.yml", | |
70 "map_res_src.yml", | |
71 "map_res_proc.yml", | |
72 "typedsl_res.yml", | |
73 "typedsl_res_schema.yml", | |
74 "typedsl_res_src.yml", | |
75 "typedsl_res_proc.yml", | |
76 "sfdsl_res.yml", | |
77 "sfdsl_res_schema.yml", | |
78 "sfdsl_res_src.yml", | |
79 "sfdsl_res_proc.yml", | |
80 ) | |
81 | |
82 saladp = "https://w3id.org/cwl/salad#" | |
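# Every metaschema term hangs off this prefix; e.g. saladp + "record" expands
# to "https://w3id.org/cwl/salad#record".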


def get_metaschema() -> Tuple[Names, List[Dict[str, str]], Loader]:
    """Instantiate the metaschema."""
    loader = ref_resolver.Loader(
        {
            "Any": saladp + "Any",
            "ArraySchema": saladp + "ArraySchema",
            "Array_symbol": saladp + "ArraySchema/type/Array_symbol",
            "DocType": saladp + "DocType",
            "Documentation": saladp + "Documentation",
            "Documentation_symbol": saladp + "Documentation/type/Documentation_symbol",
            "Documented": saladp + "Documented",
            "EnumSchema": saladp + "EnumSchema",
            "Enum_symbol": saladp + "EnumSchema/type/Enum_symbol",
            "JsonldPredicate": saladp + "JsonldPredicate",
            "NamedType": saladp + "NamedType",
            "PrimitiveType": saladp + "PrimitiveType",
            "RecordField": saladp + "RecordField",
            "RecordSchema": saladp + "RecordSchema",
            "Record_symbol": saladp + "RecordSchema/type/Record_symbol",
            "SaladEnumSchema": saladp + "SaladEnumSchema",
            "SaladRecordField": saladp + "SaladRecordField",
            "SaladRecordSchema": saladp + "SaladRecordSchema",
            "SchemaDefinedType": saladp + "SchemaDefinedType",
            "SpecializeDef": saladp + "SpecializeDef",
            "_container": saladp + "JsonldPredicate/_container",
            "_id": {"@id": saladp + "_id", "@type": "@id", "identity": True},
            "_type": saladp + "JsonldPredicate/_type",
            "abstract": saladp + "SaladRecordSchema/abstract",
            "array": saladp + "array",
            "boolean": "http://www.w3.org/2001/XMLSchema#boolean",
            "dct": "http://purl.org/dc/terms/",
            "default": {"@id": saladp + "default", "noLinkCheck": True},
            "doc": "rdfs:comment",
            "docAfter": {"@id": saladp + "docAfter", "@type": "@id"},
            "docChild": {"@id": saladp + "docChild", "@type": "@id"},
            "docParent": {"@id": saladp + "docParent", "@type": "@id"},
            "documentRoot": saladp + "SchemaDefinedType/documentRoot",
            "documentation": saladp + "documentation",
            "double": "http://www.w3.org/2001/XMLSchema#double",
            "enum": saladp + "enum",
            "extends": {"@id": saladp + "extends", "@type": "@id", "refScope": 1},
            "fields": {
                "@id": saladp + "fields",
                "mapPredicate": "type",
                "mapSubject": "name",
            },
            "float": "http://www.w3.org/2001/XMLSchema#float",
            "identity": saladp + "JsonldPredicate/identity",
            "inVocab": saladp + "NamedType/inVocab",
            "int": "http://www.w3.org/2001/XMLSchema#int",
            "items": {"@id": saladp + "items", "@type": "@vocab", "refScope": 2},
            "jsonldPredicate": "sld:jsonldPredicate",
            "long": "http://www.w3.org/2001/XMLSchema#long",
            "mapPredicate": saladp + "JsonldPredicate/mapPredicate",
            "mapSubject": saladp + "JsonldPredicate/mapSubject",
            "name": "@id",
            "noLinkCheck": saladp + "JsonldPredicate/noLinkCheck",
            "null": saladp + "null",
            "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
            "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
            "record": saladp + "record",
            "refScope": saladp + "JsonldPredicate/refScope",
            "sld": saladp,
            "specialize": {
                "@id": saladp + "specialize",
                "mapPredicate": "specializeTo",
                "mapSubject": "specializeFrom",
            },
            "specializeFrom": {
                "@id": saladp + "specializeFrom",
                "@type": "@id",
                "refScope": 1,
            },
            "specializeTo": {
                "@id": saladp + "specializeTo",
                "@type": "@id",
                "refScope": 1,
            },
            "string": "http://www.w3.org/2001/XMLSchema#string",
            "subscope": saladp + "JsonldPredicate/subscope",
            "symbols": {"@id": saladp + "symbols", "@type": "@id", "identity": True},
            "type": {
                "@id": saladp + "type",
                "@type": "@vocab",
                "refScope": 2,
                "typeDSL": True,
            },
            "typeDSL": saladp + "JsonldPredicate/typeDSL",
            "xsd": "http://www.w3.org/2001/XMLSchema#",
        }
    )

    for salad in SALAD_FILES:
        with resource_stream("schema_salad", "metaschema/" + salad) as stream:
            loader.cache["https://w3id.org/cwl/" + salad] = stream.read().decode(
                "UTF-8"
            )

    with resource_stream("schema_salad", "metaschema/metaschema.yml") as stream:
        loader.cache["https://w3id.org/cwl/salad"] = stream.read().decode("UTF-8")

    j = yaml.main.round_trip_load(loader.cache["https://w3id.org/cwl/salad"])
    add_lc_filename(j, "metaschema.yml")
    j2 = loader.resolve_all(j, saladp)[0]

    if not isinstance(j2, list):
        _logger.error("%s", j2)
        raise SchemaParseException(f"Not a list: {j2}")
    else:
        sch_obj = make_avro(j2, loader)
    try:
        sch_names = make_avro_schema_from_avro(sch_obj)
    except SchemaParseException:
        _logger.error("Metaschema error, avro was:\n%s", json_dumps(sch_obj, indent=4))
        raise
    validate_doc(sch_names, j2, loader, strict=True)
    return (sch_names, j2, loader)

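# A minimal usage sketch: the returned Loader has all of the metaschema
# sources pre-cached, so metaschema references resolve without network access.
#
#     sch_names, sch_doc, metaschema_loader = get_metaschema()
#     resolved, metadata = metaschema_loader.resolve_ref(
#         "https://w3id.org/cwl/salad", ""
#     )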

def add_namespaces(
    metadata: Mapping[str, Any], namespaces: MutableMapping[str, str]
) -> None:
    """Collect the provided namespaces, checking for conflicts."""
    for key, value in metadata.items():
        if key not in namespaces:
            namespaces[key] = value
        elif namespaces[key] != value:
            raise ValidationException(
                "Namespace prefix '{}' has conflicting definitions '{}'"
                " and '{}'.".format(key, namespaces[key], value)
            )


def collect_namespaces(metadata: Mapping[str, Any]) -> Dict[str, str]:
    """Walk through the metadata object, collecting namespace declarations."""
    namespaces = {}  # type: Dict[str, str]
    if "$import_metadata" in metadata:
        for value in metadata["$import_metadata"].values():
            add_namespaces(collect_namespaces(value), namespaces)
    if "$namespaces" in metadata:
        add_namespaces(metadata["$namespaces"], namespaces)
    return namespaces


schema_type = Tuple[Loader, Union[Names, SchemaParseException], Dict[str, Any], Loader]


def load_schema(
    schema_ref: ResolveType,
    cache: Optional[CacheType] = None,
) -> schema_type:
    """
    Load a schema that can be used to validate documents using load_and_validate.

    return: document_loader, avsc_names, schema_metadata, metaschema_loader
    """
    metaschema_names, _metaschema_doc, metaschema_loader = get_metaschema()
    if cache is not None:
        metaschema_loader.cache.update(cache)
    schema_doc, schema_metadata = metaschema_loader.resolve_ref(schema_ref, "")

    if not isinstance(schema_doc, MutableSequence):
        raise ValidationException("Schema reference must resolve to a list.")

    validate_doc(metaschema_names, schema_doc, metaschema_loader, True)
    metactx = schema_metadata.get("@context", {})
    metactx.update(collect_namespaces(schema_metadata))
    schema_ctx = jsonld_context.salad_to_jsonld_context(schema_doc, metactx)[0]

    # Create the loader that will be used to load the target document.
    document_loader = Loader(schema_ctx, cache=cache)
    # Create the Avro schema that will be used to validate the target document.
    avsc_names = make_avro_schema(schema_doc, document_loader)

    return document_loader, avsc_names, schema_metadata, metaschema_loader


def load_and_validate(
    document_loader: Loader,
    avsc_names: Names,
    document: Union[CommentedMap, str],
    strict: bool,
    strict_foreign_properties: bool = False,
) -> Tuple[Any, Dict[str, Any]]:
    """Load a document and validate it with the provided schema.

    return data, metadata
    """
    try:
        if isinstance(document, CommentedMap):
            data, metadata = document_loader.resolve_all(
                document,
                document["id"],
                checklinks=True,
                strict_foreign_properties=strict_foreign_properties,
            )
        else:
            data, metadata = document_loader.resolve_ref(
                document,
                checklinks=True,
                strict_foreign_properties=strict_foreign_properties,
            )

        validate_doc(
            avsc_names,
            data,
            document_loader,
            strict,
            strict_foreign_properties=strict_foreign_properties,
        )
    except ValidationException as exc:
        raise ValidationException("", None, [exc]) from exc
    return data, metadata

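# Typical end-to-end flow, as a sketch ("my_schema.yml" and "my_doc.yml" are
# hypothetical file names, not part of this module):
#
#     document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(
#         "my_schema.yml"
#     )
#     data, metadata = load_and_validate(
#         document_loader, avsc_names, "my_doc.yml", strict=True
#     )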

def validate_doc(
    schema_names: Names,
    doc: ResolveType,
    loader: Loader,
    strict: bool,
    strict_foreign_properties: bool = False,
) -> None:
    """Validate a document using the provided schema."""
    has_root = False
    for root in schema_names.names.values():
        if (hasattr(root, "get_prop") and root.get_prop("documentRoot")) or (
            "documentRoot" in root.props
        ):
            has_root = True
            break

    if not has_root:
        raise ValidationException("No document roots defined in the schema")

    if isinstance(doc, MutableSequence):
        vdoc = doc
    elif isinstance(doc, CommentedMap):
        vdoc = CommentedSeq([doc])
        vdoc.lc.add_kv_line_col(0, [doc.lc.line, doc.lc.col])
        vdoc.lc.filename = doc.lc.filename
    else:
        raise ValidationException("Document must be dict or list")

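    # Collect every type marked as a document root; each item in the document
    # must validate against at least one of these.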
    roots = []
    for root in schema_names.names.values():
        if (hasattr(root, "get_prop") and root.get_prop("documentRoot")) or (
            root.props.get("documentRoot")
        ):
            roots.append(root)

    anyerrors = []
    for pos, item in enumerate(vdoc):
        sourceline = SourceLine(vdoc, pos, str)
        success = False
        for root in roots:
            success = validate.validate_ex(
                root,
                item,
                loader.identifiers,
                strict,
                foreign_properties=loader.foreign_properties,
                raise_ex=False,
                skip_foreign_properties=loader.skip_schemas,
                strict_foreign_properties=strict_foreign_properties,
            )
            if success:
                break

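        # No root accepted this item, so validate once more per root with
        # raise_ex=True to collect the detailed failures reported below.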
        if not success:
            errors = []  # type: List[SchemaSaladException]
            for root in roots:
                if hasattr(root, "get_prop"):
                    name = root.get_prop("name")
                elif hasattr(root, "name"):
                    name = root.name

                try:
                    validate.validate_ex(
                        root,
                        item,
                        loader.identifiers,
                        strict,
                        foreign_properties=loader.foreign_properties,
                        raise_ex=True,
                        skip_foreign_properties=loader.skip_schemas,
                        strict_foreign_properties=strict_foreign_properties,
                    )
                except ClassValidationException as exc1:
                    errors = [
                        ClassValidationException(
                            f"tried `{name}` but", sourceline, [exc1]
                        )
                    ]
                    break
                except ValidationException as exc2:
                    errors.append(
                        ValidationException(f"tried `{name}` but", sourceline, [exc2])
                    )

            objerr = "Invalid"
            for ident in loader.identifiers:
                if ident in item:
                    objerr = "Object `{}` is not valid because".format(
                        relname(item[ident])
                    )
                    break
            anyerrors.append(ValidationException(objerr, sourceline, errors, "-"))
    if anyerrors:
        raise ValidationException("", None, anyerrors, "*")


def get_anon_name(
    rec: MutableMapping[str, Union[str, Dict[str, str], List[str]]]
) -> str:
    """Calculate a reproducible name for anonymous types."""
    if "name" in rec:
        name = rec["name"]
        if isinstance(name, str):
            return name
        raise ValidationException(f"Expected name field to be a string, was {name}")
    anon_name = ""
    if rec["type"] in ("enum", saladp + "enum"):
        for sym in rec["symbols"]:
            anon_name += sym
        return "enum_" + hashlib.sha1(anon_name.encode("UTF-8")).hexdigest()  # nosec
    if rec["type"] in ("record", saladp + "record"):
        for field in rec["fields"]:
            if isinstance(field, Mapping):
                anon_name += field["name"]
            else:
                raise ValidationException(
                    "Expected entries in 'fields' to also be maps, was {}.".format(
                        field
                    )
                )
        return "record_" + hashlib.sha1(anon_name.encode("UTF-8")).hexdigest()  # nosec
    if rec["type"] in ("array", saladp + "array"):
        return ""
    raise ValidationException("Expected enum or record, was {}".format(rec["type"]))

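# For instance, an anonymous enum {"type": "enum", "symbols": ["a", "b"]}
# concatenates its symbols and is named
# "enum_" + hashlib.sha1(b"ab").hexdigest(), so equal definitions always get
# the same generated name.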

def replace_type(
    items: Any,
    spec: Dict[str, Any],
    loader: Loader,
    found: Set[str],
    find_embeds: bool = True,
    deepen: bool = True,
) -> Any:
    """Go through and replace types in the 'spec' mapping."""
    if isinstance(items, MutableMapping):
        # recursively check these fields for types to replace
        if items.get("type") in ("record", "enum") and items.get("name"):
            if items["name"] in found:
                return items["name"]
            found.add(items["name"])

        if not deepen:
            return items

        items = copy.copy(items)
        if not items.get("name"):
            items["name"] = get_anon_name(items)
        for name in ("type", "items", "fields"):
            if name in items:
                items[name] = replace_type(
                    items[name],
                    spec,
                    loader,
                    found,
                    find_embeds=find_embeds,
                    deepen=find_embeds,
                )
                if isinstance(items[name], MutableSequence):
                    items[name] = flatten(items[name])

        return items
    if isinstance(items, MutableSequence):
        # recursively transform list
        return [
            replace_type(i, spec, loader, found, find_embeds=find_embeds, deepen=deepen)
            for i in items
        ]
    if isinstance(items, str):
        # found a string which is a symbol corresponding to a type.
        replace_with = None
        if items in loader.vocab:
            # If it's a vocabulary term, first expand it to its fully
            # qualified URI
            items = loader.vocab[items]

        if items in spec:
            # Look up in specialization map
            replace_with = spec[items]

        if replace_with:
            return replace_type(
                replace_with, spec, loader, found, find_embeds=find_embeds
            )
        found.add(items)
    return items

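# A sketch of how the specialization map is used (the URIs here are
# hypothetical): with spec = {"https://example.com#Base": "https://example.com#Derived"},
# every reference to "Base" (after vocabulary expansion) inside a type
# definition is rewritten, recursively, to "Derived".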

def avro_name(url: str) -> str:
    """
    Turn a URL into an Avro-safe name.

    If the URL has a fragment, return the part of the fragment after the last
    slash (or the whole fragment if it contains no slash); otherwise return
    the URL unchanged.
    """
    frg = urldefrag(url)[1]
    if frg != "":
        if "/" in frg:
            return frg[frg.rindex("/") + 1 :]
        return frg
    return url

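# For example, avro_name("https://w3id.org/cwl/salad#SaladRecordSchema/type")
# returns "type", avro_name(saladp + "Any") returns "Any", and a URL with no
# fragment is returned unchanged.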

Avro = TypeVar("Avro", MutableMapping[str, Any], MutableSequence[Any], str)


def make_valid_avro(
    items: Avro,
    alltypes: Dict[str, Dict[str, Any]],
    found: Set[str],
    union: bool = False,
) -> Union[
    Avro, MutableMapping[str, str], str, List[Union[Any, MutableMapping[str, str], str]]
]:
    """Convert our schema to be more avro like."""
    # Possibly could be integrated into our fork of avro/schema.py?
    if isinstance(items, MutableMapping):
        avro = copy.copy(items)
        if avro.get("name") and avro.get("inVocab", True):
            avro["name"] = avro_name(avro["name"])

        if "type" in avro and avro["type"] in (
            saladp + "record",
            saladp + "enum",
            "record",
            "enum",
        ):
            if (hasattr(avro, "get") and avro.get("abstract")) or ("abstract" in avro):
                return avro
            if avro["name"] in found:
                return cast(str, avro["name"])
            found.add(avro["name"])
        for field in ("type", "items", "values", "fields"):
            if field in avro:
                avro[field] = make_valid_avro(avro[field], alltypes, found, union=True)
        if "symbols" in avro:
            avro["symbols"] = [avro_name(sym) for sym in avro["symbols"]]
        return avro
    if isinstance(items, MutableSequence):
        ret = []
        for i in items:
            ret.append(make_valid_avro(i, alltypes, found, union=union))
        return ret
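    # A bare string inside a union is a reference to another type: inline the
    # full definition the first time it is seen, otherwise keep the short name.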
    if union and isinstance(items, str):
        if items in alltypes and avro_name(items) not in found:
            return make_valid_avro(alltypes[items], alltypes, found, union=union)
        return avro_name(items)
    else:
        return items


def deepcopy_strip(item: Any) -> Any:
    """
    Make a deep copy of list and dict objects.

    Intentionally do not copy attributes. This is to discard CommentedMap and
    CommentedSeq metadata which is very expensive with regular copy.deepcopy.
    """
    if isinstance(item, MutableMapping):
        return {k: deepcopy_strip(v) for k, v in item.items()}
    if isinstance(item, MutableSequence):
        return [deepcopy_strip(k) for k in item]
    return item

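# For example, deepcopy_strip applied to a CommentedMap yields a plain dict
# (and a CommentedSeq becomes a plain list), discarding all of ruamel.yaml's
# line/column bookkeeping in the copy.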

def extend_and_specialize(
    items: List[Dict[str, Any]], loader: Loader
) -> List[Dict[str, Any]]:
    """Apply 'extend' and 'specialize' to fully materialize derived record types."""
    items2 = deepcopy_strip(items)
    types = {i["name"]: i for i in items2}  # type: Dict[str, Any]
    results = []

    for stype in items2:
        if "extends" in stype:
            specs = {}  # type: Dict[str, str]
            if "specialize" in stype:
                for spec in aslist(stype["specialize"]):
                    specs[spec["specializeFrom"]] = spec["specializeTo"]

            exfields = []  # type: List[str]
            exsym = []  # type: List[str]
            for ex in aslist(stype["extends"]):
                if ex not in types:
                    raise ValidationException(
                        "Extends {} in {} refers to invalid base type.".format(
                            stype["extends"], stype["name"]
                        )
                    )

                basetype = copy.copy(types[ex])

                if stype["type"] == "record":
                    if specs:
                        basetype["fields"] = replace_type(
                            basetype.get("fields", []), specs, loader, set()
                        )

                    for field in basetype.get("fields", []):
                        if "inherited_from" not in field:
                            field["inherited_from"] = ex

                    exfields.extend(basetype.get("fields", []))
                elif stype["type"] == "enum":
                    exsym.extend(basetype.get("symbols", []))

            if stype["type"] == "record":
                stype = copy.copy(stype)
                exfields.extend(stype.get("fields", []))
                stype["fields"] = exfields

                fieldnames = set()  # type: Set[str]
                for field in stype["fields"]:
                    if field["name"] in fieldnames:
                        raise ValidationException(
                            "Field name {} appears twice in {}".format(
                                field["name"], stype["name"]
                            )
                        )
                    else:
                        fieldnames.add(field["name"])
            elif stype["type"] == "enum":
                stype = copy.copy(stype)
                exsym.extend(stype.get("symbols", []))
                stype["symbols"] = exsym

            types[stype["name"]] = stype

        results.append(stype)

    ex_types = {}
    for result in results:
        ex_types[result["name"]] = result

    extended_by = {}  # type: Dict[str, str]
    for result in results:
        if "extends" in result:
            for ex in aslist(result["extends"]):
                if ex_types[ex].get("abstract"):
                    add_dictlist(extended_by, ex, ex_types[result["name"]])
                    add_dictlist(extended_by, avro_name(ex), ex_types[ex])

    for result in results:
        if result.get("abstract") and result["name"] not in extended_by:
            raise ValidationException(
                "{} is abstract but missing a concrete subtype".format(result["name"])
            )

    for result in results:
        if "fields" in result:
            result["fields"] = replace_type(
                result["fields"], extended_by, loader, set()
            )

    return results


def make_avro(
    i: List[Dict[str, Any]],
    loader: Loader,
) -> List[Any]:
    """Convert the schema to Avro-compatible types, dropping abstract and documentation entries."""
    j = extend_and_specialize(i, loader)

    name_dict = {}  # type: Dict[str, Dict[str, Any]]
    for entry in j:
        name_dict[entry["name"]] = entry
    avro = make_valid_avro(j, name_dict, set())

    return [
        t
        for t in avro
        if isinstance(t, MutableMapping)
        and not t.get("abstract")
        and t.get("type") != "documentation"
    ]


def make_avro_schema(
    i: List[Any],
    loader: Loader,
) -> Names:
    """
    All in one convenience function.

    Call make_avro() and make_avro_schema_from_avro() separately if you need
    the intermediate result for diagnostic output.
    """
    names = Names()
    avro = make_avro(i, loader)
    make_avsc_object(convert_to_dict(avro), names)
    return names

def make_avro_schema_from_avro(avro: List[Union[Avro, Dict[str, str], str]]) -> Names:
    """Create a Names object from already-converted Avro type definitions."""
    names = Names()
    make_avsc_object(convert_to_dict(avro), names)
    return names


def shortname(inputid: str) -> str:
    """Return the last segment of the provided fragment or path."""
    parsed_id = urlparse(inputid)
    if parsed_id.fragment:
        return parsed_id.fragment.split("/")[-1]
    return parsed_id.path.split("/")[-1]


def print_inheritance(doc: List[Dict[str, Any]], stream: IO[Any]) -> None:
    """Write a Graphviz inheritance graph for the supplied document."""
    stream.write("digraph {\n")
    for entry in doc:
        if entry["type"] == "record":
            label = name = shortname(entry["name"])
            fields = entry.get("fields", [])
            if fields:
                label += "\\n* {}\\l".format(
                    "\\l* ".join(shortname(field["name"]) for field in fields)
                )
            shape = "ellipse" if entry.get("abstract") else "box"
            stream.write(f'"{name}" [shape={shape} label="{label}"];\n')
            if "extends" in entry:
                for target in aslist(entry["extends"]):
                    stream.write('"{}" -> "{}";\n'.format(shortname(target), name))
    stream.write("}\n")

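# Both print_inheritance above and print_fieldrefs below emit plain DOT text;
# assuming Graphviz is installed, piping the output through
# `dot -Tsvg -o graph.svg` renders the graph.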

def print_fieldrefs(doc: List[Dict[str, Any]], loader: Loader, stream: IO[Any]) -> None:
    """Write a Graphviz graph of the relationships between the fields."""
    obj = extend_and_specialize(doc, loader)

    primitives = {
        "http://www.w3.org/2001/XMLSchema#string",
        "http://www.w3.org/2001/XMLSchema#boolean",
        "http://www.w3.org/2001/XMLSchema#int",
        "http://www.w3.org/2001/XMLSchema#long",
        saladp + "null",
        saladp + "enum",
        saladp + "array",
        saladp + "record",
        saladp + "Any",
    }

    stream.write("digraph {\n")
    for entry in obj:
        if entry.get("abstract"):
            continue
        if entry["type"] == "record":
            label = shortname(entry["name"])
            for field in entry.get("fields", []):
                found = set()  # type: Set[str]
                field_name = shortname(field["name"])
                replace_type(field["type"], {}, loader, found, find_embeds=False)
                for each_type in found:
                    if each_type not in primitives:
                        stream.write(
                            '"{}" -> "{}" [label="{}"];\n'.format(
                                label, shortname(each_type), field_name
                            )
                        )
    stream.write("}\n")