env/lib/python3.9/site-packages/schema_salad/schema.py @ 0:4f3585e2f14b (draft, default, tip)
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
| author | shellac |
|---|---|
| date | Mon, 22 Mar 2021 18:12:50 +0000 |
| parents | |
| children | |
comparison: -1:000000000000 -> 0:4f3585e2f14b (new file)
| 1 """Functions to process Schema Salad schemas.""" | |
| 2 | |
| 3 import copy | |
| 4 import hashlib | |
| 5 from typing import ( | |
| 6 IO, | |
| 7 Any, | |
| 8 Dict, | |
| 9 List, | |
| 10 Mapping, | |
| 11 MutableMapping, | |
| 12 MutableSequence, | |
| 13 Optional, | |
| 14 Set, | |
| 15 Tuple, | |
| 16 TypeVar, | |
| 17 Union, | |
| 18 cast, | |
| 19 ) | |
| 20 from urllib.parse import urldefrag, urlparse | |
| 21 | |
| 22 from pkg_resources import resource_stream | |
| 23 from ruamel import yaml | |
| 24 from ruamel.yaml.comments import CommentedMap, CommentedSeq | |
| 25 | |
| 26 from schema_salad.utils import ( | |
| 27 CacheType, | |
| 28 ResolveType, | |
| 29 add_dictlist, | |
| 30 aslist, | |
| 31 convert_to_dict, | |
| 32 flatten, | |
| 33 json_dumps, | |
| 34 ) | |
| 35 | |
| 36 from . import _logger, jsonld_context, ref_resolver, validate | |
| 37 from .avro.schema import Names, SchemaParseException, make_avsc_object | |
| 38 from .exceptions import ( | |
| 39 ClassValidationException, | |
| 40 SchemaSaladException, | |
| 41 ValidationException, | |
| 42 ) | |
| 43 from .ref_resolver import Loader | |
| 44 from .sourceline import SourceLine, add_lc_filename, relname | |
| 45 | |
SALAD_FILES = (
    "metaschema.yml",
    "metaschema_base.yml",
    "salad.md",
    "field_name.yml",
    "import_include.md",
    "link_res.yml",
    "ident_res.yml",
    "vocab_res.yml",
    "field_name_schema.yml",
    "field_name_src.yml",
    "field_name_proc.yml",
    "ident_res_schema.yml",
    "ident_res_src.yml",
    "ident_res_proc.yml",
    "link_res_schema.yml",
    "link_res_src.yml",
    "link_res_proc.yml",
    "vocab_res_schema.yml",
    "vocab_res_src.yml",
    "vocab_res_proc.yml",
    "map_res.yml",
    "map_res_schema.yml",
    "map_res_src.yml",
    "map_res_proc.yml",
    "typedsl_res.yml",
    "typedsl_res_schema.yml",
    "typedsl_res_src.yml",
    "typedsl_res_proc.yml",
    "sfdsl_res.yml",
    "sfdsl_res_schema.yml",
    "sfdsl_res_src.yml",
    "sfdsl_res_proc.yml",
)

saladp = "https://w3id.org/cwl/salad#"


def get_metaschema() -> Tuple[Names, List[Dict[str, str]], Loader]:
    """Instantiate the metaschema."""
    loader = ref_resolver.Loader(
        {
            "Any": saladp + "Any",
            "ArraySchema": saladp + "ArraySchema",
            "Array_symbol": saladp + "ArraySchema/type/Array_symbol",
            "DocType": saladp + "DocType",
            "Documentation": saladp + "Documentation",
            "Documentation_symbol": saladp + "Documentation/type/Documentation_symbol",
            "Documented": saladp + "Documented",
            "EnumSchema": saladp + "EnumSchema",
            "Enum_symbol": saladp + "EnumSchema/type/Enum_symbol",
            "JsonldPredicate": saladp + "JsonldPredicate",
            "NamedType": saladp + "NamedType",
            "PrimitiveType": saladp + "PrimitiveType",
            "RecordField": saladp + "RecordField",
            "RecordSchema": saladp + "RecordSchema",
            "Record_symbol": saladp + "RecordSchema/type/Record_symbol",
            "SaladEnumSchema": saladp + "SaladEnumSchema",
            "SaladRecordField": saladp + "SaladRecordField",
            "SaladRecordSchema": saladp + "SaladRecordSchema",
            "SchemaDefinedType": saladp + "SchemaDefinedType",
            "SpecializeDef": saladp + "SpecializeDef",
            "_container": saladp + "JsonldPredicate/_container",
            "_id": {"@id": saladp + "_id", "@type": "@id", "identity": True},
            "_type": saladp + "JsonldPredicate/_type",
            "abstract": saladp + "SaladRecordSchema/abstract",
            "array": saladp + "array",
            "boolean": "http://www.w3.org/2001/XMLSchema#boolean",
            "dct": "http://purl.org/dc/terms/",
            "default": {"@id": saladp + "default", "noLinkCheck": True},
            "doc": "rdfs:comment",
            "docAfter": {"@id": saladp + "docAfter", "@type": "@id"},
            "docChild": {"@id": saladp + "docChild", "@type": "@id"},
            "docParent": {"@id": saladp + "docParent", "@type": "@id"},
            "documentRoot": saladp + "SchemaDefinedType/documentRoot",
            "documentation": saladp + "documentation",
            "double": "http://www.w3.org/2001/XMLSchema#double",
            "enum": saladp + "enum",
            "extends": {"@id": saladp + "extends", "@type": "@id", "refScope": 1},
            "fields": {
                "@id": saladp + "fields",
                "mapPredicate": "type",
                "mapSubject": "name",
            },
            "float": "http://www.w3.org/2001/XMLSchema#float",
            "identity": saladp + "JsonldPredicate/identity",
            "inVocab": saladp + "NamedType/inVocab",
            "int": "http://www.w3.org/2001/XMLSchema#int",
            "items": {"@id": saladp + "items", "@type": "@vocab", "refScope": 2},
            "jsonldPredicate": "sld:jsonldPredicate",
            "long": "http://www.w3.org/2001/XMLSchema#long",
            "mapPredicate": saladp + "JsonldPredicate/mapPredicate",
            "mapSubject": saladp + "JsonldPredicate/mapSubject",
            "name": "@id",
            "noLinkCheck": saladp + "JsonldPredicate/noLinkCheck",
            "null": saladp + "null",
            "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
            "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
            "record": saladp + "record",
            "refScope": saladp + "JsonldPredicate/refScope",
            "sld": saladp,
            "specialize": {
                "@id": saladp + "specialize",
                "mapPredicate": "specializeTo",
                "mapSubject": "specializeFrom",
            },
            "specializeFrom": {
                "@id": saladp + "specializeFrom",
                "@type": "@id",
                "refScope": 1,
            },
            "specializeTo": {
                "@id": saladp + "specializeTo",
                "@type": "@id",
                "refScope": 1,
            },
            "string": "http://www.w3.org/2001/XMLSchema#string",
            "subscope": saladp + "JsonldPredicate/subscope",
            "symbols": {"@id": saladp + "symbols", "@type": "@id", "identity": True},
            "type": {
                "@id": saladp + "type",
                "@type": "@vocab",
                "refScope": 2,
                "typeDSL": True,
            },
            "typeDSL": saladp + "JsonldPredicate/typeDSL",
            "xsd": "http://www.w3.org/2001/XMLSchema#",
        }
    )

    for salad in SALAD_FILES:
        with resource_stream("schema_salad", "metaschema/" + salad) as stream:
            loader.cache["https://w3id.org/cwl/" + salad] = stream.read().decode(
                "UTF-8"
            )

    with resource_stream("schema_salad", "metaschema/metaschema.yml") as stream:
        loader.cache["https://w3id.org/cwl/salad"] = stream.read().decode("UTF-8")

    j = yaml.main.round_trip_load(loader.cache["https://w3id.org/cwl/salad"])
    add_lc_filename(j, "metaschema.yml")
    j2 = loader.resolve_all(j, saladp)[0]

    if not isinstance(j2, list):
        _logger.error("%s", j2)
        raise SchemaParseException(f"Not a list: {j2}")
    else:
        sch_obj = make_avro(j2, loader)
    try:
        sch_names = make_avro_schema_from_avro(sch_obj)
    except SchemaParseException:
        _logger.error("Metaschema error, avro was:\n%s", json_dumps(sch_obj, indent=4))
        raise
    validate_doc(sch_names, j2, loader, strict=True)
    return (sch_names, j2, loader)

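# A minimal sketch of direct metaschema use; the names on the left are
# illustrative:
#
#     metaschema_names, metaschema_doc, metaschema_loader = get_metaschema()
#     # metaschema_names is a Names object usable with validate_doc() below.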

def add_namespaces(
    metadata: Mapping[str, Any], namespaces: MutableMapping[str, str]
) -> None:
    """Collect the provided namespaces, checking for conflicts."""
    for key, value in metadata.items():
        if key not in namespaces:
            namespaces[key] = value
        elif namespaces[key] != value:
            raise ValidationException(
                "Namespace prefix '{}' has conflicting definitions '{}'"
                " and '{}'.".format(key, namespaces[key], value)
            )

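# For example (the "edam" prefix and URIs are illustrative):
#
#     ns = {"edam": "http://edamontology.org/"}
#     add_namespaces({"edam": "http://edamontology.org/"}, ns)  # identical: no-op
#     add_namespaces({"edam": "http://example.com/"}, ns)  # raises ValidationException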

def collect_namespaces(metadata: Mapping[str, Any]) -> Dict[str, str]:
    """Walk through the metadata object, collecting namespace declarations."""
    namespaces = {}  # type: Dict[str, str]
    if "$import_metadata" in metadata:
        for value in metadata["$import_metadata"].values():
            add_namespaces(collect_namespaces(value), namespaces)
    if "$namespaces" in metadata:
        add_namespaces(metadata["$namespaces"], namespaces)
    return namespaces

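# For example, a top-level $namespaces block is returned directly, and
# $import_metadata entries are walked recursively (illustrative input):
#
#     collect_namespaces({"$namespaces": {"dct": "http://purl.org/dc/terms/"}})
#     # -> {"dct": "http://purl.org/dc/terms/"}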

schema_type = Tuple[Loader, Union[Names, SchemaParseException], Dict[str, Any], Loader]


def load_schema(
    schema_ref: ResolveType,
    cache: Optional[CacheType] = None,
) -> schema_type:
    """
    Load a schema that can be used to validate documents using load_and_validate.

    return: document_loader, avsc_names, schema_metadata, metaschema_loader
    """
    metaschema_names, _metaschema_doc, metaschema_loader = get_metaschema()
    if cache is not None:
        metaschema_loader.cache.update(cache)
    schema_doc, schema_metadata = metaschema_loader.resolve_ref(schema_ref, "")

    if not isinstance(schema_doc, MutableSequence):
        raise ValidationException("Schema reference must resolve to a list.")

    validate_doc(metaschema_names, schema_doc, metaschema_loader, True)
    metactx = schema_metadata.get("@context", {})
    metactx.update(collect_namespaces(schema_metadata))
    schema_ctx = jsonld_context.salad_to_jsonld_context(schema_doc, metactx)[0]

    # Create the loader that will be used to load the target document.
    document_loader = Loader(schema_ctx, cache=cache)

    # Make the Avro validation schema that will be used to validate the
    # target document.
    avsc_names = make_avro_schema(schema_doc, document_loader)

    return document_loader, avsc_names, schema_metadata, metaschema_loader


def load_and_validate(
    document_loader: Loader,
    avsc_names: Names,
    document: Union[CommentedMap, str],
    strict: bool,
    strict_foreign_properties: bool = False,
) -> Tuple[Any, Dict[str, Any]]:
    """Load a document and validate it with the provided schema.

    return data, metadata
    """
    try:
        if isinstance(document, CommentedMap):
            data, metadata = document_loader.resolve_all(
                document,
                document["id"],
                checklinks=True,
                strict_foreign_properties=strict_foreign_properties,
            )
        else:
            data, metadata = document_loader.resolve_ref(
                document,
                checklinks=True,
                strict_foreign_properties=strict_foreign_properties,
            )

        validate_doc(
            avsc_names,
            data,
            document_loader,
            strict,
            strict_foreign_properties=strict_foreign_properties,
        )
    except ValidationException as exc:
        raise ValidationException("", None, [exc]) from exc
    return data, metadata

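# The typical pipeline pairs load_schema() with load_and_validate(); a
# minimal sketch, where the schema URL and document path are illustrative
# placeholders:
#
#     document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(
#         "https://example.com/myschema.yml"
#     )
#     data, metadata = load_and_validate(
#         document_loader, avsc_names, "mydoc.yml", strict=True
#     )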

def validate_doc(
    schema_names: Names,
    doc: ResolveType,
    loader: Loader,
    strict: bool,
    strict_foreign_properties: bool = False,
) -> None:
    """Validate a document using the provided schema."""
    has_root = False
    for root in schema_names.names.values():
        if (hasattr(root, "get_prop") and root.get_prop("documentRoot")) or (
            "documentRoot" in root.props
        ):
            has_root = True
            break

    if not has_root:
        raise ValidationException("No document roots defined in the schema")

    if isinstance(doc, MutableSequence):
        vdoc = doc
    elif isinstance(doc, CommentedMap):
        vdoc = CommentedSeq([doc])
        vdoc.lc.add_kv_line_col(0, [doc.lc.line, doc.lc.col])
        vdoc.lc.filename = doc.lc.filename
    else:
        raise ValidationException("Document must be dict or list")

    roots = []
    for root in schema_names.names.values():
        if (hasattr(root, "get_prop") and root.get_prop("documentRoot")) or (
            root.props.get("documentRoot")
        ):
            roots.append(root)

    anyerrors = []
    for pos, item in enumerate(vdoc):
        sourceline = SourceLine(vdoc, pos, str)
        success = False
        for root in roots:
            success = validate.validate_ex(
                root,
                item,
                loader.identifiers,
                strict,
                foreign_properties=loader.foreign_properties,
                raise_ex=False,
                skip_foreign_properties=loader.skip_schemas,
                strict_foreign_properties=strict_foreign_properties,
            )
            if success:
                break

        if not success:
            errors = []  # type: List[SchemaSaladException]
            for root in roots:
                if hasattr(root, "get_prop"):
                    name = root.get_prop("name")
                elif hasattr(root, "name"):
                    name = root.name

                try:
                    validate.validate_ex(
                        root,
                        item,
                        loader.identifiers,
                        strict,
                        foreign_properties=loader.foreign_properties,
                        raise_ex=True,
                        skip_foreign_properties=loader.skip_schemas,
                        strict_foreign_properties=strict_foreign_properties,
                    )
                except ClassValidationException as exc1:
                    errors = [
                        ClassValidationException(
                            f"tried `{name}` but", sourceline, [exc1]
                        )
                    ]
                    break
                except ValidationException as exc2:
                    errors.append(
                        ValidationException(f"tried `{name}` but", sourceline, [exc2])
                    )

            objerr = "Invalid"
            for ident in loader.identifiers:
                if ident in item:
                    objerr = "Object `{}` is not valid because".format(
                        relname(item[ident])
                    )
                    break
            anyerrors.append(ValidationException(objerr, sourceline, errors, "-"))
    if anyerrors:
        raise ValidationException("", None, anyerrors, "*")

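# For example, get_metaschema() validates the resolved metaschema against its
# own Avro names this way:
#
#     names, doc, loader = get_metaschema()
#     validate_doc(names, doc, loader, strict=True)  # raises ValidationException on failure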

def get_anon_name(
    rec: MutableMapping[str, Union[str, Dict[str, str], List[str]]]
) -> str:
    """Calculate a reproducible name for anonymous types."""
    if "name" in rec:
        name = rec["name"]
        if isinstance(name, str):
            return name
        raise ValidationException(f"Expected name field to be a string, was {name}")
    anon_name = ""
    if rec["type"] in ("enum", saladp + "enum"):
        for sym in rec["symbols"]:
            anon_name += sym
        return "enum_" + hashlib.sha1(anon_name.encode("UTF-8")).hexdigest()  # nosec
    if rec["type"] in ("record", saladp + "record"):
        for field in rec["fields"]:
            if isinstance(field, Mapping):
                anon_name += field["name"]
            else:
                raise ValidationException(
                    "Expected entries in 'fields' to also be maps, was {}.".format(
                        field
                    )
                )
        return "record_" + hashlib.sha1(anon_name.encode("UTF-8")).hexdigest()  # nosec
    if rec["type"] in ("array", saladp + "array"):
        return ""
    raise ValidationException("Expected enum or record, was {}".format(rec["type"]))

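# For example (inputs are illustrative):
#
#     get_anon_name({"name": "foo"})  # -> "foo"
#     get_anon_name({"type": "enum", "symbols": ["a", "b"]})
#     # -> "enum_" + the sha1 hex digest of "ab"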

def replace_type(
    items: Any,
    spec: Dict[str, Any],
    loader: Loader,
    found: Set[str],
    find_embeds: bool = True,
    deepen: bool = True,
) -> Any:
    """Go through and replace types in the 'spec' mapping."""
    if isinstance(items, MutableMapping):
        # recursively check these fields for types to replace
        if items.get("type") in ("record", "enum") and items.get("name"):
            if items["name"] in found:
                return items["name"]
            found.add(items["name"])

        if not deepen:
            return items

        items = copy.copy(items)
        if not items.get("name"):
            items["name"] = get_anon_name(items)
        for name in ("type", "items", "fields"):
            if name in items:
                items[name] = replace_type(
                    items[name],
                    spec,
                    loader,
                    found,
                    find_embeds=find_embeds,
                    deepen=find_embeds,
                )
                if isinstance(items[name], MutableSequence):
                    items[name] = flatten(items[name])

        return items
    if isinstance(items, MutableSequence):
        # recursively transform list
        return [
            replace_type(i, spec, loader, found, find_embeds=find_embeds, deepen=deepen)
            for i in items
        ]
    if isinstance(items, str):
        # found a string which is a symbol corresponding to a type.
        replace_with = None
        if items in loader.vocab:
            # If it's a vocabulary term, first expand it to its fully qualified
            # URI
            items = loader.vocab[items]

        if items in spec:
            # Look up in specialization map
            replace_with = spec[items]

        if replace_with:
            return replace_type(
                replace_with, spec, loader, found, find_embeds=find_embeds
            )
        found.add(items)
    return items

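# A minimal sketch (assuming a Loader with an empty vocabulary): plain
# strings found in the spec mapping are replaced, recursively:
#
#     replace_type(["A", "B"], {"A": "C"}, Loader({}), set())  # -> ["C", "B"]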

def avro_name(url: str) -> str:
    """
    Turn a URL into an Avro-safe name.

    If the URL has no fragment, return the URL unchanged. Otherwise return
    the part of the fragment after the last slash, or the whole fragment if
    it contains no slash.
    """
    frg = urldefrag(url)[1]
    if frg != "":
        if "/" in frg:
            return frg[frg.rindex("/") + 1 :]
        return frg
    return url

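# For example:
#
#     avro_name("https://w3id.org/cwl/salad#RecordField")  # -> "RecordField"
#     avro_name(saladp + "RecordSchema/type/Record_symbol")  # -> "Record_symbol"
#     avro_name("string")  # no fragment -> "string"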

Avro = TypeVar("Avro", MutableMapping[str, Any], MutableSequence[Any], str)


def make_valid_avro(
    items: Avro,
    alltypes: Dict[str, Dict[str, Any]],
    found: Set[str],
    union: bool = False,
) -> Union[
    Avro, MutableMapping[str, str], str, List[Union[Any, MutableMapping[str, str], str]]
]:
    """Convert our schema to be more Avro-like."""
    # Possibly could be integrated into our fork of avro/schema.py?
    if isinstance(items, MutableMapping):
        avro = copy.copy(items)
        if avro.get("name") and avro.get("inVocab", True):
            avro["name"] = avro_name(avro["name"])

        if "type" in avro and avro["type"] in (
            saladp + "record",
            saladp + "enum",
            "record",
            "enum",
        ):
            if (hasattr(avro, "get") and avro.get("abstract")) or ("abstract" in avro):
                return avro
            if avro["name"] in found:
                return cast(str, avro["name"])
            found.add(avro["name"])
        for field in ("type", "items", "values", "fields"):
            if field in avro:
                avro[field] = make_valid_avro(avro[field], alltypes, found, union=True)
        if "symbols" in avro:
            avro["symbols"] = [avro_name(sym) for sym in avro["symbols"]]
        return avro
    if isinstance(items, MutableSequence):
        ret = []
        for i in items:
            ret.append(make_valid_avro(i, alltypes, found, union=union))
        return ret
    if union and isinstance(items, str):
        if items in alltypes and avro_name(items) not in found:
            return make_valid_avro(alltypes[items], alltypes, found, union=union)
        return avro_name(items)
    return items

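# A minimal sketch (illustrative input): the salad name is flattened and
# nested type fields are rewritten recursively:
#
#     make_valid_avro({"name": saladp + "Foo", "type": "record", "fields": []}, {}, set())
#     # -> {"name": "Foo", "type": "record", "fields": []}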

def deepcopy_strip(item: Any) -> Any:
    """
    Make a deep copy of list and dict objects.

    Intentionally do not copy attributes. This is to discard CommentedMap and
    CommentedSeq metadata, which is very expensive with regular copy.deepcopy.
    """
    if isinstance(item, MutableMapping):
        return {k: deepcopy_strip(v) for k, v in item.items()}
    if isinstance(item, MutableSequence):
        return [deepcopy_strip(k) for k in item]
    return item

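# For example, a CommentedMap/CommentedSeq tree comes back as plain
# dicts/lists, dropping the line/column metadata:
#
#     deepcopy_strip(CommentedMap([("a", CommentedSeq([1, 2]))]))  # -> {"a": [1, 2]}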

def extend_and_specialize(
    items: List[Dict[str, Any]], loader: Loader
) -> List[Dict[str, Any]]:
    """Apply 'extends' and 'specialize' to fully materialize derived record types."""
    items2 = deepcopy_strip(items)
    types = {i["name"]: i for i in items2}  # type: Dict[str, Any]
    results = []

    for stype in items2:
        if "extends" in stype:
            specs = {}  # type: Dict[str, str]
            if "specialize" in stype:
                for spec in aslist(stype["specialize"]):
                    specs[spec["specializeFrom"]] = spec["specializeTo"]

            exfields = []  # type: List[str]
            exsym = []  # type: List[str]
            for ex in aslist(stype["extends"]):
                if ex not in types:
                    raise ValidationException(
                        "Extends {} in {} refers to invalid base type.".format(
                            stype["extends"], stype["name"]
                        )
                    )

                basetype = copy.copy(types[ex])

                if stype["type"] == "record":
                    if specs:
                        basetype["fields"] = replace_type(
                            basetype.get("fields", []), specs, loader, set()
                        )

                    for field in basetype.get("fields", []):
                        if "inherited_from" not in field:
                            field["inherited_from"] = ex

                    exfields.extend(basetype.get("fields", []))
                elif stype["type"] == "enum":
                    exsym.extend(basetype.get("symbols", []))

            if stype["type"] == "record":
                stype = copy.copy(stype)
                exfields.extend(stype.get("fields", []))
                stype["fields"] = exfields

                fieldnames = set()  # type: Set[str]
                for field in stype["fields"]:
                    if field["name"] in fieldnames:
                        raise ValidationException(
                            "Field name {} appears twice in {}".format(
                                field["name"], stype["name"]
                            )
                        )
                    else:
                        fieldnames.add(field["name"])
            elif stype["type"] == "enum":
                stype = copy.copy(stype)
                exsym.extend(stype.get("symbols", []))
                stype["symbols"] = exsym

        types[stype["name"]] = stype

        results.append(stype)

    ex_types = {}
    for result in results:
        ex_types[result["name"]] = result

    extended_by = {}  # type: Dict[str, str]
    for result in results:
        if "extends" in result:
            for ex in aslist(result["extends"]):
                if ex_types[ex].get("abstract"):
                    add_dictlist(extended_by, ex, ex_types[result["name"]])
                    add_dictlist(extended_by, avro_name(ex), ex_types[ex])

    for result in results:
        if result.get("abstract") and result["name"] not in extended_by:
            raise ValidationException(
                "{} is abstract but missing a concrete subtype".format(result["name"])
            )

    for result in results:
        if "fields" in result:
            result["fields"] = replace_type(
                result["fields"], extended_by, loader, set()
            )

    return results

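# A sketch of the field inheritance (type names are illustrative; `loader`
# is any Loader instance):
#
#     base = {"name": "Base", "type": "record",
#             "fields": [{"name": "id", "type": "string"}]}
#     child = {"name": "Child", "type": "record", "extends": "Base", "fields": []}
#     extend_and_specialize([base, child], loader)
#     # -> Child's fields now include
#     #    {"name": "id", "type": "string", "inherited_from": "Base"}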

def make_avro(
    i: List[Dict[str, Any]],
    loader: Loader,
) -> List[Any]:
    """Expand 'extends'/'specialize', then convert the result to valid Avro types."""
    j = extend_and_specialize(i, loader)

    name_dict = {}  # type: Dict[str, Dict[str, Any]]
    for entry in j:
        name_dict[entry["name"]] = entry
    avro = make_valid_avro(j, name_dict, set())

    return [
        t
        for t in avro
        if isinstance(t, MutableMapping)
        and not t.get("abstract")
        and t.get("type") != "documentation"
    ]


def make_avro_schema(
    i: List[Any],
    loader: Loader,
) -> Names:
    """
    All-in-one convenience function.

    Call make_avro() and make_avro_schema_from_avro() separately if you need
    the intermediate result for diagnostic output.
    """
    names = Names()
    avro = make_avro(i, loader)
    make_avsc_object(convert_to_dict(avro), names)
    return names

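# A minimal sketch (schema list and loader are illustrative):
#
#     avsc_names = make_avro_schema(
#         [{"name": "Simple", "type": "record",
#           "fields": [{"name": "x", "type": "string"}]}],
#         Loader({}),
#     )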

def make_avro_schema_from_avro(avro: List[Union[Avro, Dict[str, str], str]]) -> Names:
    """Create the Avro Names object from an already-converted Avro type list."""
    names = Names()
    make_avsc_object(convert_to_dict(avro), names)
    return names


def shortname(inputid: str) -> str:
    """Return the last segment of the provided fragment or path."""
    parsed_id = urlparse(inputid)
    if parsed_id.fragment:
        return parsed_id.fragment.split("/")[-1]
    return parsed_id.path.split("/")[-1]

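# For example:
#
#     shortname("https://w3id.org/cwl/salad#RecordField/name")  # -> "name"
#     shortname("file:///tmp/schemas/base.yml")  # no fragment -> "base.yml"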

def print_inheritance(doc: List[Dict[str, Any]], stream: IO[Any]) -> None:
    """Write a Graphviz inheritance graph for the supplied document."""
    stream.write("digraph {\n")
    for entry in doc:
        if entry["type"] == "record":
            label = name = shortname(entry["name"])
            fields = entry.get("fields", [])
            if fields:
                label += "\\n* {}\\l".format(
                    "\\l* ".join(shortname(field["name"]) for field in fields)
                )
            shape = "ellipse" if entry.get("abstract") else "box"
            stream.write(f'"{name}" [shape={shape} label="{label}"];\n')
            if "extends" in entry:
                for target in aslist(entry["extends"]):
                    stream.write('"{}" -> "{}";\n'.format(shortname(target), name))
    stream.write("}\n")

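# A minimal sketch: write the inheritance graph for a resolved schema list,
# for instance the metaschema itself, to standard output:
#
#     import sys
#     names, doc, loader = get_metaschema()
#     print_inheritance(doc, sys.stdout)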

def print_fieldrefs(doc: List[Dict[str, Any]], loader: Loader, stream: IO[Any]) -> None:
    """Write a Graphviz graph of the relationships between the fields."""
    obj = extend_and_specialize(doc, loader)

    primitives = {
        "http://www.w3.org/2001/XMLSchema#string",
        "http://www.w3.org/2001/XMLSchema#boolean",
        "http://www.w3.org/2001/XMLSchema#int",
        "http://www.w3.org/2001/XMLSchema#long",
        saladp + "null",
        saladp + "enum",
        saladp + "array",
        saladp + "record",
        saladp + "Any",
    }

    stream.write("digraph {\n")
    for entry in obj:
        if entry.get("abstract"):
            continue
        if entry["type"] == "record":
            label = shortname(entry["name"])
            for field in entry.get("fields", []):
                found = set()  # type: Set[str]
                field_name = shortname(field["name"])
                replace_type(field["type"], {}, loader, found, find_embeds=False)
                for each_type in found:
                    if each_type not in primitives:
                        stream.write(
                            '"{}" -> "{}" [label="{}"];\n'.format(
                                label, shortname(each_type), field_name
                            )
                        )
    stream.write("}\n")
