env/lib/python3.7/site-packages/schema_salad/schema.py @ 5:9b1c78e6ba9c (draft, default, tip)

"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"

author    shellac
date      Mon, 01 Jun 2020 08:59:25 -0400
parents   79f47841a781
children
1 """Functions to process Schema Salad schemas.""" | |
2 from __future__ import absolute_import | |
3 | |
4 import copy | |
5 import hashlib | |
6 from typing import ( | |
7 IO, | |
8 Any, | |
9 Dict, | |
10 List, | |
11 Mapping, | |
12 MutableMapping, | |
13 MutableSequence, | |
14 Optional, | |
15 Set, | |
16 Tuple, | |
17 TypeVar, | |
18 Union, | |
19 cast, | |
20 ) | |
21 | |
22 from future.utils import raise_from | |
23 from pkg_resources import resource_stream | |
24 from six import iteritems, string_types | |
25 from six.moves import urllib | |
26 from typing_extensions import Text # pylint: disable=unused-import | |
27 | |
28 from ruamel import yaml | |
29 from ruamel.yaml.comments import CommentedMap, CommentedSeq | |
30 from schema_salad.utils import ( | |
31 add_dictlist, | |
32 aslist, | |
33 convert_to_dict, | |
34 flatten, | |
35 json_dumps, | |
36 ) | |
37 | |
38 from . import _logger, jsonld_context, ref_resolver, validate | |
39 from .exceptions import ( | |
40 ClassValidationException, | |
41 ValidationException, | |
42 SchemaSaladException, | |
43 ) | |
44 from .avro.schema import Names, SchemaParseException, make_avsc_object | |
45 from .ref_resolver import Loader | |
46 from .sourceline import SourceLine, add_lc_filename, relname | |
47 | |
48 SALAD_FILES = ( | |
49 "metaschema.yml", | |
50 "metaschema_base.yml", | |
51 "salad.md", | |
52 "field_name.yml", | |
53 "import_include.md", | |
54 "link_res.yml", | |
55 "ident_res.yml", | |
56 "vocab_res.yml", | |
57 "vocab_res.yml", | |
58 "field_name_schema.yml", | |
59 "field_name_src.yml", | |
60 "field_name_proc.yml", | |
61 "ident_res_schema.yml", | |
62 "ident_res_src.yml", | |
63 "ident_res_proc.yml", | |
64 "link_res_schema.yml", | |
65 "link_res_src.yml", | |
66 "link_res_proc.yml", | |
67 "vocab_res_schema.yml", | |
68 "vocab_res_src.yml", | |
69 "vocab_res_proc.yml", | |
70 "map_res.yml", | |
71 "map_res_schema.yml", | |
72 "map_res_src.yml", | |
73 "map_res_proc.yml", | |
74 "typedsl_res.yml", | |
75 "typedsl_res_schema.yml", | |
76 "typedsl_res_src.yml", | |
77 "typedsl_res_proc.yml", | |
78 "sfdsl_res.yml", | |
79 "sfdsl_res_schema.yml", | |
80 "sfdsl_res_src.yml", | |
81 "sfdsl_res_proc.yml", | |
82 ) | |
83 | |
84 saladp = "https://w3id.org/cwl/salad#" | |
85 | |
86 | |
87 def get_metaschema(): # type: () -> Tuple[Names, List[Dict[Text, Any]], Loader] | |
88 """Instantiate the metaschema.""" | |
89 loader = ref_resolver.Loader( | |
90 { | |
91 "Any": saladp + "Any", | |
92 "ArraySchema": saladp + "ArraySchema", | |
93 "Array_symbol": saladp + "ArraySchema/type/Array_symbol", | |
94 "DocType": saladp + "DocType", | |
95 "Documentation": saladp + "Documentation", | |
96 "Documentation_symbol": saladp + "Documentation/type/Documentation_symbol", | |
97 "Documented": saladp + "Documented", | |
98 "EnumSchema": saladp + "EnumSchema", | |
99 "Enum_symbol": saladp + "EnumSchema/type/Enum_symbol", | |
100 "JsonldPredicate": saladp + "JsonldPredicate", | |
101 "NamedType": saladp + "NamedType", | |
102 "PrimitiveType": saladp + "PrimitiveType", | |
103 "RecordField": saladp + "RecordField", | |
104 "RecordSchema": saladp + "RecordSchema", | |
105 "Record_symbol": saladp + "RecordSchema/type/Record_symbol", | |
106 "SaladEnumSchema": saladp + "SaladEnumSchema", | |
107 "SaladRecordField": saladp + "SaladRecordField", | |
108 "SaladRecordSchema": saladp + "SaladRecordSchema", | |
109 "SchemaDefinedType": saladp + "SchemaDefinedType", | |
110 "SpecializeDef": saladp + "SpecializeDef", | |
111 "_container": saladp + "JsonldPredicate/_container", | |
112 "_id": {"@id": saladp + "_id", "@type": "@id", "identity": True}, | |
113 "_type": saladp + "JsonldPredicate/_type", | |
114 "abstract": saladp + "SaladRecordSchema/abstract", | |
115 "array": saladp + "array", | |
116 "boolean": "http://www.w3.org/2001/XMLSchema#boolean", | |
117 "dct": "http://purl.org/dc/terms/", | |
118 "default": {"@id": saladp + "default", "noLinkCheck": True}, | |
119 "doc": "rdfs:comment", | |
120 "docAfter": {"@id": saladp + "docAfter", "@type": "@id"}, | |
121 "docChild": {"@id": saladp + "docChild", "@type": "@id"}, | |
122 "docParent": {"@id": saladp + "docParent", "@type": "@id"}, | |
123 "documentRoot": saladp + "SchemaDefinedType/documentRoot", | |
124 "documentation": saladp + "documentation", | |
125 "double": "http://www.w3.org/2001/XMLSchema#double", | |
126 "enum": saladp + "enum", | |
127 "extends": {"@id": saladp + "extends", "@type": "@id", "refScope": 1}, | |
128 "fields": { | |
129 "@id": saladp + "fields", | |
130 "mapPredicate": "type", | |
131 "mapSubject": "name", | |
132 }, | |
133 "float": "http://www.w3.org/2001/XMLSchema#float", | |
134 "identity": saladp + "JsonldPredicate/identity", | |
135 "inVocab": saladp + "NamedType/inVocab", | |
136 "int": "http://www.w3.org/2001/XMLSchema#int", | |
137 "items": {"@id": saladp + "items", "@type": "@vocab", "refScope": 2}, | |
138 "jsonldPredicate": "sld:jsonldPredicate", | |
139 "long": "http://www.w3.org/2001/XMLSchema#long", | |
140 "mapPredicate": saladp + "JsonldPredicate/mapPredicate", | |
141 "mapSubject": saladp + "JsonldPredicate/mapSubject", | |
142 "name": "@id", | |
143 "noLinkCheck": saladp + "JsonldPredicate/noLinkCheck", | |
144 "null": saladp + "null", | |
145 "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", | |
146 "rdfs": "http://www.w3.org/2000/01/rdf-schema#", | |
147 "record": saladp + "record", | |
148 "refScope": saladp + "JsonldPredicate/refScope", | |
149 "sld": saladp, | |
150 "specialize": { | |
151 "@id": saladp + "specialize", | |
152 "mapPredicate": "specializeTo", | |
153 "mapSubject": "specializeFrom", | |
154 }, | |
155 "specializeFrom": { | |
156 "@id": saladp + "specializeFrom", | |
157 "@type": "@id", | |
158 "refScope": 1, | |
159 }, | |
160 "specializeTo": { | |
161 "@id": saladp + "specializeTo", | |
162 "@type": "@id", | |
163 "refScope": 1, | |
164 }, | |
165 "string": "http://www.w3.org/2001/XMLSchema#string", | |
166 "subscope": saladp + "JsonldPredicate/subscope", | |
167 "symbols": {"@id": saladp + "symbols", "@type": "@id", "identity": True}, | |
168 "type": { | |
169 "@id": saladp + "type", | |
170 "@type": "@vocab", | |
171 "refScope": 2, | |
172 "typeDSL": True, | |
173 }, | |
174 "typeDSL": saladp + "JsonldPredicate/typeDSL", | |
175 "xsd": "http://www.w3.org/2001/XMLSchema#", | |
176 } | |
177 ) | |
178 | |
179 for salad in SALAD_FILES: | |
180 with resource_stream(__name__, "metaschema/" + salad) as stream: | |
181 loader.cache["https://w3id.org/cwl/" + salad] = stream.read() | |
182 | |
183 with resource_stream(__name__, "metaschema/metaschema.yml") as stream: | |
184 loader.cache["https://w3id.org/cwl/salad"] = stream.read() | |
185 | |
186 j = yaml.round_trip_load(loader.cache["https://w3id.org/cwl/salad"]) | |
187 add_lc_filename(j, "metaschema.yml") | |
188 j, _ = loader.resolve_all(j, saladp) | |
189 | |
190 sch_obj = make_avro(j, loader) | |
191 try: | |
192 sch_names = make_avro_schema_from_avro(sch_obj) | |
193 except SchemaParseException: | |
194 _logger.error("Metaschema error, avro was:\n%s", json_dumps(sch_obj, indent=4)) | |
195 raise | |
196 validate_doc(sch_names, j, loader, strict=True) | |
197 return (sch_names, j, loader) | |
198 | |
199 | |
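# Example (illustrative sketch): the metaschema returned by get_metaschema()
# can be used to validate a Schema Salad schema document directly;
# "my_schema.yml" is a hypothetical file name.
#
#   names, doc, loader = get_metaschema()
#   resolved, _metadata = loader.resolve_ref("my_schema.yml")
#   validate_doc(names, resolved, loader, strict=True)
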
def add_namespaces(metadata, namespaces):
    # type: (Mapping[Text, Any], MutableMapping[Text, Text]) -> None
    """Collect the provided namespaces, checking for conflicts."""
    for key, value in metadata.items():
        if key not in namespaces:
            namespaces[key] = value
        elif namespaces[key] != value:
            raise ValidationException(
                "Namespace prefix '{}' has conflicting definitions '{}'"
                " and '{}'.".format(key, namespaces[key], value)
            )


def collect_namespaces(metadata):
    # type: (Mapping[Text, Any]) -> Dict[Text, Text]
    """Walk through the metadata object, collecting namespace declarations."""
    namespaces = {}  # type: Dict[Text, Text]
    if "$import_metadata" in metadata:
        for value in metadata["$import_metadata"].values():
            add_namespaces(collect_namespaces(value), namespaces)
    if "$namespaces" in metadata:
        add_namespaces(metadata["$namespaces"], namespaces)
    return namespaces

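# Illustrative example for collect_namespaces (prefixes and the "imported.yml"
# key are hypothetical): given metadata of the form
#
#   {"$namespaces": {"edam": "http://edamontology.org/"},
#    "$import_metadata": {"imported.yml": {"$namespaces": {"dct": "http://purl.org/dc/terms/"}}}}
#
# it returns the merged mapping of both prefixes:
#
#   {"edam": "http://edamontology.org/", "dct": "http://purl.org/dc/terms/"}
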
schema_type = Tuple[Loader, Union[Names, SchemaParseException], Dict[Text, Any], Loader]


def load_schema(
    schema_ref,  # type: Union[CommentedMap, CommentedSeq, Text]
    cache=None,  # type: Optional[Dict[Text, Text]]
):
    # type: (...) -> schema_type
    """
    Load a schema that can be used to validate documents using load_and_validate.

    return: document_loader, avsc_names, schema_metadata, metaschema_loader
    """
    metaschema_names, _metaschema_doc, metaschema_loader = get_metaschema()
    if cache is not None:
        metaschema_loader.cache.update(cache)
    schema_doc, schema_metadata = metaschema_loader.resolve_ref(schema_ref, "")

    if not isinstance(schema_doc, MutableSequence):
        raise ValidationException("Schema reference must resolve to a list.")

    validate_doc(metaschema_names, schema_doc, metaschema_loader, True)
    metactx = schema_metadata.get("@context", {})
    metactx.update(collect_namespaces(schema_metadata))
    schema_ctx = jsonld_context.salad_to_jsonld_context(schema_doc, metactx)[0]

    # Create the loader that will be used to load the target document.
    document_loader = Loader(schema_ctx, cache=cache)

    # Make the Avro schema that will be used to validate the target document.
    avsc_names = make_avro_schema(schema_doc, document_loader)

    return document_loader, avsc_names, schema_metadata, metaschema_loader


def load_and_validate(
    document_loader,  # type: Loader
    avsc_names,  # type: Names
    document,  # type: Union[CommentedMap, Text]
    strict,  # type: bool
    strict_foreign_properties=False,  # type: bool
):
    # type: (...) -> Tuple[Any, Dict[Text, Any]]
    """Load a document and validate it with the provided schema.

    return data, metadata
    """
    try:
        if isinstance(document, CommentedMap):
            data, metadata = document_loader.resolve_all(
                document,
                document["id"],
                checklinks=True,
                strict_foreign_properties=strict_foreign_properties,
            )
        else:
            data, metadata = document_loader.resolve_ref(
                document,
                checklinks=True,
                strict_foreign_properties=strict_foreign_properties,
            )

        validate_doc(
            avsc_names,
            data,
            document_loader,
            strict,
            strict_foreign_properties=strict_foreign_properties,
        )
    except ValidationException as exc:
        raise_from(ValidationException("", None, [exc]), exc)
    return data, metadata

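def _example_load_and_validate():  # pragma: no cover
    # Illustrative sketch only (not part of the module's API): load a schema,
    # then validate a document against it. "my_schema.yml" and "my_doc.yml"
    # are hypothetical file names.
    document_loader, avsc_names, _schema_metadata, _metaschema_loader = load_schema(
        "my_schema.yml"
    )
    return load_and_validate(document_loader, avsc_names, "my_doc.yml", strict=True)
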
def validate_doc(
    schema_names,  # type: Names
    doc,  # type: Union[Dict[Text, Any], List[Dict[Text, Any]], Text, None]
    loader,  # type: Loader
    strict,  # type: bool
    strict_foreign_properties=False,  # type: bool
):
    # type: (...) -> None
    """Validate a document using the provided schema."""
    has_root = False
    for root in schema_names.names.values():
        if (hasattr(root, "get_prop") and root.get_prop(u"documentRoot")) or (
            u"documentRoot" in root.props
        ):
            has_root = True
            break

    if not has_root:
        raise ValidationException("No document roots defined in the schema")

    if isinstance(doc, MutableSequence):
        vdoc = doc
    elif isinstance(doc, CommentedMap):
        vdoc = CommentedSeq([doc])
        vdoc.lc.add_kv_line_col(0, [doc.lc.line, doc.lc.col])
        vdoc.lc.filename = doc.lc.filename
    else:
        raise ValidationException("Document must be dict or list")

    roots = []
    for root in schema_names.names.values():
        if (hasattr(root, "get_prop") and root.get_prop(u"documentRoot")) or (
            root.props.get(u"documentRoot")
        ):
            roots.append(root)

    anyerrors = []
    for pos, item in enumerate(vdoc):
        sourceline = SourceLine(vdoc, pos, Text)
        success = False
        for root in roots:
            success = validate.validate_ex(
                root,
                item,
                loader.identifiers,
                strict,
                foreign_properties=loader.foreign_properties,
                raise_ex=False,
                skip_foreign_properties=loader.skip_schemas,
                strict_foreign_properties=strict_foreign_properties,
            )
            if success:
                break

        if not success:
            errors = []  # type: List[SchemaSaladException]
            for root in roots:
                if hasattr(root, "get_prop"):
                    name = root.get_prop(u"name")
                elif hasattr(root, "name"):
                    name = root.name

                try:
                    validate.validate_ex(
                        root,
                        item,
                        loader.identifiers,
                        strict,
                        foreign_properties=loader.foreign_properties,
                        raise_ex=True,
                        skip_foreign_properties=loader.skip_schemas,
                        strict_foreign_properties=strict_foreign_properties,
                    )
                except ClassValidationException as exc:
                    errors = [
                        ClassValidationException(
                            "tried `{}` but".format(name), sourceline, [exc]
                        )
                    ]
                    break
                except ValidationException as exc:
                    errors.append(
                        ValidationException(
                            "tried `{}` but".format(name), sourceline, [exc]
                        )
                    )

            objerr = u"Invalid"
            for ident in loader.identifiers:
                if ident in item:
                    objerr = u"Object `{}` is not valid because".format(
                        relname(item[ident])
                    )
                    break
            anyerrors.append(ValidationException(objerr, sourceline, errors, "-"))
    if anyerrors:
        raise ValidationException("", None, anyerrors, "*")


def get_anon_name(rec):
    # type: (MutableMapping[Text, Union[Text, Dict[Text, Text]]]) -> Text
    """Calculate a reproducible name for anonymous types."""
    if "name" in rec:
        name = rec["name"]
        if isinstance(name, Text):
            return name
        raise ValidationException(
            "Expected name field to be a string, was {}".format(name)
        )
    anon_name = u""
    if rec["type"] in ("enum", saladp + "enum"):
        for sym in rec["symbols"]:
            anon_name += sym
        return "enum_" + hashlib.sha1(anon_name.encode("UTF-8")).hexdigest()
    if rec["type"] in ("record", saladp + "record"):
        for field in rec["fields"]:
            if isinstance(field, Mapping):
                anon_name += field[u"name"]
            else:
                raise ValidationException(
                    "Expected entries in 'fields' to also be maps, was {}.".format(
                        field
                    )
                )
        return u"record_" + hashlib.sha1(anon_name.encode("UTF-8")).hexdigest()
    if rec["type"] in ("array", saladp + "array"):
        return u""
    raise ValidationException("Expected enum or record, was {}".format(rec["type"]))

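# Illustrative example: anonymous types with the same content hash to the same
# reproducible name, e.g. get_anon_name({"type": "enum", "symbols": ["a", "b"]})
# returns "enum_" followed by the SHA-1 hex digest of "ab".
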
def replace_type(items, spec, loader, found, find_embeds=True, deepen=True):
    # type: (Any, Dict[Text, Any], Loader, Set[Text], bool, bool) -> Any
    """Go through and replace types in the 'spec' mapping."""
    if isinstance(items, MutableMapping):
        # recursively check these fields for types to replace
        if items.get("type") in ("record", "enum") and items.get("name"):
            if items["name"] in found:
                return items["name"]
            found.add(items["name"])

        if not deepen:
            return items

        items = copy.copy(items)
        if not items.get("name"):
            items["name"] = get_anon_name(items)
        for name in ("type", "items", "fields"):
            if name in items:
                items[name] = replace_type(
                    items[name],
                    spec,
                    loader,
                    found,
                    find_embeds=find_embeds,
                    deepen=find_embeds,
                )
                if isinstance(items[name], MutableSequence):
                    items[name] = flatten(items[name])

        return items
    if isinstance(items, MutableSequence):
        # recursively transform list
        return [
            replace_type(i, spec, loader, found, find_embeds=find_embeds, deepen=deepen)
            for i in items
        ]
    if isinstance(items, string_types):
        # found a string which is a symbol corresponding to a type.
        replace_with = None
        if items in loader.vocab:
            # If it's a vocabulary term, first expand it to its fully qualified
            # URI
            items = loader.vocab[items]

        if items in spec:
            # Look up in specialization map
            replace_with = spec[items]

        if replace_with:
            return replace_type(
                replace_with, spec, loader, found, find_embeds=find_embeds
            )
        found.add(items)
    return items


def avro_name(url):  # type: (Text) -> Text
    """
    Turn a URL into an Avro-safe name.

    If the URL has no fragment, return this plain URL.

    Extract either the last part of the URL fragment past the slash, otherwise
    the whole fragment.
    """
    frg = urllib.parse.urldefrag(url)[1]
    if frg != "":
        if "/" in frg:
            return frg[frg.rindex("/") + 1 :]
        return frg
    return url

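# Illustrative behaviour of avro_name (assumed examples, not a test suite):
#
#   avro_name("https://w3id.org/cwl/salad#record")        -> "record"
#   avro_name("https://w3id.org/cwl/salad#Schema/fields") -> "fields"
#   avro_name("https://example.com/no-fragment")          -> "https://example.com/no-fragment"
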
Avro = TypeVar("Avro", Dict[Text, Any], List[Any], Text)


def make_valid_avro(
    items,  # type: Avro
    alltypes,  # type: Dict[Text, Dict[Text, Any]]
    found,  # type: Set[Text]
    union=False,  # type: bool
):  # type: (...) -> Union[Avro, Dict[Text, Text], Text]
    """Convert our schema to be more Avro-like."""
    # Possibly could be integrated into our fork of avro/schema.py?
    if isinstance(items, MutableMapping):
        items = copy.copy(items)
        if items.get("name") and items.get("inVocab", True):
            items["name"] = avro_name(items["name"])

        if "type" in items and items["type"] in (
            saladp + "record",
            saladp + "enum",
            "record",
            "enum",
        ):
            if (hasattr(items, "get") and items.get("abstract")) or (
                "abstract" in items
            ):
                return items
            if items["name"] in found:
                return cast(Text, items["name"])
            found.add(items["name"])
        for field in ("type", "items", "values", "fields"):
            if field in items:
                items[field] = make_valid_avro(
                    items[field], alltypes, found, union=True
                )
        if "symbols" in items:
            items["symbols"] = [avro_name(sym) for sym in items["symbols"]]
        return items
    if isinstance(items, MutableSequence):
        ret = []
        for i in items:
            ret.append(make_valid_avro(i, alltypes, found, union=union))
        return ret
    if union and isinstance(items, string_types):
        if items in alltypes and avro_name(items) not in found:
            return cast(
                Dict[Text, Text],
                make_valid_avro(alltypes[items], alltypes, found, union=union),
            )
        items = avro_name(items)
    return items


def deepcopy_strip(item):  # type: (Any) -> Any
    """
    Make a deep copy of list and dict objects.

    Intentionally do not copy attributes. This is to discard CommentedMap and
    CommentedSeq metadata, which is very expensive with regular copy.deepcopy.
    """
    if isinstance(item, MutableMapping):
        return {k: deepcopy_strip(v) for k, v in iteritems(item)}
    if isinstance(item, MutableSequence):
        return [deepcopy_strip(k) for k in item]
    return item

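# Illustrative example: a ruamel tree comes back as plain builtins, with the
# line/column metadata discarded, e.g.
#
#   deepcopy_strip(CommentedMap([("a", CommentedSeq([1, 2]))])) == {"a": [1, 2]}
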
def extend_and_specialize(items, loader):
    # type: (List[Dict[Text, Any]], Loader) -> List[Dict[Text, Any]]
    """Apply 'extends' and 'specialize' to fully materialize derived record types."""
    items = deepcopy_strip(items)
    types = {i["name"]: i for i in items}  # type: Dict[Text, Any]
    results = []

    for stype in items:
        if "extends" in stype:
            specs = {}  # type: Dict[Text, Text]
            if "specialize" in stype:
                for spec in aslist(stype["specialize"]):
                    specs[spec["specializeFrom"]] = spec["specializeTo"]

            exfields = []  # type: List[Text]
            exsym = []  # type: List[Text]
            for ex in aslist(stype["extends"]):
                if ex not in types:
                    raise ValidationException(
                        "Extends {} in {} refers to invalid base type.".format(
                            stype["extends"], stype["name"]
                        )
                    )

                basetype = copy.copy(types[ex])

                if stype["type"] == "record":
                    if specs:
                        basetype["fields"] = replace_type(
                            basetype.get("fields", []), specs, loader, set()
                        )

                    for field in basetype.get("fields", []):
                        if "inherited_from" not in field:
                            field["inherited_from"] = ex

                    exfields.extend(basetype.get("fields", []))
                elif stype["type"] == "enum":
                    exsym.extend(basetype.get("symbols", []))

            if stype["type"] == "record":
                stype = copy.copy(stype)
                exfields.extend(stype.get("fields", []))
                stype["fields"] = exfields

                fieldnames = set()  # type: Set[Text]
                for field in stype["fields"]:
                    if field["name"] in fieldnames:
                        raise ValidationException(
                            "Field name {} appears twice in {}".format(
                                field["name"], stype["name"]
                            )
                        )
                    else:
                        fieldnames.add(field["name"])
            elif stype["type"] == "enum":
                stype = copy.copy(stype)
                exsym.extend(stype.get("symbols", []))
633 stype["symbol"] = exsym | |

            types[stype["name"]] = stype

        results.append(stype)

    ex_types = {}
    for result in results:
        ex_types[result["name"]] = result

    extended_by = {}  # type: Dict[Text, Text]
    for result in results:
        if "extends" in result:
            for ex in aslist(result["extends"]):
                if ex_types[ex].get("abstract"):
                    add_dictlist(extended_by, ex, ex_types[result["name"]])
                    add_dictlist(extended_by, avro_name(ex), ex_types[ex])

    for result in results:
        if result.get("abstract") and result["name"] not in extended_by:
            raise ValidationException(
                "{} is abstract but missing a concrete subtype".format(result["name"])
            )

    for result in results:
        if "fields" in result:
            result["fields"] = replace_type(
                result["fields"], extended_by, loader, set()
            )

    return results


def make_avro(
    i,  # type: List[Dict[Text, Any]]
    loader,  # type: Loader
):  # type: (...) -> List[Any]
    j = extend_and_specialize(i, loader)

    name_dict = {}  # type: Dict[Text, Dict[Text, Any]]
    for entry in j:
        name_dict[entry["name"]] = entry
    avro = make_valid_avro(j, name_dict, set())

    return [
        t
        for t in avro
        if isinstance(t, MutableMapping)
        and not t.get("abstract")
        and t.get("type") != "documentation"
    ]


def make_avro_schema(
    i,  # type: List[Any]
    loader,  # type: Loader
):  # type: (...) -> Names
    """
    All-in-one convenience function.

    Call make_avro() and make_avro_schema_from_avro() separately if you need
    the intermediate result for diagnostic output.
    """
    names = Names()
    avro = make_avro(i, loader)
    make_avsc_object(convert_to_dict(avro), names)
    return names


def make_avro_schema_from_avro(avro):
    # type: (List[Union[Avro, Dict[Text, Text], Text]]) -> Names
    """Create Avro schema names from already-converted Avro types."""
    names = Names()
    make_avsc_object(convert_to_dict(avro), names)
    return names


def shortname(inputid):  # type: (Text) -> Text
    """Return the last segment of the provided fragment or path."""
    parsed_id = urllib.parse.urlparse(inputid)
    if parsed_id.fragment:
        return parsed_id.fragment.split(u"/")[-1]
    return parsed_id.path.split(u"/")[-1]

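# Illustrative behaviour of shortname (assumed examples):
#
#   shortname("https://w3id.org/cwl/salad#Schema/fields") -> "fields"
#   shortname("file:///tmp/schema.yml")                   -> "schema.yml"
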
def print_inheritance(doc, stream):
    # type: (List[Dict[Text, Any]], IO[Any]) -> None
    """Write a Graphviz inheritance graph for the supplied document."""
    stream.write("digraph {\n")
    for entry in doc:
        if entry["type"] == "record":
            label = name = shortname(entry["name"])
            fields = entry.get("fields", [])
            if fields:
                label += "\\n* {}\\l".format(
                    "\\l* ".join(shortname(field["name"]) for field in fields)
                )
            shape = "ellipse" if entry.get("abstract") else "box"
            stream.write('"{}" [shape={} label="{}"];\n'.format(name, shape, label))
            if "extends" in entry:
                for target in aslist(entry["extends"]):
                    stream.write('"{}" -> "{}";\n'.format(shortname(target), name))
    stream.write("}\n")

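# Example (illustrative): render the inheritance graph with Graphviz;
# schema_doc is a hypothetical resolved schema document.
#
#   with open("inheritance.dot", "w") as dot:
#       print_inheritance(schema_doc, dot)
#
# then run `dot -Tsvg inheritance.dot -o inheritance.svg`.
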
def print_fieldrefs(doc, loader, stream):
    # type: (List[Dict[Text, Any]], Loader, IO[Any]) -> None
    """Write a Graphviz graph of the relationships between the fields."""
    obj = extend_and_specialize(doc, loader)

    primitives = set(
        (
            "http://www.w3.org/2001/XMLSchema#string",
            "http://www.w3.org/2001/XMLSchema#boolean",
            "http://www.w3.org/2001/XMLSchema#int",
            "http://www.w3.org/2001/XMLSchema#long",
            saladp + "null",
            saladp + "enum",
            saladp + "array",
            saladp + "record",
            saladp + "Any",
        )
    )

    stream.write("digraph {\n")
    for entry in obj:
        if entry.get("abstract"):
            continue
        if entry["type"] == "record":
            label = shortname(entry["name"])
            for field in entry.get("fields", []):
                found = set()  # type: Set[Text]
                field_name = shortname(field["name"])
                replace_type(field["type"], {}, loader, found, find_embeds=False)
                for each_type in found:
                    if each_type not in primitives:
                        stream.write(
                            '"{}" -> "{}" [label="{}"];\n'.format(
                                label, shortname(each_type), field_name
                            )
                        )
    stream.write("}\n")