Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/schema_salad/validate.py @ 2:6af9afd405e9 draft
"planemo upload commit 0a63dd5f4d38a1f6944587f52a8cd79874177fc1"
author | shellac |
---|---|
date | Thu, 14 May 2020 14:56:58 -0400 |
parents | 26e78fe6e8c4 |
children |
comparison
equal
deleted
inserted
replaced
1:75ca89e9b81c | 2:6af9afd405e9 |
---|---|
1 from __future__ import absolute_import | |
2 | |
3 import logging | |
4 import pprint | |
5 from typing import ( # noqa: F401 | |
6 Any, | |
7 List, | |
8 MutableMapping, | |
9 MutableSequence, | |
10 Optional, | |
11 Set, | |
12 Union, | |
13 ) | |
14 | |
15 import six | |
16 from six.moves import urllib | |
17 from typing_extensions import Text # pylint: disable=unused-import | |
18 | |
19 from . import avro | |
20 from .exceptions import ( | |
21 ClassValidationException, | |
22 ValidationException, | |
23 SchemaSaladException, | |
24 ) | |
25 from .avro import schema # noqa: F401 | |
26 from .avro.schema import ( # pylint: disable=unused-import, no-name-in-module, import-error | |
27 Schema, | |
28 ) | |
29 from .sourceline import SourceLine | |
30 | |
31 # move to a regular typing import when Python 3.3-3.6 is no longer supported | |
32 | |
33 | |
# Module-level logger registered under the name "salad"; it is the default
# value of the ``logger`` parameter of validate_ex().
_logger = logging.getLogger("salad")
35 | |
36 | |
def validate(
    expected_schema,  # type: Schema
    datum,  # type: Any
    identifiers=None,  # type: Optional[List[Text]]
    strict=False,  # type: bool
    foreign_properties=None,  # type: Optional[Set[Text]]
):
    # type: (...) -> bool
    """Check ``datum`` against ``expected_schema`` and report the outcome.

    Delegates to :func:`validate_ex` with ``raise_ex=False``, so a failed
    validation is reported as a ``False`` return value.  Falsy
    ``identifiers`` / ``foreign_properties`` (including ``None``) are
    replaced by a fresh empty list / set before delegating.
    """
    return validate_ex(
        expected_schema,
        datum,
        identifiers if identifiers else [],
        strict=strict,
        foreign_properties=foreign_properties if foreign_properties else set(),
        raise_ex=False,
    )
57 | |
58 | |
# Inclusive bounds used when validating the "int" (signed 32-bit) and
# "long" (signed 64-bit) schema types.
INT_MIN_VALUE = -(2 ** 31)
INT_MAX_VALUE = 2 ** 31 - 1
LONG_MIN_VALUE = -(2 ** 63)
LONG_MAX_VALUE = 2 ** 63 - 1
63 | |
64 | |
def friendly(v):  # type: (Any) -> Any
    """Return a short description of schema ``v`` for use in messages.

    * named schema     -> its ``name``
    * array schema     -> ``"array of <ITEM>"`` (item described recursively)
    * primitive schema -> its ``type``
    * union schema     -> member descriptions joined with ``" or "``
    * anything else    -> ``v`` itself, unchanged
    """
    schema_mod = avro.schema
    if isinstance(v, schema_mod.NamedSchema):
        return v.name
    if isinstance(v, schema_mod.ArraySchema):
        return "array of <{}>".format(friendly(v.items))
    if isinstance(v, schema_mod.PrimitiveSchema):
        return v.type
    if isinstance(v, schema_mod.UnionSchema):
        return " or ".join(friendly(member) for member in v.schemas)
    return v
76 | |
77 | |
def vpformat(datum):  # type: (Any) -> str
    """Pretty-print ``datum``, cutting long output down for error messages.

    Output longer than 160 characters is truncated to its first 160
    characters followed by the marker ``[...]``.
    """
    limit = 160
    text = pprint.pformat(datum)
    return text if len(text) <= limit else text[:limit] + "[...]"
83 | |
84 | |
def validate_ex(
    expected_schema,  # type: Schema
    datum,  # type: Any
    identifiers=None,  # type: Optional[List[Text]]
    strict=False,  # type: bool
    foreign_properties=None,  # type: Optional[Set[Text]]
    raise_ex=True,  # type: bool
    strict_foreign_properties=False,  # type: bool
    logger=_logger,  # type: logging.Logger
    skip_foreign_properties=False,  # type: bool
):
    # type: (...) -> bool
    """Determine if a python datum is an instance of a schema.

    The check is recursive: array items, union members and record fields
    are validated by calling this function again with the same options.

    Parameters:
        expected_schema: schema the datum is checked against.
        datum: the value to validate.
        identifiers: record keys that are accepted even though the record
            schema does not declare them.
        strict: when true, undeclared record keys without a URL scheme
            and ``None`` keys are errors; otherwise they are only logged
            as warnings.
        foreign_properties: record keys that are accepted as known
            extension fields.
        raise_ex: when true a failed validation raises; when false it is
            reported by returning ``False``.
        strict_foreign_properties: when true, undeclared keys that have a
            URL scheme and are not in ``foreign_properties`` are errors.
        logger: receives the warnings emitted for tolerated problems.
        skip_foreign_properties: when true, undeclared keys that have a
            URL scheme are not reported at all.

    Returns:
        ``True`` if the datum is valid; ``False`` if it is not and
        ``raise_ex`` is false.

    Raises:
        ValidationException: the datum is invalid and ``raise_ex`` is true.
        ClassValidationException: as above, for a record whose ``class``
            field matched the schema name but whose content is invalid.
    """

    if not identifiers:
        identifiers = []

    if not foreign_properties:
        foreign_properties = set()

    schema_type = expected_schema.type

    # Every branch below ends in ``return`` or ``raise``, so plain ``if``
    # statements are equivalent to an ``elif`` chain.

    if schema_type == "null":
        if datum is None:
            return True
        if raise_ex:
            raise ValidationException(u"the value is not null")
        return False

    if schema_type == "boolean":
        if isinstance(datum, bool):
            return True
        if raise_ex:
            raise ValidationException(u"the value is not boolean")
        return False

    if schema_type == "string":
        if isinstance(datum, six.string_types):
            return True
        if isinstance(datum, bytes):
            # The decoded text is not used; the call is kept so that bytes
            # which are not valid UTF-8 still raise exactly as before.
            datum.decode(u"utf-8")
            return True
        if raise_ex:
            raise ValidationException(u"the value is not string")
        return False

    if schema_type == "int":
        if (
            isinstance(datum, six.integer_types)
            and INT_MIN_VALUE <= datum <= INT_MAX_VALUE
        ):
            return True
        if raise_ex:
            raise ValidationException(u"`{}` is not int".format(vpformat(datum)))
        return False

    if schema_type == "long":
        if (
            isinstance(datum, six.integer_types)
            and LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE
        ):
            return True
        if raise_ex:
            raise ValidationException(
                u"the value `{}` is not long".format(vpformat(datum))
            )
        return False

    if schema_type in ["float", "double"]:
        if isinstance(datum, six.integer_types) or isinstance(datum, float):
            return True
        if raise_ex:
            raise ValidationException(
                u"the value `{}` is not float or double".format(vpformat(datum))
            )
        return False

    if isinstance(expected_schema, avro.schema.EnumSchema):
        # The enum named "Any" accepts every non-null value.
        if expected_schema.name == "Any":
            if datum is not None:
                return True
            if raise_ex:
                raise ValidationException(u"'Any' type must be non-null")
            return False
        if not isinstance(datum, six.string_types):
            if raise_ex:
                raise ValidationException(
                    u"value is a {} but expected a string".format(
                        (type(datum).__name__)
                    )
                )
            return False
        # The enum named "Expression" accepts any string containing
        # "$(" or "${" instead of matching against its symbols.
        if expected_schema.name == "Expression":
            if "$(" in datum or "${" in datum:
                return True
            if raise_ex:
                raise ValidationException(
                    u"value `%s` does not contain an expression in the form $() or ${}"
                    % datum
                )
            return False
        if datum in expected_schema.symbols:
            return True
        if raise_ex:
            raise ValidationException(
                u"the value {} is not a valid {}, expected {}{}".format(
                    vpformat(datum),
                    expected_schema.name,
                    "one of " if len(expected_schema.symbols) > 1 else "",
                    "'" + "', '".join(expected_schema.symbols) + "'",
                )
            )
        return False

    if isinstance(expected_schema, avro.schema.ArraySchema):
        if not isinstance(datum, MutableSequence):
            if raise_ex:
                raise ValidationException(
                    u"the value {} is not a list, expected list of {}".format(
                        vpformat(datum), friendly(expected_schema.items)
                    )
                )
            return False
        for i, d in enumerate(datum):
            try:
                sl = SourceLine(datum, i, ValidationException)
                if not validate_ex(
                    expected_schema.items,
                    d,
                    identifiers,
                    strict=strict,
                    foreign_properties=foreign_properties,
                    raise_ex=raise_ex,
                    strict_foreign_properties=strict_foreign_properties,
                    logger=logger,
                    skip_foreign_properties=skip_foreign_properties,
                ):
                    return False
            except ValidationException as v:
                if raise_ex:
                    raise ValidationException("item is invalid because", sl, [v])
                return False
        return True

    if isinstance(expected_schema, avro.schema.UnionSchema):
        # Pass 1: quick non-raising probe of every member.  Note that this
        # probe does not forward ``foreign_properties``.
        for s in expected_schema.schemas:
            if validate_ex(
                s,
                datum,
                identifiers,
                strict=strict,
                raise_ex=False,
                strict_foreign_properties=strict_foreign_properties,
                logger=logger,
                skip_foreign_properties=skip_foreign_properties,
            ):
                return True

        if not raise_ex:
            return False

        # Pass 2: re-validate the plausible members with raise_ex=True to
        # collect a detailed error for each of them.
        errors = []  # type: List[SchemaSaladException]
        checked = []
        for s in expected_schema.schemas:
            # Skip members whose kind cannot match the datum's Python type;
            # their errors would only add noise to the report.
            if isinstance(datum, MutableSequence) and not isinstance(
                s, avro.schema.ArraySchema
            ):
                continue
            elif isinstance(datum, MutableMapping) and not isinstance(
                s, avro.schema.RecordSchema
            ):
                continue
            elif isinstance(
                datum, (bool, six.integer_types, float, six.string_types)
            ) and isinstance(s, (avro.schema.ArraySchema, avro.schema.RecordSchema)):
                continue
            elif datum is not None and s.type == "null":
                continue

            checked.append(s)
            try:
                # This pass does forward ``foreign_properties``, so a member
                # rejected by the probe can validate here.  Accept it
                # instead of discarding the result: otherwise a valid datum
                # is reported as invalid and ``checked`` / ``errors`` fall
                # out of step in the zip() below.
                if validate_ex(
                    s,
                    datum,
                    identifiers,
                    strict=strict,
                    foreign_properties=foreign_properties,
                    raise_ex=True,
                    strict_foreign_properties=strict_foreign_properties,
                    logger=logger,
                    skip_foreign_properties=skip_foreign_properties,
                ):
                    return True
            except ClassValidationException:
                # The datum named this record class explicitly; its error is
                # the relevant one, so propagate it as-is.
                raise
            except ValidationException as e:
                errors.append(e)
        if bool(errors):
            raise ValidationException(
                "",
                None,
                [
                    ValidationException(
                        "tried {} but".format(friendly(check)), None, [err]
                    )
                    for (check, err) in zip(checked, errors)
                ],
                "-",
            )
        raise ValidationException(
            "value is a {}, expected {}".format(
                type(datum).__name__, friendly(expected_schema)
            )
        )

    if isinstance(expected_schema, avro.schema.RecordSchema):
        if not isinstance(datum, MutableMapping):
            if raise_ex:
                raise ValidationException(u"is not a dict")
            return False

        # If the schema declares a "class" field, the datum must carry it
        # and it must equal the schema's name.
        classmatch = None
        for f in expected_schema.fields:
            if f.name in ("class",):
                d = datum.get(f.name)
                if not d:
                    if raise_ex:
                        raise ValidationException(u"Missing '{}' field".format(f.name))
                    return False
                if expected_schema.name != d:
                    if raise_ex:
                        raise ValidationException(
                            u"Expected class '{}' but this is '{}'".format(
                                expected_schema.name, d
                            )
                        )
                    return False
                classmatch = d
                break

        # Validate every declared field, using the field default (or None)
        # when the datum does not provide a value.
        errors = []
        for f in expected_schema.fields:
            if f.name in ("class",):
                continue

            if f.name in datum:
                fieldval = datum[f.name]
            else:
                try:
                    fieldval = f.default
                except KeyError:
                    fieldval = None

            try:
                sl = SourceLine(datum, f.name, six.text_type)
                if not validate_ex(
                    f.type,
                    fieldval,
                    identifiers,
                    strict=strict,
                    foreign_properties=foreign_properties,
                    raise_ex=raise_ex,
                    strict_foreign_properties=strict_foreign_properties,
                    logger=logger,
                    skip_foreign_properties=skip_foreign_properties,
                ):
                    return False
            except ValidationException as v:
                if f.name not in datum:
                    errors.append(
                        ValidationException(
                            u"missing required field `{}`".format(f.name)
                        )
                    )
                else:
                    errors.append(
                        ValidationException(
                            u"the `{}` field is not valid because".format(f.name),
                            sl,
                            [v],
                        )
                    )

        # Report keys of the datum that the schema does not declare.
        declared = {f.name for f in expected_schema.fields}
        for d in datum:
            if d in declared:
                continue
            sl = SourceLine(datum, d, six.text_type)
            if d is None:
                err = ValidationException(u"mapping with implicit null key", sl)
                if strict:
                    errors.append(err)
                else:
                    logger.warning(err)
                continue
            # ``d[:1]`` rather than ``d[0]`` so that an empty-string key is
            # reported as an invalid field instead of raising IndexError.
            if (
                d in identifiers
                or d in foreign_properties
                or d[:1] in ("@", "$")
            ):
                continue
            if (
                strict
                and strict_foreign_properties
                and not skip_foreign_properties
                and not raise_ex
            ):
                return False
            split = urllib.parse.urlsplit(d)
            if split.scheme:
                # A key with a URL scheme is treated as an extension field.
                if not skip_foreign_properties:
                    err = ValidationException(
                        u"unrecognized extension field `{}`{}.{}".format(
                            d,
                            " and strict_foreign_properties checking is enabled"
                            if strict_foreign_properties
                            else "",
                            "\nForeign properties from $schemas:\n  {}".format(
                                "\n  ".join(sorted(foreign_properties))
                            )
                            if len(foreign_properties) > 0
                            else "",
                        ),
                        sl,
                    )
                    if strict_foreign_properties:
                        errors.append(err)
                    elif len(foreign_properties) > 0:
                        logger.warning(err)
            else:
                err = ValidationException(
                    u"invalid field `{}`, expected one of: {}".format(
                        d,
                        ", ".join(
                            "'{}'".format(fn.name)
                            for fn in expected_schema.fields
                        ),
                    ),
                    sl,
                )
                if strict:
                    errors.append(err)
                else:
                    logger.warning(err)

        if not errors:
            return True
        if not raise_ex:
            return False
        if classmatch:
            raise ClassValidationException("", None, errors, "*")
        raise ValidationException("", None, errors, "*")

    if raise_ex:
        raise ValidationException(u"Unrecognized schema_type {}".format(schema_type))
    return False