Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/soupsieve/css_match.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author | shellac |
---|---|
date | Sat, 02 May 2020 07:14:21 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:26e78fe6e8c4 |
---|---|
1 """CSS matcher.""" | |
2 from datetime import datetime | |
3 from . import util | |
4 import re | |
5 from .import css_types as ct | |
6 import unicodedata | |
7 | |
# Matches a single non-whitespace character. `match_empty` searches text nodes with it,
# so text made up solely of whitespace still counts as empty.
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')

# Matches runs of non-whitespace characters. `get_classes` uses it to split a
# string `class` attribute into individual class names.
RE_NOT_WS = re.compile('[^ \t\r\n\f]+')

# Relationships (resolved by looking backwards/upwards from the element)
REL_PARENT = ' '
REL_CLOSE_PARENT = '>'
REL_SIBLING = '~'
REL_CLOSE_SIBLING = '+'

# Relationships for :has() (forward looking)
# These all start with ':', which is how `match_relations` tells them apart.
REL_HAS_PARENT = ': '
REL_HAS_CLOSE_PARENT = ':>'
REL_HAS_SIBLING = ':~'
REL_HAS_CLOSE_SIBLING = ':+'

# Namespace URIs
NS_XHTML = 'http://www.w3.org/1999/xhtml'
NS_XML = 'http://www.w3.org/XML/1998/namespace'

# Combined selector flag masks (presumably used by matchers outside this section -- verify).
DIR_FLAGS = ct.SEL_DIR_LTR | ct.SEL_DIR_RTL
RANGES = ct.SEL_IN_RANGE | ct.SEL_OUT_OF_RANGE

# Maps a lowercased `dir` attribute value to a direction flag; 'auto' maps to 0.
DIR_MAP = {
    'ltr': ct.SEL_DIR_LTR,
    'rtl': ct.SEL_DIR_RTL,
    'auto': 0
}

# Patterns used by `Inputs.parse_value` to recognize input values.
# Years are four or more digits; every other field is exactly two digits.
RE_NUM = re.compile(r"^(?P<value>-?(?:[0-9]{1,}(\.[0-9]+)?|\.[0-9]+))$")
RE_TIME = re.compile(r'^(?P<hour>[0-9]{2}):(?P<minutes>[0-9]{2})$')
RE_MONTH = re.compile(r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})$')
RE_WEEK = re.compile(r'^(?P<year>[0-9]{4,})-W(?P<week>[0-9]{2})$')
RE_DATE = re.compile(r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})-(?P<day>[0-9]{2})$')
RE_DATETIME = re.compile(
    r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})-(?P<day>[0-9]{2})T(?P<hour>[0-9]{2}):(?P<minutes>[0-9]{2})$'
)
# Used by `extended_language_filter` to collapse wildcard subtags in a language range into a single '-'.
RE_WILD_STRIP = re.compile(r'(?:(?:-\*-)(?:\*(?:-|$))*|-\*$)')

# Calendar limits used by `Inputs.validate_day`.
MONTHS_30 = (4, 6, 9, 11)  # April, June, September, and November
FEB = 2
SHORT_MONTH = 30
LONG_MONTH = 31
FEB_MONTH = 28
FEB_LEAP_MONTH = 29
DAYS_IN_WEEK = 7
54 | |
55 | |
class _FakeParent(object):
    """
    Stand-in parent for an element that has none.

    A fragment that is not attached to a `BeautifulSoup` document has no parent,
    which makes `nth` selectors impossible to evaluate. Wrapping the root element
    in this temporary object lets it be traversed as if it were a child.
    """

    def __init__(self, element):
        """Hold `element` as the one and only child."""

        self.contents = list((element,))

    def __len__(self):
        """Report how many children are held."""

        children = self.contents
        return len(children)
74 | |
75 | |
class _DocumentNav(object):
    """
    Navigate a Beautiful Soup document.

    `is_iframe` and `is_root` rely on `self.is_html_tag`, `self.root` and `self.is_html`,
    which are not defined here; the class mixing this in must provide them.
    """

    @classmethod
    def assert_valid_input(cls, tag):
        """
        Check if valid input tag or document.

        Raises `TypeError` when `tag` is not a `bs4.Tag`.
        """

        # Fail on unexpected types.
        if not cls.is_tag(tag):
            raise TypeError("Expected a BeautifulSoup 'Tag', but instead received type {}".format(type(tag)))

    @staticmethod
    def is_doc(obj):
        """Is `BeautifulSoup` object."""

        import bs4
        return isinstance(obj, bs4.BeautifulSoup)

    @staticmethod
    def is_tag(obj):
        """Is tag."""

        import bs4
        return isinstance(obj, bs4.Tag)

    @staticmethod
    def is_declaration(obj):  # pragma: no cover
        """Is declaration."""

        import bs4
        return isinstance(obj, bs4.Declaration)

    @staticmethod
    def is_cdata(obj):
        """Is CDATA."""

        import bs4
        return isinstance(obj, bs4.CData)

    @staticmethod
    def is_processing_instruction(obj):  # pragma: no cover
        """Is processing instruction."""

        import bs4
        return isinstance(obj, bs4.ProcessingInstruction)

    @staticmethod
    def is_navigable_string(obj):
        """Is navigable string."""

        import bs4
        return isinstance(obj, bs4.NavigableString)

    @staticmethod
    def is_special_string(obj):
        """Is special string (comment, declaration, CDATA, processing instruction, or doctype)."""

        import bs4
        return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype))

    @classmethod
    def is_content_string(cls, obj):
        """Check if node is content string (a navigable string that is not a special string)."""

        return cls.is_navigable_string(obj) and not cls.is_special_string(obj)

    @staticmethod
    def create_fake_parent(el):
        """Create fake parent for a given element."""

        return _FakeParent(el)

    @staticmethod
    def is_xml_tree(el):
        """Check if element (or document) is from a XML tree."""

        return el._is_xml

    def is_iframe(self, el):
        """Check if element is an `iframe`."""

        # Tag names are compared case sensitively in XML trees and case insensitively otherwise.
        return ((el.name if self.is_xml_tree(el) else util.lower(el.name)) == 'iframe') and self.is_html_tag(el)

    def is_root(self, el):
        """
        Return whether element is a root element.

        We check that the element is the root of the tree (which we have already pre-calculated),
        and we check if it is the root element under an `iframe`.
        """

        root = self.root and self.root is el
        if not root:
            parent = self.get_parent(el)
            root = parent is not None and self.is_html and self.is_iframe(parent)
        return root

    def get_contents(self, el, no_iframe=False):
        """Get contents; yields nothing for an `iframe` when `no_iframe` is set."""

        if not no_iframe or not self.is_iframe(el):
            for content in el.contents:
                yield content

    def get_children(self, el, start=None, reverse=False, tags=True, no_iframe=False):
        """
        Get children.

        Yields the direct children of `el`, beginning at index `start` (defaults to the
        first child, or the last one when `reverse` is set) and walking forwards or backwards.
        Only tags are yielded unless `tags` is false. Nothing is yielded for an `iframe`
        when `no_iframe` is set, or when `start` is out of range.
        """

        if not no_iframe or not self.is_iframe(el):
            last = len(el.contents) - 1
            if start is None:
                index = last if reverse else 0
            else:
                index = start
            # `end` is one step past the final valid index in the direction of travel.
            end = -1 if reverse else last + 1
            incr = -1 if reverse else 1

            if 0 <= index <= last:
                while index != end:
                    node = el.contents[index]
                    index += incr
                    if not tags or self.is_tag(node):
                        yield node

    def get_descendants(self, el, tags=True, no_iframe=False):
        """
        Get descendants.

        Only tags are yielded unless `tags` is false. When `no_iframe` is set, nothing is
        yielded if `el` itself is an `iframe`, and any nested `iframe` is yielded without
        its content.
        """

        if not no_iframe or not self.is_iframe(el):
            # While set, every node is skipped until this node is reached again.
            next_good = None
            for child in el.descendants:

                if next_good is not None:
                    if child is not next_good:
                        continue
                    next_good = None

                is_tag = self.is_tag(child)

                if no_iframe and is_tag and self.is_iframe(child):
                    # Work out the first node that comes after the `iframe` and all of its content.
                    if child.next_sibling is not None:
                        next_good = child.next_sibling
                    else:
                        last_child = child
                        while self.is_tag(last_child) and last_child.contents:
                            last_child = last_child.contents[-1]
                        next_good = last_child.next_element
                    yield child
                    if next_good is None:
                        # Nothing follows the `iframe`, so we are done.
                        break
                    # Coverage isn't seeing this even though it's executed
                    continue  # pragma: no cover

                if not tags or is_tag:
                    yield child

    def get_parent(self, el, no_iframe=False):
        """Get parent; with `no_iframe` set, an `iframe` parent is reported as `None`."""

        parent = el.parent
        if no_iframe and parent is not None and self.is_iframe(parent):
            parent = None
        return parent

    @staticmethod
    def get_tag_name(el):
        """Get tag."""

        return el.name

    @staticmethod
    def get_prefix_name(el):
        """Get prefix."""

        return el.prefix

    @staticmethod
    def get_uri(el):
        """Get namespace `URI`."""

        return el.namespace

    @classmethod
    def get_next(cls, el, tags=True):
        """Get next sibling; with `tags` set, skip forward to the next sibling that is a tag."""

        sibling = el.next_sibling
        # Check for `None` first so the type check is never run on a missing sibling.
        while tags and sibling is not None and not cls.is_tag(sibling):
            sibling = sibling.next_sibling
        return sibling

    @classmethod
    def get_previous(cls, el, tags=True):
        """Get previous sibling; with `tags` set, skip back to the previous sibling that is a tag."""

        sibling = el.previous_sibling
        # Check for `None` first so the type check is never run on a missing sibling.
        while tags and sibling is not None and not cls.is_tag(sibling):
            sibling = sibling.previous_sibling
        return sibling

    @staticmethod
    def has_html_ns(el):
        """
        Check if element has an HTML namespace.

        This is a bit different than whether a element is treated as having an HTML namespace,
        like we do in the case of `is_html_tag`.

        Always returns a real `bool`, including when `el` is missing or has no namespace.
        """

        ns = getattr(el, 'namespace', None) if el else None
        return bool(ns) and ns == NS_XHTML

    @staticmethod
    def split_namespace(el, attr_name):
        """
        Return namespace and attribute name without the prefix.

        Both values are read from attributes of the `attr_name` object itself, so an
        attribute name that does not carry them yields `(None, None)`.
        """

        return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None)

    @staticmethod
    def get_attribute_by_name(el, name, default=None):
        """
        Get attribute by name.

        The lookup is exact for XML trees. Otherwise attribute names are lowercased before
        being compared with `name`. `default` is returned when nothing matches.
        """

        value = default
        if el._is_xml:
            try:
                value = el.attrs[name]
            except KeyError:
                pass
        else:
            for k, v in el.attrs.items():
                if util.lower(k) == name:
                    value = v
                    break
        return value

    @staticmethod
    def iter_attributes(el):
        """Iterate attributes as `(name, value)` pairs."""

        for k, v in el.attrs.items():
            yield k, v

    @classmethod
    def get_classes(cls, el):
        """Get classes as a list; a string `class` value is split on whitespace."""

        classes = cls.get_attribute_by_name(el, 'class', [])
        if isinstance(classes, str):
            classes = RE_NOT_WS.findall(classes)
        return classes

    def get_text(self, el, no_iframe=False):
        """Get text: the concatenated content strings found among the descendants of `el`."""

        return ''.join(
            [node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)]
        )
330 | |
331 | |
class Inputs(object):
    """Class for parsing and validating input items."""

    @staticmethod
    def validate_day(year, month, day):
        """Validate day: `True` when `day` exists in the given `month` of `year`."""

        max_days = LONG_MONTH
        if month == FEB:
            is_leap = ((year % 4 == 0) and (year % 100 != 0)) or (year % 400 == 0)
            max_days = FEB_LEAP_MONTH if is_leap else FEB_MONTH
        elif month in MONTHS_30:
            max_days = SHORT_MONTH
        return 1 <= day <= max_days

    @staticmethod
    def validate_week(year, week):
        """
        Validate week: `True` when `week` is a valid ISO week number for `year`.

        A year has either 52 or 53 ISO weeks. December 28th always falls in the last
        ISO week of its own year, so its week number is the number of weeks in the year.
        (December 31st cannot be used for this: it may already belong to week 1 of the
        following year, which says nothing about whether the current year has 53 weeks.)
        """

        # The Gregorian calendar repeats exactly every 400 years (a whole number of weeks),
        # so fold the year into 1..400. This keeps us inside the range `datetime` supports
        # for any year value, including ones with more than four digits.
        cycle_year = (year - 1) % 400 + 1
        max_week = datetime(cycle_year, 12, 28).isocalendar()[1]
        return 1 <= week <= max_week

    @staticmethod
    def validate_month(month):
        """Validate month."""

        return 1 <= month <= 12

    @staticmethod
    def validate_year(year):
        """Validate year."""

        return 1 <= year

    @staticmethod
    def validate_hour(hour):
        """Validate hour."""

        return 0 <= hour <= 23

    @staticmethod
    def validate_minutes(minutes):
        """Validate minutes."""

        return 0 <= minutes <= 59

    @classmethod
    def parse_value(cls, itype, value):
        """
        Parse the input value.

        `itype` selects the expected format ("date", "month", "week", "time",
        "datetime-local", "number" or "range"). Returns a tuple of integers for the
        date/time types, a float for "number"/"range", and `None` when the type is
        not recognized or the value is malformed or out of range.
        """

        parsed = None
        if itype == "date":
            m = RE_DATE.match(value)
            if m:
                year = int(m.group('year'), 10)
                month = int(m.group('month'), 10)
                day = int(m.group('day'), 10)
                if cls.validate_year(year) and cls.validate_month(month) and cls.validate_day(year, month, day):
                    parsed = (year, month, day)
        elif itype == "month":
            m = RE_MONTH.match(value)
            if m:
                year = int(m.group('year'), 10)
                month = int(m.group('month'), 10)
                if cls.validate_year(year) and cls.validate_month(month):
                    parsed = (year, month)
        elif itype == "week":
            m = RE_WEEK.match(value)
            if m:
                year = int(m.group('year'), 10)
                week = int(m.group('week'), 10)
                if cls.validate_year(year) and cls.validate_week(year, week):
                    parsed = (year, week)
        elif itype == "time":
            m = RE_TIME.match(value)
            if m:
                hour = int(m.group('hour'), 10)
                minutes = int(m.group('minutes'), 10)
                if cls.validate_hour(hour) and cls.validate_minutes(minutes):
                    parsed = (hour, minutes)
        elif itype == "datetime-local":
            m = RE_DATETIME.match(value)
            if m:
                year = int(m.group('year'), 10)
                month = int(m.group('month'), 10)
                day = int(m.group('day'), 10)
                hour = int(m.group('hour'), 10)
                minutes = int(m.group('minutes'), 10)
                if (
                    cls.validate_year(year) and cls.validate_month(month) and cls.validate_day(year, month, day) and
                    cls.validate_hour(hour) and cls.validate_minutes(minutes)
                ):
                    parsed = (year, month, day, hour, minutes)
        elif itype in ("number", "range"):
            m = RE_NUM.match(value)
            if m:
                parsed = float(m.group('value'))
        return parsed
431 | |
432 | |
433 class _Match(object): | |
434 """Perform CSS matching.""" | |
435 | |
def __init__(self, selectors, scope, namespaces, flags):
    """
    Initialize.

    Validates `scope`, stores the selectors, namespaces and flags, and locates the
    root element of the tree that `scope` belongs to.
    """

    self.assert_valid_input(scope)

    self.tag = scope
    self.selectors = selectors
    self.namespaces = namespaces if namespaces is not None else {}
    self.flags = flags
    self.iframe_restrict = False

    # Caches filled in lazily by the matchers.
    self.cached_meta_lang = []
    self.cached_default_forms = []
    self.cached_indeterminate_forms = []

    # Climb to the top most node of the tree.
    top = scope
    ancestor = self.get_parent(top)
    while ancestor:
        top = ancestor
        ancestor = self.get_parent(top)

    # The root is the top node itself, or the document's first child tag when the top is a document.
    if self.is_doc(top):
        root = next(iter(self.get_children(top)), None)
    else:
        root = top

    self.root = root
    # When the scope is the top node itself, the root element acts as the scope.
    self.scope = root if scope is top else scope
    self.has_html_namespace = self.has_html_ns(root)

    # A document can be both XML and HTML (XHTML)
    self.is_xml = self.is_xml_tree(top)
    self.is_html = not self.is_xml or self.has_html_namespace
470 | |
def supports_namespaces(self):
    """Tell whether namespaces apply: the tree is XML, or its root has the HTML namespace."""

    if self.is_xml:
        return self.is_xml
    return self.has_html_namespace
475 | |
def get_tag_ns(self, el):
    """
    Get tag namespace.

    Without namespace support every element is reported in the XHTML namespace.
    Otherwise the element's own namespace is returned, or an empty string if it has none.
    """

    if not self.supports_namespaces():
        return NS_XHTML
    return self.get_uri(el) or ''
487 | |
def is_html_tag(self, el):
    """Tell whether the element's namespace is the XHTML namespace."""

    tag_ns = self.get_tag_ns(el)
    return tag_ns == NS_XHTML
492 | |
def get_tag(self, el):
    """Get the tag name, lowercased unless the tree is XML (a missing name stays `None`)."""

    name = self.get_tag_name(el)
    if name is None or self.is_xml:
        return name
    return util.lower(name)
498 | |
def get_prefix(self, el):
    """Get the tag prefix, lowercased unless the tree is XML (a missing prefix stays `None`)."""

    prefix = self.get_prefix_name(el)
    if prefix is None or self.is_xml:
        return prefix
    return util.lower(prefix)
504 | |
def find_bidi(self, el):
    """
    Get directionality from element text.

    Walks the children of `el` in order and returns `ct.SEL_DIR_LTR` or `ct.SEL_DIR_RTL`
    for the first strongly directional character found, or `None` if there is none.
    """

    for node in self.get_children(el, tags=False):

        if self.is_tag(node):
            # Child elements are searched recursively, except for the ones that must be
            # skipped: certain tags, non HTML elements, and elements with a recognized `dir`.
            direction = DIR_MAP.get(util.lower(self.get_attribute_by_name(node, 'dir', '')), None)
            skipped = (
                self.get_tag(node) in ('bdi', 'script', 'style', 'textarea', 'iframe') or
                not self.is_html_tag(node) or
                direction is not None
            )
            if not skipped:
                value = self.find_bidi(node)
                if value is not None:
                    return value
            # Either skipped, or the direction could not be determined from this child.
            continue

        # Skip `doctype` comments, etc.
        if self.is_special_string(node):
            continue

        # Look for the first strongly directional character in the text.
        for char in node:
            bidi = unicodedata.bidirectional(char)
            if bidi == 'L':
                return ct.SEL_DIR_LTR
            if bidi in ('AL', 'R'):
                return ct.SEL_DIR_RTL
    return None
540 | |
def extended_language_filter(self, lang_range, lang_tag):
    """
    Filter the language tags.

    Returns `True` when `lang_tag` satisfies `lang_range`. Both are compared case
    insensitively, subtag by subtag; subtags of the tag that the range does not
    mention may be skipped unless they are a single character long.
    """

    ranges = RE_WILD_STRIP.sub('-', lang_range).lower().split('-')
    subtags = lang_tag.lower().split('-')

    # Primary tag needs to match
    if ranges[0] != '*' and ranges[0] != subtags[0]:
        return False

    total = len(ranges)
    available = len(subtags)
    rindex = 1
    sindex = 1

    # Match until we run out of ranges
    while rindex < total:
        wanted = ranges[rindex]

        # Ran out of subtags, but we still have ranges
        if sindex >= available:
            return False
        current = subtags[sindex]

        # Empty range
        if not wanted:
            return False

        if current == wanted:
            # Matched range
            rindex += 1
        elif len(current) == 1:
            # Implicit wildcard cannot match singletons
            return False

        # Matched or implicitly matched, so grab next subtag
        sindex += 1

    return True
591 | |
def match_attribute_name(self, el, attr, prefix):
    """
    Match attribute name and return value if it exists.

    Returns the value of the first matching attribute, or `None` when nothing matches.
    Names are compared exactly for XML trees and case insensitively otherwise.
    """

    if not self.supports_namespaces():
        # No namespaces to worry about: plain case insensitive name lookup.
        for key, val in self.iter_attributes(el):
            if util.lower(attr) == util.lower(key):
                return val
        return None

    # If we have not defined namespaces, we can't very well find them, so don't bother trying.
    if prefix:
        ns = self.namespaces.get(prefix)
        if ns is None and prefix != '*':
            return None
    else:
        ns = None

    for key, val in self.iter_attributes(el):

        # Get attribute parts
        namespace, name = self.split_namespace(el, key)

        # Can't match a prefix attribute as we haven't specified one to match
        # Try to match it normally as a whole `p:a` as selector may be trying `p\:a`.
        if ns is None:
            if (self.is_xml and attr == key) or (not self.is_xml and util.lower(attr) == util.lower(key)):
                return val
            continue

        # We can't match our desired prefix attribute as the attribute doesn't have a prefix
        if namespace is None or (ns != namespace and prefix != '*'):
            continue

        # The attribute doesn't match.
        if self.is_xml:
            differs = attr != name
        else:
            differs = util.lower(attr) != util.lower(name)
        if differs:
            continue

        return val

    return None
639 | |
def match_namespace(self, el, tag):
    """
    Match the namespace of the element.

    `tag.prefix` decides what is required: `None` means the default namespace (if one
    was provided), an empty prefix means no namespace at all, `*` means anything, and
    any other prefix must resolve to the element's namespace.
    """

    prefix = tag.prefix
    el_ns = self.get_tag_ns(el)

    if prefix is None:
        # We must match the default namespace if one is not provided
        default_ns = self.namespaces.get('')
        return default_ns is None or el_ns == default_ns

    if prefix == '':
        # If we specified `|tag`, we must not have a namespace.
        return not el_ns

    if prefix == '*':
        return True

    # Verify prefix matches
    wanted = self.namespaces.get(prefix, None)
    return wanted is not None and el_ns == wanted
660 | |
def match_attributes(self, el, attributes):
    """
    Match attributes.

    Every attribute selector must find its attribute on `el`, and the attribute's
    value must satisfy the selector's pattern when it has one. List values are
    joined with single spaces before the pattern is applied.
    """

    if not attributes:
        return True

    for spec in attributes:
        value = self.match_attribute_name(el, spec.attribute, spec.prefix)
        # XML trees use the XML specific pattern when the selector provides one.
        if self.is_xml and spec.xml_type_pattern:
            pattern = spec.xml_type_pattern
        else:
            pattern = spec.pattern
        if isinstance(value, list):
            value = ' '.join(value)
        if value is None:
            return False
        if pattern is not None and pattern.match(value) is None:
            return False
    return True
680 | |
def match_tagname(self, el, tag):
    """
    Match tag name.

    A selector without a name matches anything, as does the `*` name. The selector's
    name is lowercased before comparison unless the tree is XML.
    """

    name = tag.name
    if name is None:
        return True
    if not self.is_xml:
        name = util.lower(name)
    return name in (self.get_tag(el), '*')
689 | |
def match_tag(self, el, tag):
    """Match the tag: both its namespace and its name must match. No tag selector matches anything."""

    if tag is None:
        return True

    # Verify namespace, then the name (both checks are always run).
    namespace_ok = self.match_namespace(el, tag)
    name_ok = self.match_tagname(el, tag)
    return bool(namespace_ok and name_ok)
701 | |
def match_past_relations(self, el, relation):
    """
    Match past relationship.

    Looks backwards from `el` (ancestors or previous siblings), as dictated by the
    relation type of the first selector in `relation`.
    """

    rel_type = relation[0].rel_type
    found = False

    if rel_type == REL_PARENT:
        # Any ancestor may match.
        ancestor = self.get_parent(el, no_iframe=self.iframe_restrict)
        while not found and ancestor:
            found = self.match_selectors(ancestor, relation)
            ancestor = self.get_parent(ancestor, no_iframe=self.iframe_restrict)

    elif rel_type == REL_CLOSE_PARENT:
        # Only the direct parent may match.
        direct_parent = self.get_parent(el, no_iframe=self.iframe_restrict)
        if direct_parent:
            found = self.match_selectors(direct_parent, relation)

    elif rel_type == REL_SIBLING:
        # Any previous sibling tag may match.
        previous = self.get_previous(el)
        while not found and previous:
            found = self.match_selectors(previous, relation)
            previous = self.get_previous(previous)

    elif rel_type == REL_CLOSE_SIBLING:
        # Only the immediately preceding sibling tag may match.
        previous = self.get_previous(el)
        if previous and self.is_tag(previous):
            found = self.match_selectors(previous, relation)

    return found
725 | |
def match_future_child(self, parent, relation, recursive=False):
    """
    Match future child.

    Tests the children of `parent` (or all of its descendants when `recursive` is set)
    against `relation` and stops at the first one that matches.
    """

    walker = self.get_descendants if recursive else self.get_children
    found = False
    for node in walker(parent, no_iframe=self.iframe_restrict):
        found = self.match_selectors(node, relation)
        if found:
            break
    return found
736 | |
def match_future_relations(self, el, relation):
    """
    Match future relationship.

    Looks forwards from `el` (descendants or following siblings), as dictated by the
    relation type of the first selector in `relation`.
    """

    rel_type = relation[0].rel_type
    found = False

    if rel_type == REL_HAS_PARENT:
        # Any descendant may match.
        found = self.match_future_child(el, relation, True)

    elif rel_type == REL_HAS_CLOSE_PARENT:
        # Only direct children may match.
        found = self.match_future_child(el, relation)

    elif rel_type == REL_HAS_SIBLING:
        # Any following sibling tag may match.
        following = self.get_next(el)
        while not found and following:
            found = self.match_selectors(following, relation)
            following = self.get_next(following)

    elif rel_type == REL_HAS_CLOSE_SIBLING:
        # Only the immediately following sibling tag may match.
        following = self.get_next(el)
        if following and self.is_tag(following):
            found = self.match_selectors(following, relation)

    return found
755 | |
def match_relations(self, el, relation):
    """
    Match relationship to other elements.

    Relation types starting with `:` look forward (as `:has()` needs); all others look back.
    """

    if relation[0].rel_type.startswith(':'):
        matcher = self.match_future_relations
    else:
        matcher = self.match_past_relations
    return matcher(el, relation)
767 | |
def match_id(self, el, ids):
    """Match element's ID: every entry of `ids` must equal the element's `id` (a missing `id` counts as '')."""

    return all(wanted == self.get_attribute_by_name(el, 'id', '') for wanted in ids)
777 | |
def match_classes(self, el, classes):
    """Match element's classes: every entry of `classes` must be among the element's classes."""

    present = self.get_classes(el)
    return all(wanted in present for wanted in classes)
788 | |
def match_root(self, el):
    """
    Match element as root.

    Besides being a root element, `el` must not have any sibling (before or after)
    that is a tag, a CDATA section, or a content string with non-whitespace text.
    """

    is_root = self.is_root(el)

    # Scan the previous siblings first, then the following ones.
    for step in (self.get_previous, self.get_next):
        if not is_root:
            break
        neighbor = step(el, tags=False)
        while neighbor is not None:
            if (
                self.is_tag(neighbor) or (self.is_content_string(neighbor) and neighbor.strip()) or
                self.is_cdata(neighbor)
            ):
                is_root = False
                break
            neighbor = step(neighbor, tags=False)

    return is_root
814 | |
def match_scope(self, el):
    """Tell whether `el` is the very element being used as the scope."""

    return el is self.scope
819 | |
def match_nth_tag_type(self, el, child):
    """Match tag type for `nth` matches: `child` must share both tag name and namespace with `el`."""

    if self.get_tag(child) != self.get_tag(el):
        return False
    return self.get_tag_ns(child) == self.get_tag_ns(el)
827 | |
def match_nth(self, el, nth):
    """
    Match `nth` elements.

    Each entry of `nth` describes one `nth` style selector through its attributes:
    `a`, `b` and `n` (the index is `a * count + b` when `n` is set, otherwise just `a`),
    `last` (count from the end), `of_type` (only count siblings of the same tag type)
    and `selectors` (an `of S` filter). The element must satisfy every entry.
    An empty `nth` matches.
    """

    matched = True

    for n in nth:
        matched = False
        # The element itself must satisfy the `of S` selectors, if there are any.
        if n.selectors and not self.match_selectors(el, n.selectors):
            break
        parent = self.get_parent(el)
        if parent is None:
            # A parentless element gets a temporary parent so it can be counted as a child.
            parent = self.create_fake_parent(el)
        last = n.last
        last_index = len(parent) - 1
        # Position in the parent's contents where the sibling walk starts (the end when counting from the last).
        index = last_index if last else 0
        # Number of qualifying siblings counted so far.
        relative_index = 0
        a = n.a
        b = n.b
        var = n.n
        count = 0
        count_incr = 1
        # Direction of the sibling walk.
        factor = -1 if last else 1
        idx = last_idx = a * count + b if var else a

        # We can only adjust bounds within a variable index
        if var:
            # Abort if our nth index is out of bounds and only getting further out of bounds as we increment.
            # Otherwise, increment to try to get in bounds.
            adjust = None
            while idx < 1 or idx > last_index:
                if idx < 0:
                    diff_low = 0 - idx
                    if adjust is not None and adjust == 1:
                        break
                    adjust = -1
                    count += count_incr
                    idx = last_idx = a * count + b if var else a
                    diff = 0 - idx
                    if diff >= diff_low:
                        break
                else:
                    diff_high = idx - last_index
                    if adjust is not None and adjust == -1:
                        break
                    adjust = 1
                    count += count_incr
                    idx = last_idx = a * count + b if var else a
                    diff = idx - last_index
                    if diff >= diff_high:
                        break
                    diff_high = diff

            # If a < 0, our count is working backwards, so floor the index by increasing the count.
            # Find the count that yields the lowest, in bound value and use that.
            # Lastly reverse count increment so that we'll increase our index.
            lowest = count
            if a < 0:
                while idx >= 1:
                    lowest = count
                    count += count_incr
                    idx = last_idx = a * count + b if var else a
                count_incr = -1
            count = lowest
            idx = last_idx = a * count + b if var else a

        # Evaluate elements while our calculated nth index is still in range
        while 1 <= idx <= last_index + 1:
            child = None
            # Evaluate while our child index is still in range.
            for child in self.get_children(parent, start=index, reverse=factor < 0, tags=False):
                index += factor
                if not self.is_tag(child):
                    continue
                # Handle `of S` in `nth-child`
                if n.selectors and not self.match_selectors(child, n.selectors):
                    continue
                # Handle `of-type`
                if n.of_type and not self.match_nth_tag_type(el, child):
                    continue
                relative_index += 1
                if relative_index == idx:
                    if child is el:
                        matched = True
                    else:
                        # The current nth position is taken by another element; try the next index.
                        break
                if child is el:
                    # We have reached the element itself, so there is nothing further to count.
                    break
            if child is el:
                break
            last_idx = idx
            count += count_incr
            if count < 0:
                # Count is counting down and has now ventured into invalid territory.
                break
            idx = a * count + b if var else a
            if last_idx == idx:
                # The index did not change, so further iterations cannot match either.
                break
        if not matched:
            break
    return matched
928 | |
def match_empty(self, el):
    """
    Check if element is empty (if requested).

    An element is empty when it has no child tags and none of its content strings
    contain a non-whitespace character.
    """

    for node in self.get_children(el, tags=False):
        if self.is_tag(node):
            return False
        if self.is_content_string(node) and RE_NOT_EMPTY.search(node):
            return False
    return True
941 | |
def match_subselectors(self, el, selectors):
    """Match selectors: `el` must match every entry of `selectors` (all entries are always evaluated)."""

    outcomes = [self.match_selectors(el, sel) for sel in selectors]
    return all(outcomes)
950 | |
def match_contains(self, el, contains):
    """
    Match element if it contains text.

    Each entry of `contains` carries a `text` collection; at least one of those
    strings must appear in the element's text for the entry to be satisfied, and
    every entry must be satisfied. The element's text is only fetched once.
    """

    matched = True
    content = None
    for group in contains:
        if content is None:
            content = self.get_text(el, no_iframe=self.is_html)
        if not any(text in content for text in group.text):
            matched = False
    return matched
967 | |
def match_default(self, el):
    """
    Match default.

    Returns `True` when `el` is the default button of its form: the first `input` or
    `button` descendant of the form whose `type` is `submit`. The default button of
    each form is cached in `self.cached_default_forms` once located.

    An element that has no enclosing form (the search does not cross `iframe`
    boundaries) never matches.
    """

    # Find this input's form
    form = None
    parent = self.get_parent(el, no_iframe=True)
    while parent and form is None:
        if self.get_tag(parent) == 'form' and self.is_html_tag(parent):
            form = parent
        else:
            parent = self.get_parent(parent, no_iframe=True)

    # Without a form there is no default button, and nothing to search through.
    if form is None:
        return False

    # Look in form cache to see if we've already located its default button
    for cached_form, cached_button in self.cached_default_forms:
        if cached_form is form:
            return cached_button is el

    # We didn't have the form cached, so look for its default button
    for child in self.get_descendants(form, no_iframe=True):
        name = self.get_tag(child)
        # Can't do nested forms (haven't figured out why we never hit this)
        if name == 'form':  # pragma: no cover
            break
        if name in ('input', 'button'):
            v = self.get_attribute_by_name(child, 'type', '')
            if v and util.lower(v) == 'submit':
                # Only the first submit button is the default, so stop at it.
                self.cached_default_forms.append([form, child])
                return el is child
    return False
1006 | |
def match_indeterminate(self, el):
    """
    Match an indeterminate radio button.

    The element matches when no *other* `input` with `type="radio"`, the same
    `name` value and a `checked` attribute exists under the same form. The
    verdict for each `(form, name)` pair is stored in
    `cached_indeterminate_forms` and reused on later calls.
    """

    match = False
    name = self.get_attribute_by_name(el, 'name')

    def get_parent_form(el):
        """
        Find this input's form.

        Falls back to the topmost ancestor when no `form` ancestor exists.
        """
        form = None
        parent = self.get_parent(el, no_iframe=True)
        while form is None:
            if self.get_tag(parent) == 'form' and self.is_html_tag(parent):
                form = parent
                break
            last_parent = parent
            parent = self.get_parent(parent, no_iframe=True)
            if parent is None:
                # Ran out of ancestors: use the last one we saw as the scope.
                form = last_parent
                break
        return form

    form = get_parent_form(el)

    # Look in form cache to see if we've already evaluated that its fellow radio buttons are indeterminate
    found_form = False
    for f, n, i in self.cached_indeterminate_forms:
        if f is form and n == name:
            found_form = True
            if i is True:
                match = True
            break

    # We didn't have the form cached, so validate that the radio button is indeterminate
    if not found_form:
        checked = False
        for child in self.get_descendants(form, no_iframe=True):
            if child is el:
                continue
            tag_name = self.get_tag(child)
            if tag_name == 'input':
                is_radio = False
                check = False
                has_name = False
                for k, v in self.iter_attributes(child):
                    if util.lower(k) == 'type' and util.lower(v) == 'radio':
                        is_radio = True
                    elif util.lower(k) == 'name' and v == name:
                        has_name = True
                    elif util.lower(k) == 'checked':
                        check = True
                    # Re-checked after every attribute so we can stop as soon as all three are seen.
                    if is_radio and check and has_name and get_parent_form(child) is form:
                        checked = True
                        break
            # The `break` above only leaves the attribute loop; leave the descendant loop as well.
            if checked:
                break
        if not checked:
            match = True
        self.cached_indeterminate_forms.append([form, name, match])

    return match
1067 | |
def match_lang(self, el, langs):
    """
    Match languages.

    The element's language is resolved in this order:

    1. The closest `lang` (or `xml:lang` in the XML namespace) attribute on the
       element or one of its ancestors.
    2. A value previously stored for the same root in `cached_meta_lang`.
    3. For HTML documents, a `meta` tag under `html > head` whose `http-equiv`
       is `content-language`; its `content` value is used and cached.

    `langs` is an iterable of pattern groups. The element matches when a language
    was found and every group has at least one pattern accepted by
    `extended_language_filter`.
    """

    match = False
    has_ns = self.supports_namespaces()
    root = self.root
    has_html_namespace = self.has_html_namespace

    # Walk parents looking for `lang` (HTML) or `xml:lang` XML property.
    parent = el
    found_lang = None
    last = None
    while not found_lang:
        has_html_ns = self.has_html_ns(parent)
        for k, v in self.iter_attributes(parent):
            attr_ns, attr = self.split_namespace(parent, k)
            if (
                ((not has_ns or has_html_ns) and (util.lower(k) if not self.is_xml else k) == 'lang') or
                (
                    has_ns and not has_html_ns and attr_ns == NS_XML and
                    (util.lower(attr) if not self.is_xml and attr is not None else attr) == 'lang'
                )
            ):
                found_lang = v
                break
        last = parent
        parent = self.get_parent(parent, no_iframe=self.is_html)

        if parent is None:
            # No more ancestors: treat the last node visited as the root for the steps below.
            root = last
            has_html_namespace = self.has_html_ns(root)
            parent = last
            break

    # Use cached meta language.
    if not found_lang and self.cached_meta_lang:
        for cache in self.cached_meta_lang:
            if root is cache[0]:
                # May be `False`, which records that an earlier meta search found nothing.
                found_lang = cache[1]

    # If we couldn't find a language, and the document is HTML, look to meta to determine language.
    if found_lang is None and (not self.is_xml or (has_html_namespace and root.name == 'html')):
        # Find head
        found = False
        for tag in ('html', 'head'):
            found = False
            for child in self.get_children(parent, no_iframe=self.is_html):
                if self.get_tag(child) == tag and self.is_html_tag(child):
                    found = True
                    parent = child
                    break
            if not found:  # pragma: no cover
                break

        # Search meta tags
        if found:
            for child in parent:
                if self.is_tag(child) and self.get_tag(child) == 'meta' and self.is_html_tag(parent):
                    c_lang = False
                    content = None
                    for k, v in self.iter_attributes(child):
                        if util.lower(k) == 'http-equiv' and util.lower(v) == 'content-language':
                            c_lang = True
                        if util.lower(k) == 'content':
                            content = v
                        if c_lang and content:
                            found_lang = content
                            self.cached_meta_lang.append((root, found_lang))
                            break
                if found_lang:
                    break
            if not found_lang:
                # Remember the miss so the meta tags are not searched again for this root.
                self.cached_meta_lang.append((root, False))

    # If we determined a language, compare.
    if found_lang:
        for patterns in langs:
            match = False
            for pattern in patterns:
                if self.extended_language_filter(pattern, found_lang):
                    match = True
            if not match:
                break

    return match
1153 | |
def match_dir(self, el, directionality):
    """
    Check directionality.

    `directionality` is a combination of the `ct.SEL_DIR_LTR` / `ct.SEL_DIR_RTL`
    flags. Returns `True` when the direction resolved for `el` equals it.

    Resolution order:

    - An explicit `dir` of `ltr` or `rtl` on the element decides.
    - The root element without a recognized `dir` is left to right.
    - `input[type=tel]` without a recognized `dir` is left to right.
    - `dir="auto"` on a text-like `input` or a `textarea` uses the first character
      of its value whose bidirectional class is `L`, `R` or `AL`.
    - `bdi` without a recognized `dir`, or any other element with `dir="auto"`,
      uses `find_bidi`.
    - Everything else inherits the answer from its parent.
    """

    # If we have to match both left and right, we can't match either.
    if directionality & ct.SEL_DIR_LTR and directionality & ct.SEL_DIR_RTL:
        return False

    # Also ends the recursion once we walk past the top of the tree (`el` is `None`).
    if el is None or not self.is_html_tag(el):
        return False

    # Element has defined direction of left to right or right to left
    # (`None` means missing or unrecognized `dir`, `0` means `dir="auto"`).
    direction = DIR_MAP.get(util.lower(self.get_attribute_by_name(el, 'dir', '')), None)
    if direction not in (None, 0):
        return direction == directionality

    # Element is the document element (the root) and no direction assigned, assume left to right.
    is_root = self.is_root(el)
    if is_root and direction is None:
        return ct.SEL_DIR_LTR == directionality

    # If `input[type=telephone]` and no direction is assigned, assume left to right.
    name = self.get_tag(el)
    is_input = name == 'input'
    is_textarea = name == 'textarea'
    is_bdi = name == 'bdi'
    itype = util.lower(self.get_attribute_by_name(el, 'type', '')) if is_input else ''
    if is_input and itype == 'tel' and direction is None:
        return ct.SEL_DIR_LTR == directionality

    # Auto handling for text inputs
    if ((is_input and itype in ('text', 'search', 'tel', 'url', 'email')) or is_textarea) and direction == 0:
        if is_textarea:
            # A `textarea` keeps its value in its content strings rather than in an attribute.
            value = []
            for node in self.get_contents(el, no_iframe=True):
                if self.is_content_string(node):
                    value.append(node)
            value = ''.join(value)
        else:
            value = self.get_attribute_by_name(el, 'value', '')
        if value:
            for c in value:
                bidi = unicodedata.bidirectional(c)
                # The first strongly directional character decides.
                if bidi in ('AL', 'R', 'L'):
                    direction = ct.SEL_DIR_LTR if bidi == 'L' else ct.SEL_DIR_RTL
                    return direction == directionality
            # Assume left to right
            return ct.SEL_DIR_LTR == directionality
        elif is_root:
            return ct.SEL_DIR_LTR == directionality
        return self.match_dir(self.get_parent(el, no_iframe=True), directionality)

    # Auto handling for `bdi` and other non text inputs.
    if (is_bdi and direction is None) or direction == 0:
        direction = self.find_bidi(el)
        if direction is not None:
            return direction == directionality
        elif is_root:
            return ct.SEL_DIR_LTR == directionality
        return self.match_dir(self.get_parent(el, no_iframe=True), directionality)

    # Match parents direction
    return self.match_dir(self.get_parent(el, no_iframe=True), directionality)
1216 | |
def match_range(self, el, condition):
    """
    Match `:in-range` / `:out-of-range`.

    The element's `min`, `max` and `value` attributes are parsed with
    `Inputs.parse_value` according to its `type`. If neither `min` nor `max`
    yields a value, nothing can be evaluated and the result is `False`.

    Otherwise the method only ever decides whether the value is *out* of range;
    anything else, including a missing or unparsable value, counts as in range.
    This mirrors what browsers appear to do.

    `condition` selects the answer: with `ct.SEL_IN_RANGE` set the in-range
    verdict is returned, otherwise the out-of-range verdict.
    """

    itype = util.lower(self.get_attribute_by_name(el, 'type'))

    def read(attr_name):
        """Return the parsed attribute, or `None` when it is absent."""
        raw = self.get_attribute_by_name(el, attr_name, None)
        if raw is None:
            return None
        return Inputs.parse_value(itype, raw)

    lower = read('min')
    upper = read('max')

    # There is no valid min or max, so we cannot evaluate a range
    if lower is None and upper is None:
        return False

    value = read('value')
    outside = False
    if value is not None:
        # Time is periodic, so `min > max` describes a reversed/discontinuous range
        wraps = itype == "time" and lower is not None and upper is not None and lower > upper
        if wraps:
            if value < lower and value > upper:
                outside = True
        elif itype == "time" or itype in ("date", "datetime-local", "month", "week", "number", "range"):
            if lower is not None and value < lower:
                outside = True
            elif upper is not None and value > upper:
                outside = True

    if condition & ct.SEL_IN_RANGE:
        return not outside
    return outside
1262 | |
def match_defined(self, el):
    """
    Match defined.

    `:defined` is related to custom elements in a browser. With no browser to
    ask, an element is treated as defined when any of the following holds:

    - Its tag name has no hyphen, so it cannot be a custom element name.
    - Its tag name contains a colon, i.e. the name itself carries a prefix.
    - The parser reports a prefix for it via `get_prefix`.

    Only an unprefixed, hyphenated name is treated as an undefined custom
    element. An element whose tag name is `None` never matches.

    This of course requires the parser to provide us with the proper prefix and
    namespace info; if it doesn't, there is nothing we can do.
    """

    name = self.get_tag(el)
    if name is None:
        # Without a tag name there is nothing to classify.
        return False
    return '-' not in name or ':' in name or self.get_prefix(el) is not None
1283 | |
def match_placeholder_shown(self, el):
    """
    Match placeholder shown according to HTML spec.

    The element matches when its text is empty or is exactly one newline;
    a single newline does not count as content.
    """

    return self.get_text(el) in ('', '\n')
1298 | |
def match_selectors(self, el, selectors):
    """
    Check if element matches one of the selectors.

    `selectors` is a selector list exposing `is_not` and `is_html` and yielding
    compound selectors when iterated. The first selector whose every check
    passes ends the search. The result is `not is_not` when one passes;
    otherwise it is `is_not`, or `False` if no selector was evaluated.

    Selector lists flagged `is_html` are evaluated with the namespaces limited
    to `html` and with `iframe_restrict` enabled, and are skipped entirely when
    the document is not HTML. The matcher's own `namespaces` and
    `iframe_restrict` are put back afterwards, even if a check raises.
    """

    match = False
    is_not = selectors.is_not
    is_html = selectors.is_html

    # Internal selector lists that use the HTML flag, will automatically get the `html` namespace.
    if is_html:
        namespaces = self.namespaces
        iframe_restrict = self.iframe_restrict
        self.namespaces = {'html': NS_XHTML}
        self.iframe_restrict = True

    try:
        if not is_html or self.is_html:
            for selector in selectors:
                match = is_not
                # We have an un-matchable situation (like `:focus`, as you can't focus an element here)
                if isinstance(selector, ct.SelectorNull):
                    continue
                # Verify tag matches
                if not self.match_tag(el, selector.tag):
                    continue
                # Verify tag is defined
                if selector.flags & ct.SEL_DEFINED and not self.match_defined(el):
                    continue
                # Verify element is root
                if selector.flags & ct.SEL_ROOT and not self.match_root(el):
                    continue
                # Verify element is scope
                if selector.flags & ct.SEL_SCOPE and not self.match_scope(el):
                    continue
                # Verify element has placeholder shown
                if selector.flags & ct.SEL_PLACEHOLDER_SHOWN and not self.match_placeholder_shown(el):
                    continue
                # Verify `nth` matches
                if not self.match_nth(el, selector.nth):
                    continue
                # Verify element is empty
                if selector.flags & ct.SEL_EMPTY and not self.match_empty(el):
                    continue
                # Verify id matches
                if selector.ids and not self.match_id(el, selector.ids):
                    continue
                # Verify classes match
                if selector.classes and not self.match_classes(el, selector.classes):
                    continue
                # Verify attribute(s) match
                if not self.match_attributes(el, selector.attributes):
                    continue
                # Verify ranges
                if selector.flags & RANGES and not self.match_range(el, selector.flags & RANGES):
                    continue
                # Verify language patterns
                if selector.lang and not self.match_lang(el, selector.lang):
                    continue
                # Verify pseudo selector patterns
                if selector.selectors and not self.match_subselectors(el, selector.selectors):
                    continue
                # Verify relationship selectors
                if selector.relation and not self.match_relations(el, selector.relation):
                    continue
                # Validate that the current default selector match corresponds to the first submit button in the form
                if selector.flags & ct.SEL_DEFAULT and not self.match_default(el):
                    continue
                # Validate that the unset radio button is among radio buttons with the same name in a form that are
                # also not set.
                if selector.flags & ct.SEL_INDETERMINATE and not self.match_indeterminate(el):
                    continue
                # Validate element directionality
                if selector.flags & DIR_FLAGS and not self.match_dir(el, selector.flags & DIR_FLAGS):
                    continue
                # Validate that the tag contains the specified text.
                if not self.match_contains(el, selector.contains):
                    continue
                match = not is_not
                break
    finally:
        # Restore actual namespaces being used for external selector lists.
        # Done in `finally` so an exception cannot leave the temporary values in place.
        if is_html:
            self.namespaces = namespaces
            self.iframe_restrict = iframe_restrict

    return match
1382 | |
def select(self, limit=0):
    """
    Yield the matching descendants of the targeted tag.

    A `limit` below 1 means no limit; otherwise iteration stops once
    `limit` matches have been yielded.
    """

    remaining = None if limit < 1 else limit

    for node in self.get_descendants(self.tag):
        if not self.match(node):
            continue
        yield node
        if remaining is None:
            continue
        remaining -= 1
        if remaining < 1:
            return
1396 | |
def closest(self):
    """
    Return the closest matching ancestor.

    The search starts at the targeted tag itself and climbs through
    `get_parent`; `None` is returned when nothing matches.
    """

    node = self.tag
    while node is not None:
        if self.match(node):
            return node
        node = self.get_parent(node)
    return None
1408 | |
def filter(self):  # noqa A001
    """Return the targeted tag's contents that are not navigable strings and that match."""

    kept = []
    for node in self.get_contents(self.tag):
        if self.is_navigable_string(node):
            continue
        if self.match(node):
            kept.append(node)
    return kept
1413 | |
def match(self, el):
    """
    Match a single node against this matcher's selectors.

    Document objects and nodes that are not tags never match.
    """

    if self.is_doc(el):
        return False
    return self.is_tag(el) and self.match_selectors(el, self.selectors)
1418 | |
1419 | |
class CSSMatch(_DocumentNav, _Match):
    """
    The Beautiful Soup CSS match class.

    Combines the `_DocumentNav` and `_Match` base classes into the matcher that
    `SoupSieve` instantiates; it adds no members of its own.
    """
1422 | |
1423 | |
class SoupSieve(ct.Immutable):
    """
    Compiled Soup Sieve selector matching object.

    Holds the original pattern together with its compiled selectors, namespaces,
    custom selectors and flags, and runs them against tags through `CSSMatch`.
    """

    __slots__ = ("pattern", "selectors", "namespaces", "custom", "flags", "_hash")

    def __init__(self, pattern, selectors, namespaces, custom, flags):
        """Store the compiled selector data on the immutable base."""

        super().__init__(
            pattern=pattern,
            selectors=selectors,
            namespaces=namespaces,
            custom=custom,
            flags=flags
        )

    def _matcher(self, tag):
        """Build a `CSSMatch` for `tag` from this object's compiled data."""

        return CSSMatch(self.selectors, tag, self.namespaces, self.flags)

    def match(self, tag):
        """Check whether `tag` matches the compiled selectors."""

        return self._matcher(tag).match(tag)

    def closest(self, tag):
        """Return the closest match among `tag` and its ancestors."""

        return self._matcher(tag).closest()

    def filter(self, iterable):  # noqa A001
        """
        Filter.

        When `iterable` is itself a tag, its contents all belong to one document,
        so a single `CSSMatch` (and whatever it caches) is shared for the whole call.

        Any other iterable may mix tags from different documents or detached tags,
        so every item is checked with a fresh `CSSMatch` via `match`.
        """

        if CSSMatch.is_tag(iterable):
            return self._matcher(iterable).filter()
        return [
            item for item in iterable
            if not CSSMatch.is_navigable_string(item) and self.match(item)
        ]

    def select_one(self, tag):
        """Return the first tag selected under `tag`, or `None` when there is none."""

        found = self.select(tag, limit=1)
        if not found:
            return None
        return found[0]

    def select(self, tag, limit=0):
        """Return the selected tags as a list."""

        return list(self.iselect(tag, limit))

    def iselect(self, tag, limit=0):
        """Iterate the selected tags lazily."""

        yield from self._matcher(tag).select(limit)

    def __repr__(self):  # pragma: no cover
        """Representation."""

        return "SoupSieve(pattern={!r}, namespaces={!r}, custom={!r}, flags={!r})".format(
            self.pattern, self.namespaces, self.custom, self.flags
        )

    __str__ = __repr__
1495 | |
1496 | |
# Hand `SoupSieve` to the registration helper in `css_types`
# (presumably so compiled selectors can be pickled; see `ct.pickle_register`).
ct.pickle_register(SoupSieve)