Mercurial > repos > shellac > sam_consensus_v3
annotate env/lib/python3.9/site-packages/bs4/element.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
rev | line source |
---|---|
0
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1 # Use of this source code is governed by the MIT license. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2 __license__ = "MIT" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
3 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
4 try: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
5 from collections.abc import Callable # Python 3.6 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
6 except ImportError as e: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
7 from collections import Callable |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
8 import re |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
9 import sys |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
10 import warnings |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
11 try: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
12 import soupsieve |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
13 except ImportError as e: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
14 soupsieve = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
15 warnings.warn( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
16 'The soupsieve package is not installed. CSS selectors cannot be used.' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
17 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
18 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
19 from bs4.formatter import ( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
20 Formatter, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
21 HTMLFormatter, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
22 XMLFormatter, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
23 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
24 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
25 DEFAULT_OUTPUT_ENCODING = "utf-8" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
26 PY3K = (sys.version_info[0] > 2) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
27 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
28 nonwhitespace_re = re.compile(r"\S+") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
29 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
30 # NOTE: This isn't used as of 4.7.0. I'm leaving it for a little bit on |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
31 # the off chance someone imported it for their own use. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
32 whitespace_re = re.compile(r"\s+") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
33 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
34 def _alias(attr): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
35 """Alias one attribute name to another for backward compatibility""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
36 @property |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
37 def alias(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
38 return getattr(self, attr) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
39 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
40 @alias.setter |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
41 def alias(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
42 return setattr(self, attr) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
43 return alias |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
44 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
45 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
46 # These encodings are recognized by Python (so PageElement.encode |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
47 # could theoretically support them) but XML and HTML don't recognize |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
48 # them (so they should not show up in an XML or HTML document as that |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
49 # document's encoding). |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
50 # |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
51 # If an XML document is encoded in one of these encodings, no encoding |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
52 # will be mentioned in the XML declaration. If an HTML document is |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
53 # encoded in one of these encodings, and the HTML document has a |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
54 # <meta> tag that mentions an encoding, the encoding will be given as |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
55 # the empty string. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
56 # |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
57 # Source: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
58 # https://docs.python.org/3/library/codecs.html#python-specific-encodings |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
59 PYTHON_SPECIFIC_ENCODINGS = set([ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
60 "idna", |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
61 "mbcs", |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
62 "oem", |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
63 "palmos", |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
64 "punycode", |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
65 "raw_unicode_escape", |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
66 "undefined", |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
67 "unicode_escape", |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
68 "raw-unicode-escape", |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
69 "unicode-escape", |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
70 "string-escape", |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
71 "string_escape", |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
72 ]) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
73 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
74 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
75 class NamespacedAttribute(str): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
76 """A namespaced string (e.g. 'xml:lang') that remembers the namespace |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
77 ('xml') and the name ('lang') that were used to create it. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
78 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
79 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
80 def __new__(cls, prefix, name=None, namespace=None): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
81 if not name: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
82 # This is the default namespace. Its name "has no value" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
83 # per https://www.w3.org/TR/xml-names/#defaulting |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
84 name = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
85 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
86 if name is None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
87 obj = str.__new__(cls, prefix) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
88 elif prefix is None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
89 # Not really namespaced. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
90 obj = str.__new__(cls, name) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
91 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
92 obj = str.__new__(cls, prefix + ":" + name) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
93 obj.prefix = prefix |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
94 obj.name = name |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
95 obj.namespace = namespace |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
96 return obj |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
97 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
98 class AttributeValueWithCharsetSubstitution(str): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
99 """A stand-in object for a character encoding specified in HTML.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
100 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
101 class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
102 """A generic stand-in for the value of a meta tag's 'charset' attribute. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
103 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
104 When Beautiful Soup parses the markup '<meta charset="utf8">', the |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
105 value of the 'charset' attribute will be one of these objects. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
106 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
107 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
108 def __new__(cls, original_value): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
109 obj = str.__new__(cls, original_value) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
110 obj.original_value = original_value |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
111 return obj |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
112 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
113 def encode(self, encoding): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
114 """When an HTML document is being encoded to a given encoding, the |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
115 value of a meta tag's 'charset' is the name of the encoding. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
116 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
117 if encoding in PYTHON_SPECIFIC_ENCODINGS: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
118 return '' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
119 return encoding |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
120 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
121 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
122 class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
123 """A generic stand-in for the value of a meta tag's 'content' attribute. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
124 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
125 When Beautiful Soup parses the markup: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
126 <meta http-equiv="content-type" content="text/html; charset=utf8"> |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
127 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
128 The value of the 'content' attribute will be one of these objects. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
129 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
130 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
131 CHARSET_RE = re.compile(r"((^|;)\s*charset=)([^;]*)", re.M) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
132 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
133 def __new__(cls, original_value): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
134 match = cls.CHARSET_RE.search(original_value) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
135 if match is None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
136 # No substitution necessary. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
137 return str.__new__(str, original_value) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
138 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
139 obj = str.__new__(cls, original_value) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
140 obj.original_value = original_value |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
141 return obj |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
142 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
143 def encode(self, encoding): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
144 if encoding in PYTHON_SPECIFIC_ENCODINGS: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
145 return '' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
146 def rewrite(match): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
147 return match.group(1) + encoding |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
148 return self.CHARSET_RE.sub(rewrite, self.original_value) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
149 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
150 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
151 class PageElement(object): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
152 """Contains the navigational information for some part of the page: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
153 that is, its current location in the parse tree. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
154 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
155 NavigableString, Tag, etc. are all subclasses of PageElement. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
156 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
157 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
158 def setup(self, parent=None, previous_element=None, next_element=None, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
159 previous_sibling=None, next_sibling=None): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
160 """Sets up the initial relations between this element and |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
161 other elements. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
162 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
163 :param parent: The parent of this element. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
164 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
165 :param previous_element: The element parsed immediately before |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
166 this one. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
167 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
168 :param next_element: The element parsed immediately before |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
169 this one. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
170 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
171 :param previous_sibling: The most recently encountered element |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
172 on the same level of the parse tree as this one. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
173 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
174 :param previous_sibling: The next element to be encountered |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
175 on the same level of the parse tree as this one. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
176 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
177 self.parent = parent |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
178 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
179 self.previous_element = previous_element |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
180 if previous_element is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
181 self.previous_element.next_element = self |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
182 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
183 self.next_element = next_element |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
184 if self.next_element is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
185 self.next_element.previous_element = self |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
186 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
187 self.next_sibling = next_sibling |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
188 if self.next_sibling is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
189 self.next_sibling.previous_sibling = self |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
190 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
191 if (previous_sibling is None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
192 and self.parent is not None and self.parent.contents): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
193 previous_sibling = self.parent.contents[-1] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
194 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
195 self.previous_sibling = previous_sibling |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
196 if previous_sibling is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
197 self.previous_sibling.next_sibling = self |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
198 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
199 def format_string(self, s, formatter): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
200 """Format the given string using the given formatter. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
201 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
202 :param s: A string. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
203 :param formatter: A Formatter object, or a string naming one of the standard formatters. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
204 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
205 if formatter is None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
206 return s |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
207 if not isinstance(formatter, Formatter): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
208 formatter = self.formatter_for_name(formatter) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
209 output = formatter.substitute(s) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
210 return output |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
211 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
212 def formatter_for_name(self, formatter): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
213 """Look up or create a Formatter for the given identifier, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
214 if necessary. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
215 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
216 :param formatter: Can be a Formatter object (used as-is), a |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
217 function (used as the entity substitution hook for an |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
218 XMLFormatter or HTMLFormatter), or a string (used to look |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
219 up an XMLFormatter or HTMLFormatter in the appropriate |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
220 registry. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
221 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
222 if isinstance(formatter, Formatter): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
223 return formatter |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
224 if self._is_xml: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
225 c = XMLFormatter |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
226 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
227 c = HTMLFormatter |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
228 if isinstance(formatter, Callable): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
229 return c(entity_substitution=formatter) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
230 return c.REGISTRY[formatter] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
231 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
232 @property |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
233 def _is_xml(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
234 """Is this element part of an XML tree or an HTML tree? |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
235 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
236 This is used in formatter_for_name, when deciding whether an |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
237 XMLFormatter or HTMLFormatter is more appropriate. It can be |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
238 inefficient, but it should be called very rarely. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
239 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
240 if self.known_xml is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
241 # Most of the time we will have determined this when the |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
242 # document is parsed. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
243 return self.known_xml |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
244 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
245 # Otherwise, it's likely that this element was created by |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
246 # direct invocation of the constructor from within the user's |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
247 # Python code. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
248 if self.parent is None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
249 # This is the top-level object. It should have .known_xml set |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
250 # from tree creation. If not, take a guess--BS is usually |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
251 # used on HTML markup. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
252 return getattr(self, 'is_xml', False) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
253 return self.parent._is_xml |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
254 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
255 nextSibling = _alias("next_sibling") # BS3 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
256 previousSibling = _alias("previous_sibling") # BS3 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
257 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
258 def replace_with(self, replace_with): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
259 """Replace this PageElement with another one, keeping the rest of the |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
260 tree the same. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
261 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
262 :param replace_with: A PageElement. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
263 :return: `self`, no longer part of the tree. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
264 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
265 if self.parent is None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
266 raise ValueError( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
267 "Cannot replace one element with another when the " |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
268 "element to be replaced is not part of a tree.") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
269 if replace_with is self: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
270 return |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
271 if replace_with is self.parent: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
272 raise ValueError("Cannot replace a Tag with its parent.") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
273 old_parent = self.parent |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
274 my_index = self.parent.index(self) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
275 self.extract(_self_index=my_index) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
276 old_parent.insert(my_index, replace_with) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
277 return self |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
278 replaceWith = replace_with # BS3 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
279 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
280 def unwrap(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
281 """Replace this PageElement with its contents. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
282 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
283 :return: `self`, no longer part of the tree. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
284 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
285 my_parent = self.parent |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
286 if self.parent is None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
287 raise ValueError( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
288 "Cannot replace an element with its contents when that" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
289 "element is not part of a tree.") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
290 my_index = self.parent.index(self) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
291 self.extract(_self_index=my_index) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
292 for child in reversed(self.contents[:]): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
293 my_parent.insert(my_index, child) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
294 return self |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
295 replace_with_children = unwrap |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
296 replaceWithChildren = unwrap # BS3 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
297 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
298 def wrap(self, wrap_inside): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
299 """Wrap this PageElement inside another one. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
300 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
301 :param wrap_inside: A PageElement. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
302 :return: `wrap_inside`, occupying the position in the tree that used |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
303 to be occupied by `self`, and with `self` inside it. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
304 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
305 me = self.replace_with(wrap_inside) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
306 wrap_inside.append(me) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
307 return wrap_inside |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
308 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
309 def extract(self, _self_index=None): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
310 """Destructively rips this element out of the tree. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
311 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
312 :param _self_index: The location of this element in its parent's |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
313 .contents, if known. Passing this in allows for a performance |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
314 optimization. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
315 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
316 :return: `self`, no longer part of the tree. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
317 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
318 if self.parent is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
319 if _self_index is None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
320 _self_index = self.parent.index(self) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
321 del self.parent.contents[_self_index] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
322 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
323 #Find the two elements that would be next to each other if |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
324 #this element (and any children) hadn't been parsed. Connect |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
325 #the two. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
326 last_child = self._last_descendant() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
327 next_element = last_child.next_element |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
328 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
329 if (self.previous_element is not None and |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
330 self.previous_element is not next_element): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
331 self.previous_element.next_element = next_element |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
332 if next_element is not None and next_element is not self.previous_element: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
333 next_element.previous_element = self.previous_element |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
334 self.previous_element = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
335 last_child.next_element = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
336 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
337 self.parent = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
338 if (self.previous_sibling is not None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
339 and self.previous_sibling is not self.next_sibling): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
340 self.previous_sibling.next_sibling = self.next_sibling |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
341 if (self.next_sibling is not None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
342 and self.next_sibling is not self.previous_sibling): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
343 self.next_sibling.previous_sibling = self.previous_sibling |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
344 self.previous_sibling = self.next_sibling = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
345 return self |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
346 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
347 def _last_descendant(self, is_initialized=True, accept_self=True): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
348 """Finds the last element beneath this object to be parsed. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
349 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
350 :param is_initialized: Has `setup` been called on this PageElement |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
351 yet? |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
352 :param accept_self: Is `self` an acceptable answer to the question? |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
353 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
354 if is_initialized and self.next_sibling is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
355 last_child = self.next_sibling.previous_element |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
356 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
357 last_child = self |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
358 while isinstance(last_child, Tag) and last_child.contents: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
359 last_child = last_child.contents[-1] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
360 if not accept_self and last_child is self: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
361 last_child = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
362 return last_child |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
363 # BS3: Not part of the API! |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
364 _lastRecursiveChild = _last_descendant |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
365 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
366 def insert(self, position, new_child): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
367 """Insert a new PageElement in the list of this PageElement's children. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
368 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
369 This works the same way as `list.insert`. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
370 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
371 :param position: The numeric position that should be occupied |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
372 in `self.children` by the new PageElement. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
373 :param new_child: A PageElement. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
374 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
375 if new_child is None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
376 raise ValueError("Cannot insert None into a tag.") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
377 if new_child is self: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
378 raise ValueError("Cannot insert a tag into itself.") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
379 if (isinstance(new_child, str) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
380 and not isinstance(new_child, NavigableString)): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
381 new_child = NavigableString(new_child) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
382 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
383 from bs4 import BeautifulSoup |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
384 if isinstance(new_child, BeautifulSoup): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
385 # We don't want to end up with a situation where one BeautifulSoup |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
386 # object contains another. Insert the children one at a time. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
387 for subchild in list(new_child.contents): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
388 self.insert(position, subchild) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
389 position += 1 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
390 return |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
391 position = min(position, len(self.contents)) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
392 if hasattr(new_child, 'parent') and new_child.parent is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
393 # We're 'inserting' an element that's already one |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
394 # of this object's children. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
395 if new_child.parent is self: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
396 current_index = self.index(new_child) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
397 if current_index < position: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
398 # We're moving this element further down the list |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
399 # of this object's children. That means that when |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
400 # we extract this element, our target index will |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
401 # jump down one. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
402 position -= 1 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
403 new_child.extract() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
404 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
405 new_child.parent = self |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
406 previous_child = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
407 if position == 0: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
408 new_child.previous_sibling = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
409 new_child.previous_element = self |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
410 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
411 previous_child = self.contents[position - 1] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
412 new_child.previous_sibling = previous_child |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
413 new_child.previous_sibling.next_sibling = new_child |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
414 new_child.previous_element = previous_child._last_descendant(False) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
415 if new_child.previous_element is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
416 new_child.previous_element.next_element = new_child |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
417 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
418 new_childs_last_element = new_child._last_descendant(False) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
419 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
420 if position >= len(self.contents): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
421 new_child.next_sibling = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
422 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
423 parent = self |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
424 parents_next_sibling = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
425 while parents_next_sibling is None and parent is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
426 parents_next_sibling = parent.next_sibling |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
427 parent = parent.parent |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
428 if parents_next_sibling is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
429 # We found the element that comes next in the document. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
430 break |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
431 if parents_next_sibling is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
432 new_childs_last_element.next_element = parents_next_sibling |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
433 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
434 # The last element of this tag is the last element in |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
435 # the document. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
436 new_childs_last_element.next_element = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
437 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
438 next_child = self.contents[position] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
439 new_child.next_sibling = next_child |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
440 if new_child.next_sibling is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
441 new_child.next_sibling.previous_sibling = new_child |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
442 new_childs_last_element.next_element = next_child |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
443 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
444 if new_childs_last_element.next_element is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
445 new_childs_last_element.next_element.previous_element = new_childs_last_element |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
446 self.contents.insert(position, new_child) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
447 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
448 def append(self, tag): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
449 """Appends the given PageElement to the contents of this one. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
450 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
451 :param tag: A PageElement. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
452 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
453 self.insert(len(self.contents), tag) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
454 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
455 def extend(self, tags): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
456 """Appends the given PageElements to this one's contents. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
457 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
458 :param tags: A list of PageElements. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
459 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
460 if isinstance(tags, Tag): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
461 # Calling self.append() on another tag's contents will change |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
462 # the list we're iterating over. Make a list that won't |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
463 # change. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
464 tags = list(tags.contents) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
465 for tag in tags: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
466 self.append(tag) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
467 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
468 def insert_before(self, *args): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
469 """Makes the given element(s) the immediate predecessor of this one. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
470 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
471 All the elements will have the same parent, and the given elements |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
472 will be immediately before this one. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
473 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
474 :param args: One or more PageElements. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
475 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
476 parent = self.parent |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
477 if parent is None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
478 raise ValueError( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
479 "Element has no parent, so 'before' has no meaning.") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
480 if any(x is self for x in args): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
481 raise ValueError("Can't insert an element before itself.") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
482 for predecessor in args: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
483 # Extract first so that the index won't be screwed up if they |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
484 # are siblings. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
485 if isinstance(predecessor, PageElement): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
486 predecessor.extract() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
487 index = parent.index(self) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
488 parent.insert(index, predecessor) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
489 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
490 def insert_after(self, *args): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
491 """Makes the given element(s) the immediate successor of this one. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
492 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
493 The elements will have the same parent, and the given elements |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
494 will be immediately after this one. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
495 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
496 :param args: One or more PageElements. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
497 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
498 # Do all error checking before modifying the tree. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
499 parent = self.parent |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
500 if parent is None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
501 raise ValueError( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
502 "Element has no parent, so 'after' has no meaning.") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
503 if any(x is self for x in args): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
504 raise ValueError("Can't insert an element after itself.") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
505 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
506 offset = 0 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
507 for successor in args: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
508 # Extract first so that the index won't be screwed up if they |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
509 # are siblings. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
510 if isinstance(successor, PageElement): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
511 successor.extract() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
512 index = parent.index(self) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
513 parent.insert(index+1+offset, successor) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
514 offset += 1 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
515 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
516 def find_next(self, name=None, attrs={}, text=None, **kwargs): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
517 """Find the first PageElement that matches the given criteria and |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
518 appears later in the document than this PageElement. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
519 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
520 All find_* methods take a common set of arguments. See the online |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
521 documentation for detailed explanations. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
522 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
523 :param name: A filter on tag name. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
524 :param attrs: A dictionary of filters on attribute values. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
525 :param text: A filter for a NavigableString with specific text. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
526 :kwargs: A dictionary of filters on attribute values. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
527 :return: A PageElement. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
528 :rtype: bs4.element.Tag | bs4.element.NavigableString |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
529 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
530 return self._find_one(self.find_all_next, name, attrs, text, **kwargs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
531 findNext = find_next # BS3 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
532 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
533 def find_all_next(self, name=None, attrs={}, text=None, limit=None, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
534 **kwargs): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
535 """Find all PageElements that match the given criteria and appear |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
536 later in the document than this PageElement. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
537 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
538 All find_* methods take a common set of arguments. See the online |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
539 documentation for detailed explanations. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
540 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
541 :param name: A filter on tag name. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
542 :param attrs: A dictionary of filters on attribute values. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
543 :param text: A filter for a NavigableString with specific text. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
544 :param limit: Stop looking after finding this many results. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
545 :kwargs: A dictionary of filters on attribute values. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
546 :return: A ResultSet containing PageElements. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
547 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
548 return self._find_all(name, attrs, text, limit, self.next_elements, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
549 **kwargs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
550 findAllNext = find_all_next # BS3 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
551 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
552 def find_next_sibling(self, name=None, attrs={}, text=None, **kwargs): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
553 """Find the closest sibling to this PageElement that matches the |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
554 given criteria and appears later in the document. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
555 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
556 All find_* methods take a common set of arguments. See the |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
557 online documentation for detailed explanations. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
558 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
559 :param name: A filter on tag name. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
560 :param attrs: A dictionary of filters on attribute values. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
561 :param text: A filter for a NavigableString with specific text. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
562 :kwargs: A dictionary of filters on attribute values. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
563 :return: A PageElement. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
564 :rtype: bs4.element.Tag | bs4.element.NavigableString |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
565 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
566 return self._find_one(self.find_next_siblings, name, attrs, text, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
567 **kwargs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
568 findNextSibling = find_next_sibling # BS3 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
569 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
570 def find_next_siblings(self, name=None, attrs={}, text=None, limit=None, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
571 **kwargs): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
572 """Find all siblings of this PageElement that match the given criteria |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
573 and appear later in the document. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
574 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
575 All find_* methods take a common set of arguments. See the online |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
576 documentation for detailed explanations. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
577 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
578 :param name: A filter on tag name. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
579 :param attrs: A dictionary of filters on attribute values. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
580 :param text: A filter for a NavigableString with specific text. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
581 :param limit: Stop looking after finding this many results. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
582 :kwargs: A dictionary of filters on attribute values. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
583 :return: A ResultSet of PageElements. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
584 :rtype: bs4.element.ResultSet |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
585 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
586 return self._find_all(name, attrs, text, limit, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
587 self.next_siblings, **kwargs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
588 findNextSiblings = find_next_siblings # BS3 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
589 fetchNextSiblings = find_next_siblings # BS2 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
590 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
591 def find_previous(self, name=None, attrs={}, text=None, **kwargs): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
592 """Look backwards in the document from this PageElement and find the |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
593 first PageElement that matches the given criteria. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
594 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
595 All find_* methods take a common set of arguments. See the online |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
596 documentation for detailed explanations. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
597 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
598 :param name: A filter on tag name. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
599 :param attrs: A dictionary of filters on attribute values. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
600 :param text: A filter for a NavigableString with specific text. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
601 :kwargs: A dictionary of filters on attribute values. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
602 :return: A PageElement. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
603 :rtype: bs4.element.Tag | bs4.element.NavigableString |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
604 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
605 return self._find_one( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
606 self.find_all_previous, name, attrs, text, **kwargs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
607 findPrevious = find_previous # BS3 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
608 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
609 def find_all_previous(self, name=None, attrs={}, text=None, limit=None, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
610 **kwargs): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
611 """Look backwards in the document from this PageElement and find all |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
612 PageElements that match the given criteria. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
613 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
614 All find_* methods take a common set of arguments. See the online |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
615 documentation for detailed explanations. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
616 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
617 :param name: A filter on tag name. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
618 :param attrs: A dictionary of filters on attribute values. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
619 :param text: A filter for a NavigableString with specific text. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
620 :param limit: Stop looking after finding this many results. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
621 :kwargs: A dictionary of filters on attribute values. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
622 :return: A ResultSet of PageElements. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
623 :rtype: bs4.element.ResultSet |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
624 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
625 return self._find_all(name, attrs, text, limit, self.previous_elements, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
626 **kwargs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
627 findAllPrevious = find_all_previous # BS3 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
628 fetchPrevious = find_all_previous # BS2 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
629 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
630 def find_previous_sibling(self, name=None, attrs={}, text=None, **kwargs): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
631 """Returns the closest sibling to this PageElement that matches the |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
632 given criteria and appears earlier in the document. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
633 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
634 All find_* methods take a common set of arguments. See the online |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
635 documentation for detailed explanations. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
636 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
637 :param name: A filter on tag name. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
638 :param attrs: A dictionary of filters on attribute values. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
639 :param text: A filter for a NavigableString with specific text. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
640 :kwargs: A dictionary of filters on attribute values. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
641 :return: A PageElement. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
642 :rtype: bs4.element.Tag | bs4.element.NavigableString |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
643 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
644 return self._find_one(self.find_previous_siblings, name, attrs, text, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
645 **kwargs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
646 findPreviousSibling = find_previous_sibling # BS3 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
647 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
648 def find_previous_siblings(self, name=None, attrs={}, text=None, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
649 limit=None, **kwargs): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
650 """Returns all siblings to this PageElement that match the |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
651 given criteria and appear earlier in the document. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
652 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
653 All find_* methods take a common set of arguments. See the online |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
654 documentation for detailed explanations. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
655 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
656 :param name: A filter on tag name. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
657 :param attrs: A dictionary of filters on attribute values. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
658 :param text: A filter for a NavigableString with specific text. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
659 :param limit: Stop looking after finding this many results. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
660 :kwargs: A dictionary of filters on attribute values. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
661 :return: A ResultSet of PageElements. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
662 :rtype: bs4.element.ResultSet |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
663 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
664 return self._find_all(name, attrs, text, limit, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
665 self.previous_siblings, **kwargs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
666 findPreviousSiblings = find_previous_siblings # BS3 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
667 fetchPreviousSiblings = find_previous_siblings # BS2 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
668 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
669 def find_parent(self, name=None, attrs={}, **kwargs): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
670 """Find the closest parent of this PageElement that matches the given |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
671 criteria. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
672 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
673 All find_* methods take a common set of arguments. See the online |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
674 documentation for detailed explanations. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
675 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
676 :param name: A filter on tag name. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
677 :param attrs: A dictionary of filters on attribute values. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
678 :kwargs: A dictionary of filters on attribute values. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
679 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
680 :return: A PageElement. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
681 :rtype: bs4.element.Tag | bs4.element.NavigableString |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
682 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
683 # NOTE: We can't use _find_one because findParents takes a different |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
684 # set of arguments. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
685 r = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
686 l = self.find_parents(name, attrs, 1, **kwargs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
687 if l: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
688 r = l[0] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
689 return r |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
690 findParent = find_parent # BS3 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
691 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
692 def find_parents(self, name=None, attrs={}, limit=None, **kwargs): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
693 """Find all parents of this PageElement that match the given criteria. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
694 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
695 All find_* methods take a common set of arguments. See the online |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
696 documentation for detailed explanations. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
697 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
698 :param name: A filter on tag name. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
699 :param attrs: A dictionary of filters on attribute values. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
700 :param limit: Stop looking after finding this many results. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
701 :kwargs: A dictionary of filters on attribute values. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
702 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
703 :return: A PageElement. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
704 :rtype: bs4.element.Tag | bs4.element.NavigableString |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
705 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
706 return self._find_all(name, attrs, None, limit, self.parents, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
707 **kwargs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
708 findParents = find_parents # BS3 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
709 fetchParents = find_parents # BS2 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
710 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
711 @property |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
712 def next(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
713 """The PageElement, if any, that was parsed just after this one. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
714 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
715 :return: A PageElement. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
716 :rtype: bs4.element.Tag | bs4.element.NavigableString |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
717 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
718 return self.next_element |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
719 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
720 @property |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
721 def previous(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
722 """The PageElement, if any, that was parsed just before this one. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
723 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
724 :return: A PageElement. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
725 :rtype: bs4.element.Tag | bs4.element.NavigableString |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
726 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
727 return self.previous_element |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
728 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
729 #These methods do the real heavy lifting. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
730 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
731 def _find_one(self, method, name, attrs, text, **kwargs): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
732 r = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
733 l = method(name, attrs, text, 1, **kwargs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
734 if l: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
735 r = l[0] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
736 return r |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
737 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
738 def _find_all(self, name, attrs, text, limit, generator, **kwargs): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
739 "Iterates over a generator looking for things that match." |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
740 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
741 if text is None and 'string' in kwargs: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
742 text = kwargs['string'] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
743 del kwargs['string'] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
744 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
745 if isinstance(name, SoupStrainer): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
746 strainer = name |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
747 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
748 strainer = SoupStrainer(name, attrs, text, **kwargs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
749 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
750 if text is None and not limit and not attrs and not kwargs: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
751 if name is True or name is None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
752 # Optimization to find all tags. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
753 result = (element for element in generator |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
754 if isinstance(element, Tag)) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
755 return ResultSet(strainer, result) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
756 elif isinstance(name, str): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
757 # Optimization to find all tags with a given name. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
758 if name.count(':') == 1: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
759 # This is a name with a prefix. If this is a namespace-aware document, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
760 # we need to match the local name against tag.name. If not, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
761 # we need to match the fully-qualified name against tag.name. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
762 prefix, local_name = name.split(':', 1) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
763 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
764 prefix = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
765 local_name = name |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
766 result = (element for element in generator |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
767 if isinstance(element, Tag) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
768 and ( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
769 element.name == name |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
770 ) or ( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
771 element.name == local_name |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
772 and (prefix is None or element.prefix == prefix) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
773 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
774 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
775 return ResultSet(strainer, result) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
776 results = ResultSet(strainer) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
777 while True: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
778 try: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
779 i = next(generator) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
780 except StopIteration: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
781 break |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
782 if i: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
783 found = strainer.search(i) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
784 if found: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
785 results.append(found) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
786 if limit and len(results) >= limit: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
787 break |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
788 return results |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
789 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
790 #These generators can be used to navigate starting from both |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
791 #NavigableStrings and Tags. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
792 @property |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
793 def next_elements(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
794 """All PageElements that were parsed after this one. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
795 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
796 :yield: A sequence of PageElements. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
797 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
798 i = self.next_element |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
799 while i is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
800 yield i |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
801 i = i.next_element |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
802 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
803 @property |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
804 def next_siblings(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
805 """All PageElements that are siblings of this one but were parsed |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
806 later. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
807 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
808 :yield: A sequence of PageElements. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
809 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
810 i = self.next_sibling |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
811 while i is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
812 yield i |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
813 i = i.next_sibling |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
814 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
815 @property |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
816 def previous_elements(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
817 """All PageElements that were parsed before this one. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
818 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
819 :yield: A sequence of PageElements. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
820 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
821 i = self.previous_element |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
822 while i is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
823 yield i |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
824 i = i.previous_element |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
825 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
826 @property |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
827 def previous_siblings(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
828 """All PageElements that are siblings of this one but were parsed |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
829 earlier. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
830 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
831 :yield: A sequence of PageElements. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
832 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
833 i = self.previous_sibling |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
834 while i is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
835 yield i |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
836 i = i.previous_sibling |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
837 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
838 @property |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
839 def parents(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
840 """All PageElements that are parents of this PageElement. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
841 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
842 :yield: A sequence of PageElements. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
843 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
844 i = self.parent |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
845 while i is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
846 yield i |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
847 i = i.parent |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
848 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
849 @property |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
850 def decomposed(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
851 """Check whether a PageElement has been decomposed. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
852 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
853 :rtype: bool |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
854 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
855 return getattr(self, '_decomposed', False) or False |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
856 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
857 # Old non-property versions of the generators, for backwards |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
858 # compatibility with BS3. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
859 def nextGenerator(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
860 return self.next_elements |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
861 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
862 def nextSiblingGenerator(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
863 return self.next_siblings |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
864 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
865 def previousGenerator(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
866 return self.previous_elements |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
867 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
868 def previousSiblingGenerator(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
869 return self.previous_siblings |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
870 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
871 def parentGenerator(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
872 return self.parents |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
873 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
874 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
875 class NavigableString(str, PageElement): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
876 """A Python Unicode string that is part of a parse tree. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
877 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
878 When Beautiful Soup parses the markup <b>penguin</b>, it will |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
879 create a NavigableString for the string "penguin". |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
880 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
881 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
882 PREFIX = '' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
883 SUFFIX = '' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
884 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
885 # We can't tell just by looking at a string whether it's contained |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
886 # in an XML document or an HTML document. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
887 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
888 known_xml = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
889 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
890 def __new__(cls, value): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
891 """Create a new NavigableString. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
892 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
893 When unpickling a NavigableString, this method is called with |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
894 the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
895 passed in to the superclass's __new__ or the superclass won't know |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
896 how to handle non-ASCII characters. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
897 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
898 if isinstance(value, str): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
899 u = str.__new__(cls, value) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
900 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
901 u = str.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
902 u.setup() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
903 return u |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
904 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
905 def __copy__(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
906 """A copy of a NavigableString has the same contents and class |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
907 as the original, but it is not connected to the parse tree. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
908 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
909 return type(self)(self) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
910 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
911 def __getnewargs__(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
912 return (str(self),) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
913 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
914 def __getattr__(self, attr): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
915 """text.string gives you text. This is for backwards |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
916 compatibility for Navigable*String, but for CData* it lets you |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
917 get the string without the CData wrapper.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
918 if attr == 'string': |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
919 return self |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
920 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
921 raise AttributeError( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
922 "'%s' object has no attribute '%s'" % ( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
923 self.__class__.__name__, attr)) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
924 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
925 def output_ready(self, formatter="minimal"): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
926 """Run the string through the provided formatter. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
927 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
928 :param formatter: A Formatter object, or a string naming one of the standard formatters. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
929 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
930 output = self.format_string(self, formatter) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
931 return self.PREFIX + output + self.SUFFIX |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
932 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
933 @property |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
934 def name(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
935 """Since a NavigableString is not a Tag, it has no .name. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
936 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
937 This property is implemented so that code like this doesn't crash |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
938 when run on a mixture of Tag and NavigableString objects: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
939 [x.name for x in tag.children] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
940 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
941 return None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
942 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
943 @name.setter |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
944 def name(self, name): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
945 """Prevent NavigableString.name from ever being set.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
946 raise AttributeError("A NavigableString cannot be given a name.") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
947 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
948 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
949 class PreformattedString(NavigableString): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
950 """A NavigableString not subject to the normal formatting rules. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
951 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
952 This is an abstract class used for special kinds of strings such |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
953 as comments (the Comment class) and CDATA blocks (the CData |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
954 class). |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
955 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
956 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
957 PREFIX = '' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
958 SUFFIX = '' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
959 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
960 def output_ready(self, formatter=None): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
961 """Make this string ready for output by adding any subclass-specific |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
962 prefix or suffix. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
963 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
964 :param formatter: A Formatter object, or a string naming one |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
965 of the standard formatters. The string will be passed into the |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
966 Formatter, but only to trigger any side effects: the return |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
967 value is ignored. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
968 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
969 :return: The string, with any subclass-specific prefix and |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
970 suffix added on. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
971 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
972 if formatter is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
973 ignore = self.format_string(self, formatter) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
974 return self.PREFIX + self + self.SUFFIX |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
975 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
976 class CData(PreformattedString): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
977 """A CDATA block.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
978 PREFIX = '<![CDATA[' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
979 SUFFIX = ']]>' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
980 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
981 class ProcessingInstruction(PreformattedString): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
982 """A SGML processing instruction.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
983 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
984 PREFIX = '<?' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
985 SUFFIX = '>' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
986 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
987 class XMLProcessingInstruction(ProcessingInstruction): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
988 """An XML processing instruction.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
989 PREFIX = '<?' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
990 SUFFIX = '?>' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
991 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
992 class Comment(PreformattedString): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
993 """An HTML or XML comment.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
994 PREFIX = '<!--' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
995 SUFFIX = '-->' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
996 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
997 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
998 class Declaration(PreformattedString): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
999 """An XML declaration.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1000 PREFIX = '<?' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1001 SUFFIX = '?>' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1002 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1003 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1004 class Doctype(PreformattedString): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1005 """A document type declaration.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1006 @classmethod |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1007 def for_name_and_ids(cls, name, pub_id, system_id): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1008 """Generate an appropriate document type declaration for a given |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1009 public ID and system ID. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1010 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1011 :param name: The name of the document's root element, e.g. 'html'. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1012 :param pub_id: The Formal Public Identifier for this document type, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1013 e.g. '-//W3C//DTD XHTML 1.1//EN' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1014 :param system_id: The system identifier for this document type, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1015 e.g. 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1016 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1017 :return: A Doctype. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1018 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1019 value = name or '' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1020 if pub_id is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1021 value += ' PUBLIC "%s"' % pub_id |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1022 if system_id is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1023 value += ' "%s"' % system_id |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1024 elif system_id is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1025 value += ' SYSTEM "%s"' % system_id |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1026 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1027 return Doctype(value) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1028 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1029 PREFIX = '<!DOCTYPE ' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1030 SUFFIX = '>\n' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1031 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1032 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1033 class Stylesheet(NavigableString): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1034 """A NavigableString representing an stylesheet (probably |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1035 CSS). |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1036 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1037 Used to distinguish embedded stylesheets from textual content. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1038 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1039 pass |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1040 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1041 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1042 class Script(NavigableString): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1043 """A NavigableString representing an executable script (probably |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1044 Javascript). |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1045 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1046 Used to distinguish executable code from textual content. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1047 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1048 pass |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1049 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1050 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1051 class TemplateString(NavigableString): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1052 """A NavigableString representing a string found inside an HTML |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1053 template embedded in a larger document. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1054 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1055 Used to distinguish such strings from the main body of the document. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1056 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1057 pass |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1058 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1059 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1060 class Tag(PageElement): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1061 """Represents an HTML or XML tag that is part of a parse tree, along |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1062 with its attributes and contents. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1063 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1064 When Beautiful Soup parses the markup <b>penguin</b>, it will |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1065 create a Tag object representing the <b> tag. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1066 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1067 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1068 def __init__(self, parser=None, builder=None, name=None, namespace=None, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1069 prefix=None, attrs=None, parent=None, previous=None, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1070 is_xml=None, sourceline=None, sourcepos=None, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1071 can_be_empty_element=None, cdata_list_attributes=None, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1072 preserve_whitespace_tags=None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1073 ): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1074 """Basic constructor. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1075 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1076 :param parser: A BeautifulSoup object. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1077 :param builder: A TreeBuilder. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1078 :param name: The name of the tag. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1079 :param namespace: The URI of this Tag's XML namespace, if any. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1080 :param prefix: The prefix for this Tag's XML namespace, if any. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1081 :param attrs: A dictionary of this Tag's attribute values. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1082 :param parent: The PageElement to use as this Tag's parent. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1083 :param previous: The PageElement that was parsed immediately before |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1084 this tag. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1085 :param is_xml: If True, this is an XML tag. Otherwise, this is an |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1086 HTML tag. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1087 :param sourceline: The line number where this tag was found in its |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1088 source document. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1089 :param sourcepos: The character position within `sourceline` where this |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1090 tag was found. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1091 :param can_be_empty_element: If True, this tag should be |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1092 represented as <tag/>. If False, this tag should be represented |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1093 as <tag></tag>. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1094 :param cdata_list_attributes: A list of attributes whose values should |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1095 be treated as CDATA if they ever show up on this tag. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1096 :param preserve_whitespace_tags: A list of tag names whose contents |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1097 should have their whitespace preserved. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1098 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1099 if parser is None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1100 self.parser_class = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1101 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1102 # We don't actually store the parser object: that lets extracted |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1103 # chunks be garbage-collected. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1104 self.parser_class = parser.__class__ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1105 if name is None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1106 raise ValueError("No value provided for new tag's name.") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1107 self.name = name |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1108 self.namespace = namespace |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1109 self.prefix = prefix |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1110 if ((not builder or builder.store_line_numbers) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1111 and (sourceline is not None or sourcepos is not None)): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1112 self.sourceline = sourceline |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1113 self.sourcepos = sourcepos |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1114 if attrs is None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1115 attrs = {} |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1116 elif attrs: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1117 if builder is not None and builder.cdata_list_attributes: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1118 attrs = builder._replace_cdata_list_attribute_values( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1119 self.name, attrs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1120 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1121 attrs = dict(attrs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1122 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1123 attrs = dict(attrs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1124 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1125 # If possible, determine ahead of time whether this tag is an |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1126 # XML tag. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1127 if builder: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1128 self.known_xml = builder.is_xml |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1129 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1130 self.known_xml = is_xml |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1131 self.attrs = attrs |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1132 self.contents = [] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1133 self.setup(parent, previous) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1134 self.hidden = False |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1135 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1136 if builder is None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1137 # In the absence of a TreeBuilder, use whatever values were |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1138 # passed in here. They're probably None, unless this is a copy of some |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1139 # other tag. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1140 self.can_be_empty_element = can_be_empty_element |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1141 self.cdata_list_attributes = cdata_list_attributes |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1142 self.preserve_whitespace_tags = preserve_whitespace_tags |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1143 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1144 # Set up any substitutions for this tag, such as the charset in a META tag. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1145 builder.set_up_substitutions(self) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1146 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1147 # Ask the TreeBuilder whether this tag might be an empty-element tag. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1148 self.can_be_empty_element = builder.can_be_empty_element(name) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1149 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1150 # Keep track of the list of attributes of this tag that |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1151 # might need to be treated as a list. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1152 # |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1153 # For performance reasons, we store the whole data structure |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1154 # rather than asking the question of every tag. Asking would |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1155 # require building a new data structure every time, and |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1156 # (unlike can_be_empty_element), we almost never need |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1157 # to check this. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1158 self.cdata_list_attributes = builder.cdata_list_attributes |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1159 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1160 # Keep track of the names that might cause this tag to be treated as a |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1161 # whitespace-preserved tag. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1162 self.preserve_whitespace_tags = builder.preserve_whitespace_tags |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1163 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1164 parserClass = _alias("parser_class") # BS3 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1165 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1166 def __copy__(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1167 """A copy of a Tag is a new Tag, unconnected to the parse tree. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1168 Its contents are a copy of the old Tag's contents. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1169 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1170 clone = type(self)( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1171 None, self.builder, self.name, self.namespace, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1172 self.prefix, self.attrs, is_xml=self._is_xml, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1173 sourceline=self.sourceline, sourcepos=self.sourcepos, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1174 can_be_empty_element=self.can_be_empty_element, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1175 cdata_list_attributes=self.cdata_list_attributes, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1176 preserve_whitespace_tags=self.preserve_whitespace_tags |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1177 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1178 for attr in ('can_be_empty_element', 'hidden'): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1179 setattr(clone, attr, getattr(self, attr)) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1180 for child in self.contents: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1181 clone.append(child.__copy__()) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1182 return clone |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1183 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1184 @property |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1185 def is_empty_element(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1186 """Is this tag an empty-element tag? (aka a self-closing tag) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1187 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1188 A tag that has contents is never an empty-element tag. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1189 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1190 A tag that has no contents may or may not be an empty-element |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1191 tag. It depends on the builder used to create the tag. If the |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1192 builder has a designated list of empty-element tags, then only |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1193 a tag whose name shows up in that list is considered an |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1194 empty-element tag. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1195 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1196 If the builder has no designated list of empty-element tags, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1197 then any tag with no contents is an empty-element tag. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1198 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1199 return len(self.contents) == 0 and self.can_be_empty_element |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1200 isSelfClosing = is_empty_element # BS3 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1201 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1202 @property |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1203 def string(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1204 """Convenience property to get the single string within this |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1205 PageElement. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1206 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1207 TODO It might make sense to have NavigableString.string return |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1208 itself. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1209 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1210 :return: If this element has a single string child, return |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1211 value is that string. If this element has one child tag, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1212 return value is the 'string' attribute of the child tag, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1213 recursively. If this element is itself a string, has no |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1214 children, or has more than one child, return value is None. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1215 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1216 if len(self.contents) != 1: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1217 return None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1218 child = self.contents[0] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1219 if isinstance(child, NavigableString): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1220 return child |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1221 return child.string |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1222 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1223 @string.setter |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1224 def string(self, string): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1225 """Replace this PageElement's contents with `string`.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1226 self.clear() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1227 self.append(string.__class__(string)) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1228 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1229 def _all_strings(self, strip=False, types=(NavigableString, CData)): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1230 """Yield all strings of certain classes, possibly stripping them. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1231 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1232 :param strip: If True, all strings will be stripped before being |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1233 yielded. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1234 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1235 :types: A tuple of NavigableString subclasses. Any strings of |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1236 a subclass not found in this list will be ignored. By |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1237 default, this means only NavigableString and CData objects |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1238 will be considered. So no comments, processing instructions, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1239 etc. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1240 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1241 :yield: A sequence of strings. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1242 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1243 for descendant in self.descendants: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1244 if ( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1245 (types is None and not isinstance(descendant, NavigableString)) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1246 or |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1247 (types is not None and type(descendant) not in types)): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1248 continue |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1249 if strip: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1250 descendant = descendant.strip() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1251 if len(descendant) == 0: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1252 continue |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1253 yield descendant |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1254 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1255 strings = property(_all_strings) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1256 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1257 @property |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1258 def stripped_strings(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1259 """Yield all strings in the document, stripping them first. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1260 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1261 :yield: A sequence of stripped strings. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1262 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1263 for string in self._all_strings(True): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1264 yield string |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1265 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1266 def get_text(self, separator="", strip=False, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1267 types=(NavigableString, CData)): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1268 """Get all child strings, concatenated using the given separator. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1269 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1270 :param separator: Strings will be concatenated using this separator. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1271 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1272 :param strip: If True, strings will be stripped before being |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1273 concatenated. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1274 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1275 :types: A tuple of NavigableString subclasses. Any strings of |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1276 a subclass not found in this list will be ignored. By |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1277 default, this means only NavigableString and CData objects |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1278 will be considered. So no comments, processing instructions, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1279 stylesheets, etc. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1280 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1281 :return: A string. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1282 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1283 return separator.join([s for s in self._all_strings( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1284 strip, types=types)]) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1285 getText = get_text |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1286 text = property(get_text) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1287 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1288 def decompose(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1289 """Recursively destroys this PageElement and its children. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1290 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1291 This element will be removed from the tree and wiped out; so |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1292 will everything beneath it. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1293 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1294 The behavior of a decomposed PageElement is undefined and you |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1295 should never use one for anything, but if you need to _check_ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1296 whether an element has been decomposed, you can use the |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1297 `decomposed` property. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1298 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1299 self.extract() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1300 i = self |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1301 while i is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1302 n = i.next_element |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1303 i.__dict__.clear() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1304 i.contents = [] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1305 i._decomposed = True |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1306 i = n |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1307 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1308 def clear(self, decompose=False): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1309 """Wipe out all children of this PageElement by calling extract() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1310 on them. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1311 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1312 :param decompose: If this is True, decompose() (a more |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1313 destructive method) will be called instead of extract(). |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1314 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1315 if decompose: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1316 for element in self.contents[:]: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1317 if isinstance(element, Tag): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1318 element.decompose() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1319 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1320 element.extract() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1321 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1322 for element in self.contents[:]: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1323 element.extract() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1324 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1325 def smooth(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1326 """Smooth out this element's children by consolidating consecutive |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1327 strings. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1328 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1329 This makes pretty-printed output look more natural following a |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1330 lot of operations that modified the tree. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1331 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1332 # Mark the first position of every pair of children that need |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1333 # to be consolidated. Do this rather than making a copy of |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1334 # self.contents, since in most cases very few strings will be |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1335 # affected. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1336 marked = [] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1337 for i, a in enumerate(self.contents): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1338 if isinstance(a, Tag): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1339 # Recursively smooth children. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1340 a.smooth() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1341 if i == len(self.contents)-1: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1342 # This is the last item in .contents, and it's not a |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1343 # tag. There's no chance it needs any work. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1344 continue |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1345 b = self.contents[i+1] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1346 if (isinstance(a, NavigableString) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1347 and isinstance(b, NavigableString) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1348 and not isinstance(a, PreformattedString) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1349 and not isinstance(b, PreformattedString) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1350 ): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1351 marked.append(i) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1352 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1353 # Go over the marked positions in reverse order, so that |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1354 # removing items from .contents won't affect the remaining |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1355 # positions. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1356 for i in reversed(marked): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1357 a = self.contents[i] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1358 b = self.contents[i+1] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1359 b.extract() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1360 n = NavigableString(a+b) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1361 a.replace_with(n) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1362 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1363 def index(self, element): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1364 """Find the index of a child by identity, not value. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1365 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1366 Avoids issues with tag.contents.index(element) getting the |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1367 index of equal elements. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1368 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1369 :param element: Look for this PageElement in `self.contents`. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1370 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1371 for i, child in enumerate(self.contents): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1372 if child is element: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1373 return i |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1374 raise ValueError("Tag.index: element not in tag") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1375 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1376 def get(self, key, default=None): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1377 """Returns the value of the 'key' attribute for the tag, or |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1378 the value given for 'default' if it doesn't have that |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1379 attribute.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1380 return self.attrs.get(key, default) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1381 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1382 def get_attribute_list(self, key, default=None): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1383 """The same as get(), but always returns a list. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1384 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1385 :param key: The attribute to look for. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1386 :param default: Use this value if the attribute is not present |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1387 on this PageElement. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1388 :return: A list of values, probably containing only a single |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1389 value. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1390 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1391 value = self.get(key, default) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1392 if not isinstance(value, list): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1393 value = [value] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1394 return value |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1395 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1396 def has_attr(self, key): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1397 """Does this PageElement have an attribute with the given name?""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1398 return key in self.attrs |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1399 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1400 def __hash__(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1401 return str(self).__hash__() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1402 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1403 def __getitem__(self, key): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1404 """tag[key] returns the value of the 'key' attribute for the Tag, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1405 and throws an exception if it's not there.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1406 return self.attrs[key] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1407 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1408 def __iter__(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1409 "Iterating over a Tag iterates over its contents." |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1410 return iter(self.contents) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1411 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1412 def __len__(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1413 "The length of a Tag is the length of its list of contents." |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1414 return len(self.contents) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1415 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1416 def __contains__(self, x): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1417 return x in self.contents |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1418 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1419 def __bool__(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1420 "A tag is non-None even if it has no contents." |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1421 return True |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1422 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1423 def __setitem__(self, key, value): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1424 """Setting tag[key] sets the value of the 'key' attribute for the |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1425 tag.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1426 self.attrs[key] = value |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1427 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1428 def __delitem__(self, key): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1429 "Deleting tag[key] deletes all 'key' attributes for the tag." |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1430 self.attrs.pop(key, None) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1431 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1432 def __call__(self, *args, **kwargs): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1433 """Calling a Tag like a function is the same as calling its |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1434 find_all() method. Eg. tag('a') returns a list of all the A tags |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1435 found within this tag.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1436 return self.find_all(*args, **kwargs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1437 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1438 def __getattr__(self, tag): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1439 """Calling tag.subtag is the same as calling tag.find(name="subtag")""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1440 #print("Getattr %s.%s" % (self.__class__, tag)) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1441 if len(tag) > 3 and tag.endswith('Tag'): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1442 # BS3: soup.aTag -> "soup.find("a") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1443 tag_name = tag[:-3] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1444 warnings.warn( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1445 '.%(name)sTag is deprecated, use .find("%(name)s") instead. If you really were looking for a tag called %(name)sTag, use .find("%(name)sTag")' % dict( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1446 name=tag_name |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1447 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1448 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1449 return self.find(tag_name) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1450 # We special case contents to avoid recursion. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1451 elif not tag.startswith("__") and not tag == "contents": |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1452 return self.find(tag) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1453 raise AttributeError( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1454 "'%s' object has no attribute '%s'" % (self.__class__, tag)) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1455 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1456 def __eq__(self, other): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1457 """Returns true iff this Tag has the same name, the same attributes, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1458 and the same contents (recursively) as `other`.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1459 if self is other: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1460 return True |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1461 if (not hasattr(other, 'name') or |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1462 not hasattr(other, 'attrs') or |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1463 not hasattr(other, 'contents') or |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1464 self.name != other.name or |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1465 self.attrs != other.attrs or |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1466 len(self) != len(other)): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1467 return False |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1468 for i, my_child in enumerate(self.contents): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1469 if my_child != other.contents[i]: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1470 return False |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1471 return True |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1472 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1473 def __ne__(self, other): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1474 """Returns true iff this Tag is not identical to `other`, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1475 as defined in __eq__.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1476 return not self == other |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1477 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1478 def __repr__(self, encoding="unicode-escape"): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1479 """Renders this PageElement as a string. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1480 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1481 :param encoding: The encoding to use (Python 2 only). |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1482 :return: Under Python 2, a bytestring; under Python 3, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1483 a Unicode string. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1484 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1485 if PY3K: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1486 # "The return value must be a string object", i.e. Unicode |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1487 return self.decode() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1488 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1489 # "The return value must be a string object", i.e. a bytestring. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1490 # By convention, the return value of __repr__ should also be |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1491 # an ASCII string. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1492 return self.encode(encoding) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1493 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1494 def __unicode__(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1495 """Renders this PageElement as a Unicode string.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1496 return self.decode() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1497 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1498 def __str__(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1499 """Renders this PageElement as a generic string. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1500 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1501 :return: Under Python 2, a UTF-8 bytestring; under Python 3, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1502 a Unicode string. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1503 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1504 if PY3K: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1505 return self.decode() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1506 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1507 return self.encode() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1508 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1509 if PY3K: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1510 __str__ = __repr__ = __unicode__ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1511 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1512 def encode(self, encoding=DEFAULT_OUTPUT_ENCODING, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1513 indent_level=None, formatter="minimal", |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1514 errors="xmlcharrefreplace"): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1515 """Render a bytestring representation of this PageElement and its |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1516 contents. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1517 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1518 :param encoding: The destination encoding. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1519 :param indent_level: Each line of the rendering will be |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1520 indented this many spaces. Used internally in |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1521 recursive calls while pretty-printing. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1522 :param formatter: A Formatter object, or a string naming one of |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1523 the standard formatters. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1524 :param errors: An error handling strategy such as |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1525 'xmlcharrefreplace'. This value is passed along into |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1526 encode() and its value should be one of the constants |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1527 defined by Python. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1528 :return: A bytestring. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1529 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1530 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1531 # Turn the data structure into Unicode, then encode the |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1532 # Unicode. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1533 u = self.decode(indent_level, encoding, formatter) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1534 return u.encode(encoding, errors) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1535 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1536 def decode(self, indent_level=None, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1537 eventual_encoding=DEFAULT_OUTPUT_ENCODING, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1538 formatter="minimal"): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1539 """Render a Unicode representation of this PageElement and its |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1540 contents. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1541 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1542 :param indent_level: Each line of the rendering will be |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1543 indented this many spaces. Used internally in |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1544 recursive calls while pretty-printing. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1545 :param eventual_encoding: The tag is destined to be |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1546 encoded into this encoding. This method is _not_ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1547 responsible for performing that encoding. This information |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1548 is passed in so that it can be substituted in if the |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1549 document contains a <META> tag that mentions the document's |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1550 encoding. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1551 :param formatter: A Formatter object, or a string naming one of |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1552 the standard formatters. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1553 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1554 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1555 # First off, turn a non-Formatter `formatter` into a Formatter |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1556 # object. This will stop the lookup from happening over and |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1557 # over again. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1558 if not isinstance(formatter, Formatter): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1559 formatter = self.formatter_for_name(formatter) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1560 attributes = formatter.attributes(self) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1561 attrs = [] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1562 for key, val in attributes: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1563 if val is None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1564 decoded = key |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1565 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1566 if isinstance(val, list) or isinstance(val, tuple): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1567 val = ' '.join(val) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1568 elif not isinstance(val, str): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1569 val = str(val) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1570 elif ( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1571 isinstance(val, AttributeValueWithCharsetSubstitution) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1572 and eventual_encoding is not None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1573 ): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1574 val = val.encode(eventual_encoding) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1575 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1576 text = formatter.attribute_value(val) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1577 decoded = ( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1578 str(key) + '=' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1579 + formatter.quoted_attribute_value(text)) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1580 attrs.append(decoded) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1581 close = '' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1582 closeTag = '' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1583 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1584 prefix = '' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1585 if self.prefix: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1586 prefix = self.prefix + ":" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1587 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1588 if self.is_empty_element: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1589 close = formatter.void_element_close_prefix or '' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1590 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1591 closeTag = '</%s%s>' % (prefix, self.name) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1592 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1593 pretty_print = self._should_pretty_print(indent_level) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1594 space = '' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1595 indent_space = '' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1596 if indent_level is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1597 indent_space = (' ' * (indent_level - 1)) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1598 if pretty_print: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1599 space = indent_space |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1600 indent_contents = indent_level + 1 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1601 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1602 indent_contents = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1603 contents = self.decode_contents( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1604 indent_contents, eventual_encoding, formatter |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1605 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1606 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1607 if self.hidden: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1608 # This is the 'document root' object. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1609 s = contents |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1610 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1611 s = [] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1612 attribute_string = '' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1613 if attrs: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1614 attribute_string = ' ' + ' '.join(attrs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1615 if indent_level is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1616 # Even if this particular tag is not pretty-printed, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1617 # we should indent up to the start of the tag. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1618 s.append(indent_space) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1619 s.append('<%s%s%s%s>' % ( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1620 prefix, self.name, attribute_string, close)) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1621 if pretty_print: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1622 s.append("\n") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1623 s.append(contents) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1624 if pretty_print and contents and contents[-1] != "\n": |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1625 s.append("\n") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1626 if pretty_print and closeTag: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1627 s.append(space) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1628 s.append(closeTag) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1629 if indent_level is not None and closeTag and self.next_sibling: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1630 # Even if this particular tag is not pretty-printed, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1631 # we're now done with the tag, and we should add a |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1632 # newline if appropriate. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1633 s.append("\n") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1634 s = ''.join(s) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1635 return s |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1636 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1637 def _should_pretty_print(self, indent_level): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1638 """Should this tag be pretty-printed? |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1639 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1640 Most of them should, but some (such as <pre> in HTML |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1641 documents) should not. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1642 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1643 return ( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1644 indent_level is not None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1645 and ( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1646 not self.preserve_whitespace_tags |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1647 or self.name not in self.preserve_whitespace_tags |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1648 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1649 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1650 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1651 def prettify(self, encoding=None, formatter="minimal"): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1652 """Pretty-print this PageElement as a string. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1653 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1654 :param encoding: The eventual encoding of the string. If this is None, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1655 a Unicode string will be returned. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1656 :param formatter: A Formatter object, or a string naming one of |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1657 the standard formatters. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1658 :return: A Unicode string (if encoding==None) or a bytestring |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1659 (otherwise). |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1660 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1661 if encoding is None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1662 return self.decode(True, formatter=formatter) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1663 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1664 return self.encode(encoding, True, formatter=formatter) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1665 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1666 def decode_contents(self, indent_level=None, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1667 eventual_encoding=DEFAULT_OUTPUT_ENCODING, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1668 formatter="minimal"): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1669 """Renders the contents of this tag as a Unicode string. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1670 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1671 :param indent_level: Each line of the rendering will be |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1672 indented this many spaces. Used internally in |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1673 recursive calls while pretty-printing. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1674 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1675 :param eventual_encoding: The tag is destined to be |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1676 encoded into this encoding. decode_contents() is _not_ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1677 responsible for performing that encoding. This information |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1678 is passed in so that it can be substituted in if the |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1679 document contains a <META> tag that mentions the document's |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1680 encoding. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1681 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1682 :param formatter: A Formatter object, or a string naming one of |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1683 the standard Formatters. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1684 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1685 # First off, turn a string formatter into a Formatter object. This |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1686 # will stop the lookup from happening over and over again. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1687 if not isinstance(formatter, Formatter): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1688 formatter = self.formatter_for_name(formatter) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1689 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1690 pretty_print = (indent_level is not None) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1691 s = [] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1692 for c in self: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1693 text = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1694 if isinstance(c, NavigableString): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1695 text = c.output_ready(formatter) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1696 elif isinstance(c, Tag): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1697 s.append(c.decode(indent_level, eventual_encoding, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1698 formatter)) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1699 preserve_whitespace = ( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1700 self.preserve_whitespace_tags and self.name in self.preserve_whitespace_tags |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1701 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1702 if text and indent_level and not preserve_whitespace: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1703 text = text.strip() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1704 if text: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1705 if pretty_print and not preserve_whitespace: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1706 s.append(" " * (indent_level - 1)) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1707 s.append(text) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1708 if pretty_print and not preserve_whitespace: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1709 s.append("\n") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1710 return ''.join(s) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1711 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1712 def encode_contents( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1713 self, indent_level=None, encoding=DEFAULT_OUTPUT_ENCODING, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1714 formatter="minimal"): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1715 """Renders the contents of this PageElement as a bytestring. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1716 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1717 :param indent_level: Each line of the rendering will be |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1718 indented this many spaces. Used internally in |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1719 recursive calls while pretty-printing. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1720 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1721 :param eventual_encoding: The bytestring will be in this encoding. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1722 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1723 :param formatter: A Formatter object, or a string naming one of |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1724 the standard Formatters. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1725 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1726 :return: A bytestring. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1727 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1728 contents = self.decode_contents(indent_level, encoding, formatter) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1729 return contents.encode(encoding) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1730 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1731 # Old method for BS3 compatibility |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1732 def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1733 prettyPrint=False, indentLevel=0): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1734 """Deprecated method for BS3 compatibility.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1735 if not prettyPrint: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1736 indentLevel = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1737 return self.encode_contents( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1738 indent_level=indentLevel, encoding=encoding) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1739 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1740 #Soup methods |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1741 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1742 def find(self, name=None, attrs={}, recursive=True, text=None, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1743 **kwargs): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1744 """Look in the children of this PageElement and find the first |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1745 PageElement that matches the given criteria. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1746 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1747 All find_* methods take a common set of arguments. See the online |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1748 documentation for detailed explanations. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1749 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1750 :param name: A filter on tag name. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1751 :param attrs: A dictionary of filters on attribute values. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1752 :param recursive: If this is True, find() will perform a |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1753 recursive search of this PageElement's children. Otherwise, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1754 only the direct children will be considered. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1755 :param limit: Stop looking after finding this many results. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1756 :kwargs: A dictionary of filters on attribute values. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1757 :return: A PageElement. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1758 :rtype: bs4.element.Tag | bs4.element.NavigableString |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1759 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1760 r = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1761 l = self.find_all(name, attrs, recursive, text, 1, **kwargs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1762 if l: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1763 r = l[0] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1764 return r |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1765 findChild = find #BS2 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1766 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1767 def find_all(self, name=None, attrs={}, recursive=True, text=None, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1768 limit=None, **kwargs): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1769 """Look in the children of this PageElement and find all |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1770 PageElements that match the given criteria. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1771 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1772 All find_* methods take a common set of arguments. See the online |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1773 documentation for detailed explanations. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1774 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1775 :param name: A filter on tag name. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1776 :param attrs: A dictionary of filters on attribute values. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1777 :param recursive: If this is True, find_all() will perform a |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1778 recursive search of this PageElement's children. Otherwise, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1779 only the direct children will be considered. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1780 :param limit: Stop looking after finding this many results. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1781 :kwargs: A dictionary of filters on attribute values. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1782 :return: A ResultSet of PageElements. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1783 :rtype: bs4.element.ResultSet |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1784 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1785 generator = self.descendants |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1786 if not recursive: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1787 generator = self.children |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1788 return self._find_all(name, attrs, text, limit, generator, **kwargs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1789 findAll = find_all # BS3 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1790 findChildren = find_all # BS2 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1791 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1792 #Generator methods |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1793 @property |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1794 def children(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1795 """Iterate over all direct children of this PageElement. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1796 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1797 :yield: A sequence of PageElements. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1798 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1799 # return iter() to make the purpose of the method clear |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1800 return iter(self.contents) # XXX This seems to be untested. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1801 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1802 @property |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1803 def descendants(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1804 """Iterate over all children of this PageElement in a |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1805 breadth-first sequence. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1806 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1807 :yield: A sequence of PageElements. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1808 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1809 if not len(self.contents): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1810 return |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1811 stopNode = self._last_descendant().next_element |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1812 current = self.contents[0] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1813 while current is not stopNode: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1814 yield current |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1815 current = current.next_element |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1816 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1817 # CSS selector code |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1818 def select_one(self, selector, namespaces=None, **kwargs): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1819 """Perform a CSS selection operation on the current element. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1820 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1821 :param selector: A CSS selector. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1822 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1823 :param namespaces: A dictionary mapping namespace prefixes |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1824 used in the CSS selector to namespace URIs. By default, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1825 Beautiful Soup will use the prefixes it encountered while |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1826 parsing the document. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1827 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1828 :param kwargs: Keyword arguments to be passed into SoupSieve's |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1829 soupsieve.select() method. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1830 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1831 :return: A Tag. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1832 :rtype: bs4.element.Tag |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1833 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1834 value = self.select(selector, namespaces, 1, **kwargs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1835 if value: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1836 return value[0] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1837 return None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1838 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1839 def select(self, selector, namespaces=None, limit=None, **kwargs): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1840 """Perform a CSS selection operation on the current element. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1841 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1842 This uses the SoupSieve library. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1843 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1844 :param selector: A string containing a CSS selector. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1845 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1846 :param namespaces: A dictionary mapping namespace prefixes |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1847 used in the CSS selector to namespace URIs. By default, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1848 Beautiful Soup will use the prefixes it encountered while |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1849 parsing the document. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1850 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1851 :param limit: After finding this number of results, stop looking. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1852 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1853 :param kwargs: Keyword arguments to be passed into SoupSieve's |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1854 soupsieve.select() method. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1855 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1856 :return: A ResultSet of Tags. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1857 :rtype: bs4.element.ResultSet |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1858 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1859 if namespaces is None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1860 namespaces = self._namespaces |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1861 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1862 if limit is None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1863 limit = 0 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1864 if soupsieve is None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1865 raise NotImplementedError( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1866 "Cannot execute CSS selectors because the soupsieve package is not installed." |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1867 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1868 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1869 results = soupsieve.select(selector, self, namespaces, limit, **kwargs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1870 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1871 # We do this because it's more consistent and because |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1872 # ResultSet.__getattr__ has a helpful error message. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1873 return ResultSet(None, results) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1874 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1875 # Old names for backwards compatibility |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1876 def childGenerator(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1877 """Deprecated generator.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1878 return self.children |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1879 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1880 def recursiveChildGenerator(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1881 """Deprecated generator.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1882 return self.descendants |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1883 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1884 def has_key(self, key): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1885 """Deprecated method. This was kind of misleading because has_key() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1886 (attributes) was different from __in__ (contents). |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1887 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1888 has_key() is gone in Python 3, anyway. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1889 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1890 warnings.warn('has_key is deprecated. Use has_attr("%s") instead.' % ( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1891 key)) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1892 return self.has_attr(key) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1893 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1894 # Next, a couple classes to represent queries and their results. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1895 class SoupStrainer(object): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1896 """Encapsulates a number of ways of matching a markup element (tag or |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1897 string). |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1898 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1899 This is primarily used to underpin the find_* methods, but you can |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1900 create one yourself and pass it in as `parse_only` to the |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1901 `BeautifulSoup` constructor, to parse a subset of a large |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1902 document. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1903 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1904 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1905 def __init__(self, name=None, attrs={}, text=None, **kwargs): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1906 """Constructor. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1907 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1908 The SoupStrainer constructor takes the same arguments passed |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1909 into the find_* methods. See the online documentation for |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1910 detailed explanations. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1911 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1912 :param name: A filter on tag name. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1913 :param attrs: A dictionary of filters on attribute values. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1914 :param text: A filter for a NavigableString with specific text. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1915 :kwargs: A dictionary of filters on attribute values. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1916 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1917 self.name = self._normalize_search_value(name) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1918 if not isinstance(attrs, dict): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1919 # Treat a non-dict value for attrs as a search for the 'class' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1920 # attribute. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1921 kwargs['class'] = attrs |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1922 attrs = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1923 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1924 if 'class_' in kwargs: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1925 # Treat class_="foo" as a search for the 'class' |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1926 # attribute, overriding any non-dict value for attrs. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1927 kwargs['class'] = kwargs['class_'] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1928 del kwargs['class_'] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1929 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1930 if kwargs: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1931 if attrs: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1932 attrs = attrs.copy() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1933 attrs.update(kwargs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1934 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1935 attrs = kwargs |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1936 normalized_attrs = {} |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1937 for key, value in list(attrs.items()): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1938 normalized_attrs[key] = self._normalize_search_value(value) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1939 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1940 self.attrs = normalized_attrs |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1941 self.text = self._normalize_search_value(text) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1942 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1943 def _normalize_search_value(self, value): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1944 # Leave it alone if it's a Unicode string, a callable, a |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1945 # regular expression, a boolean, or None. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1946 if (isinstance(value, str) or isinstance(value, Callable) or hasattr(value, 'match') |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1947 or isinstance(value, bool) or value is None): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1948 return value |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1949 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1950 # If it's a bytestring, convert it to Unicode, treating it as UTF-8. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1951 if isinstance(value, bytes): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1952 return value.decode("utf8") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1953 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1954 # If it's listlike, convert it into a list of strings. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1955 if hasattr(value, '__iter__'): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1956 new_value = [] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1957 for v in value: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1958 if (hasattr(v, '__iter__') and not isinstance(v, bytes) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1959 and not isinstance(v, str)): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1960 # This is almost certainly the user's mistake. In the |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1961 # interests of avoiding infinite loops, we'll let |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1962 # it through as-is rather than doing a recursive call. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1963 new_value.append(v) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1964 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1965 new_value.append(self._normalize_search_value(v)) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1966 return new_value |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1967 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1968 # Otherwise, convert it into a Unicode string. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1969 # The unicode(str()) thing is so this will do the same thing on Python 2 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1970 # and Python 3. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1971 return str(str(value)) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1972 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1973 def __str__(self): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1974 """A human-readable representation of this SoupStrainer.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1975 if self.text: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1976 return self.text |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1977 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1978 return "%s|%s" % (self.name, self.attrs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1979 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1980 def search_tag(self, markup_name=None, markup_attrs={}): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1981 """Check whether a Tag with the given name and attributes would |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1982 match this SoupStrainer. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1983 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1984 Used prospectively to decide whether to even bother creating a Tag |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1985 object. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1986 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1987 :param markup_name: A tag name as found in some markup. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1988 :param markup_attrs: A dictionary of attributes as found in some markup. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1989 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1990 :return: True if the prospective tag would match this SoupStrainer; |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1991 False otherwise. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1992 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1993 found = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1994 markup = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1995 if isinstance(markup_name, Tag): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1996 markup = markup_name |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1997 markup_attrs = markup |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1998 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1999 if isinstance(self.name, str): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2000 # Optimization for a very common case where the user is |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2001 # searching for a tag with one specific name, and we're |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2002 # looking at a tag with a different name. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2003 if markup and not markup.prefix and self.name != markup.name: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2004 return False |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2005 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2006 call_function_with_tag_data = ( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2007 isinstance(self.name, Callable) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2008 and not isinstance(markup_name, Tag)) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2009 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2010 if ((not self.name) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2011 or call_function_with_tag_data |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2012 or (markup and self._matches(markup, self.name)) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2013 or (not markup and self._matches(markup_name, self.name))): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2014 if call_function_with_tag_data: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2015 match = self.name(markup_name, markup_attrs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2016 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2017 match = True |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2018 markup_attr_map = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2019 for attr, match_against in list(self.attrs.items()): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2020 if not markup_attr_map: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2021 if hasattr(markup_attrs, 'get'): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2022 markup_attr_map = markup_attrs |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2023 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2024 markup_attr_map = {} |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2025 for k, v in markup_attrs: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2026 markup_attr_map[k] = v |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2027 attr_value = markup_attr_map.get(attr) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2028 if not self._matches(attr_value, match_against): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2029 match = False |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2030 break |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2031 if match: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2032 if markup: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2033 found = markup |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2034 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2035 found = markup_name |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2036 if found and self.text and not self._matches(found.string, self.text): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2037 found = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2038 return found |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2039 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2040 # For BS3 compatibility. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2041 searchTag = search_tag |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2042 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2043 def search(self, markup): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2044 """Find all items in `markup` that match this SoupStrainer. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2045 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2046 Used by the core _find_all() method, which is ultimately |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2047 called by all find_* methods. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2048 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2049 :param markup: A PageElement or a list of them. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2050 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2051 # print('looking for %s in %s' % (self, markup)) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2052 found = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2053 # If given a list of items, scan it for a text element that |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2054 # matches. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2055 if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, str)): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2056 for element in markup: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2057 if isinstance(element, NavigableString) \ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2058 and self.search(element): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2059 found = element |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2060 break |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2061 # If it's a Tag, make sure its name or attributes match. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2062 # Don't bother with Tags if we're searching for text. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2063 elif isinstance(markup, Tag): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2064 if not self.text or self.name or self.attrs: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2065 found = self.search_tag(markup) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2066 # If it's text, make sure the text matches. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2067 elif isinstance(markup, NavigableString) or \ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2068 isinstance(markup, str): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2069 if not self.name and not self.attrs and self._matches(markup, self.text): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2070 found = markup |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2071 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2072 raise Exception( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2073 "I don't know how to match against a %s" % markup.__class__) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2074 return found |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2075 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2076 def _matches(self, markup, match_against, already_tried=None): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2077 # print(u"Matching %s against %s" % (markup, match_against)) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2078 result = False |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2079 if isinstance(markup, list) or isinstance(markup, tuple): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2080 # This should only happen when searching a multi-valued attribute |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2081 # like 'class'. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2082 for item in markup: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2083 if self._matches(item, match_against): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2084 return True |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2085 # We didn't match any particular value of the multivalue |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2086 # attribute, but maybe we match the attribute value when |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2087 # considered as a string. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2088 if self._matches(' '.join(markup), match_against): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2089 return True |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2090 return False |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2091 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2092 if match_against is True: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2093 # True matches any non-None value. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2094 return markup is not None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2095 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2096 if isinstance(match_against, Callable): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2097 return match_against(markup) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2098 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2099 # Custom callables take the tag as an argument, but all |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2100 # other ways of matching match the tag name as a string. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2101 original_markup = markup |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2102 if isinstance(markup, Tag): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2103 markup = markup.name |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2104 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2105 # Ensure that `markup` is either a Unicode string, or None. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2106 markup = self._normalize_search_value(markup) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2107 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2108 if markup is None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2109 # None matches None, False, an empty string, an empty list, and so on. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2110 return not match_against |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2111 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2112 if (hasattr(match_against, '__iter__') |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2113 and not isinstance(match_against, str)): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2114 # We're asked to match against an iterable of items. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2115 # The markup must be match at least one item in the |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2116 # iterable. We'll try each one in turn. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2117 # |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2118 # To avoid infinite recursion we need to keep track of |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2119 # items we've already seen. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2120 if not already_tried: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2121 already_tried = set() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2122 for item in match_against: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2123 if item.__hash__: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2124 key = item |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2125 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2126 key = id(item) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2127 if key in already_tried: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2128 continue |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2129 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2130 already_tried.add(key) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2131 if self._matches(original_markup, item, already_tried): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2132 return True |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2133 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2134 return False |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2135 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2136 # Beyond this point we might need to run the test twice: once against |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2137 # the tag's name and once against its prefixed name. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2138 match = False |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2139 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2140 if not match and isinstance(match_against, str): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2141 # Exact string match |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2142 match = markup == match_against |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2143 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2144 if not match and hasattr(match_against, 'search'): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2145 # Regexp match |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2146 return match_against.search(markup) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2147 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2148 if (not match |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2149 and isinstance(original_markup, Tag) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2150 and original_markup.prefix): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2151 # Try the whole thing again with the prefixed tag name. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2152 return self._matches( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2153 original_markup.prefix + ':' + original_markup.name, match_against |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2154 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2155 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2156 return match |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2157 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2158 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2159 class ResultSet(list): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2160 """A ResultSet is just a list that keeps track of the SoupStrainer |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2161 that created it.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2162 def __init__(self, source, result=()): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2163 """Constructor. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2164 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2165 :param source: A SoupStrainer. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2166 :param result: A list of PageElements. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2167 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2168 super(ResultSet, self).__init__(result) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2169 self.source = source |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2170 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2171 def __getattr__(self, key): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2172 """Raise a helpful exception to explain a common code fix.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2173 raise AttributeError( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2174 "ResultSet object has no attribute '%s'. You're probably treating a list of elements like a single element. Did you call find_all() when you meant to call find()?" % key |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2175 ) |