comparison env/lib/python3.7/site-packages/bs4/__init__.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:26e78fe6e8c4
1 """Beautiful Soup Elixir and Tonic - "The Screen-Scraper's Friend".
2
3 http://www.crummy.com/software/BeautifulSoup/
4
5 Beautiful Soup uses a pluggable XML or HTML parser to parse a
6 (possibly invalid) document into a tree representation. Beautiful Soup
7 provides methods and Pythonic idioms that make it easy to navigate,
8 search, and modify the parse tree.
9
10 Beautiful Soup works with Python 2.7 and up. It works better if lxml
11 and/or html5lib is installed.
12
13 For more than you ever wanted to know about Beautiful Soup, see the
14 documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
15 """
16
17 __author__ = "Leonard Richardson (leonardr@segfault.org)"
18 __version__ = "4.9.0"
19 __copyright__ = "Copyright (c) 2004-2020 Leonard Richardson"
20 # Use of this source code is governed by the MIT license.
21 __license__ = "MIT"
22
23 __all__ = ['BeautifulSoup']
24
25 import os
26 import re
27 import sys
28 import traceback
29 import warnings
30
31 from .builder import builder_registry, ParserRejectedMarkup
32 from .dammit import UnicodeDammit
33 from .element import (
34 CData,
35 Comment,
36 DEFAULT_OUTPUT_ENCODING,
37 Declaration,
38 Doctype,
39 NavigableString,
40 PageElement,
41 ProcessingInstruction,
42 ResultSet,
43 SoupStrainer,
44 Tag,
45 )
46
47 # The very first thing we do is give a useful error if someone is
48 # running this code under Python 3 without converting it.
49 'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'!='You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
50
class BeautifulSoup(Tag):
    """A data structure representing a parsed HTML or XML document.

    Most of the methods you'll call on a BeautifulSoup object are inherited from
    PageElement or Tag.

    Internally, this class defines the basic interface called by the
    tree builders when converting an HTML/XML document into a data
    structure. The interface abstracts away the differences between
    parsers. To write a new tree builder, you'll need to understand
    these methods as a whole.

    These methods will be called by the BeautifulSoup constructor:
      * reset()
      * feed(markup)

    The tree builder may call these methods from its feed() implementation:
      * handle_starttag(name, attrs) # See note about return value
      * handle_endtag(name)
      * handle_data(data) # Appends to the current data node
      * endData(containerClass) # Ends the current data node

    No matter how complicated the underlying parser is, you should be
    able to build a tree using 'start tag' events, 'end tag' events,
    'data' events, and "done with data" events.

    If you encounter an empty-element tag (aka a self-closing tag,
    like HTML's <br> tag), call handle_starttag and then
    handle_endtag.
    """

    # Since BeautifulSoup subclasses Tag, it's possible to treat it as
    # a Tag with a .name. This name makes it clear the BeautifulSoup
    # object isn't a real markup tag.
    ROOT_TAG_NAME = '[document]'

    # If the end-user gives no indication which tree builder they
    # want, look for one with these features.
    DEFAULT_BUILDER_FEATURES = ['html', 'fast']

    # A string containing all ASCII whitespace characters, used in
    # endData() to detect data chunks that seem 'empty'.
    ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'

    NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n"

    def __init__(self, markup="", features=None, builder=None,
                 parse_only=None, from_encoding=None, exclude_encodings=None,
                 element_classes=None, **kwargs):
        """Constructor.

        :param markup: A string or a file-like object representing
         markup to be parsed.

        :param features: Desirable features of the parser to be
         used. This may be the name of a specific parser ("lxml",
         "lxml-xml", "html.parser", or "html5lib") or it may be the
         type of markup to be used ("html", "html5", "xml"). It's
         recommended that you name a specific parser, so that
         Beautiful Soup gives you the same results across platforms
         and virtual environments.

        :param builder: A TreeBuilder subclass to instantiate (or
         instance to use) instead of looking one up based on
         `features`. You only need to use this if you've implemented a
         custom TreeBuilder.

        :param parse_only: A SoupStrainer. Only parts of the document
         matching the SoupStrainer will be considered. This is useful
         when parsing part of a document that would otherwise be too
         large to fit into memory.

        :param from_encoding: A string indicating the encoding of the
         document to be parsed. Pass this in if Beautiful Soup is
         guessing wrongly about the document's encoding.

        :param exclude_encodings: A list of strings indicating
         encodings known to be wrong. Pass this in if you don't know
         the document's encoding but you know Beautiful Soup's guess is
         wrong.

        :param element_classes: A dictionary mapping BeautifulSoup
         classes like Tag and NavigableString, to other classes you'd
         like to be instantiated instead as the parse tree is
         built. This is useful for subclassing Tag or NavigableString
         to modify default behavior.

        :param kwargs: For backwards compatibility purposes, the
         constructor accepts certain keyword arguments used in
         Beautiful Soup 3. None of these arguments do anything in
         Beautiful Soup 4; they will result in a warning and then be
         ignored.

         Apart from this, any keyword arguments passed into the
         BeautifulSoup constructor are propagated to the TreeBuilder
         constructor. This makes it possible to configure a
         TreeBuilder by passing in arguments, not just by saying which
         one to use.

        :raises FeatureNotFound: If no registered tree builder offers
         the requested features.
        :raises ParserRejectedMarkup: If the builder rejects every
         candidate encoding of the markup.
        """
        # Beautiful Soup 3 arguments: warn, then drop them so they are
        # not forwarded to the TreeBuilder constructor.
        if 'convertEntities' in kwargs:
            del kwargs['convertEntities']
            warnings.warn(
                "BS4 does not respect the convertEntities argument to the "
                "BeautifulSoup constructor. Entities are always converted "
                "to Unicode characters.")

        if 'markupMassage' in kwargs:
            del kwargs['markupMassage']
            warnings.warn(
                "BS4 does not respect the markupMassage argument to the "
                "BeautifulSoup constructor. The tree builder is responsible "
                "for any necessary markup massage.")

        if 'smartQuotesTo' in kwargs:
            del kwargs['smartQuotesTo']
            warnings.warn(
                "BS4 does not respect the smartQuotesTo argument to the "
                "BeautifulSoup constructor. Smart quotes are always converted "
                "to Unicode characters.")

        if 'selfClosingTags' in kwargs:
            del kwargs['selfClosingTags']
            warnings.warn(
                "BS4 does not respect the selfClosingTags argument to the "
                "BeautifulSoup constructor. The tree builder is responsible "
                "for understanding self-closing tags.")

        if 'isHTML' in kwargs:
            del kwargs['isHTML']
            warnings.warn(
                "BS4 does not respect the isHTML argument to the "
                "BeautifulSoup constructor. Suggest you use "
                "features='lxml' for HTML and features='lxml-xml' for "
                "XML.")

        def deprecated_argument(old_name, new_name):
            """Pop a renamed BS3 keyword from kwargs, warning if present."""
            if old_name in kwargs:
                warnings.warn(
                    'The "%s" argument to the BeautifulSoup constructor '
                    'has been renamed to "%s."' % (old_name, new_name))
                return kwargs.pop(old_name)
            return None

        parse_only = parse_only or deprecated_argument(
            "parseOnlyThese", "parse_only")

        from_encoding = from_encoding or deprecated_argument(
            "fromEncoding", "from_encoding")

        if from_encoding and isinstance(markup, str):
            warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.")
            from_encoding = None

        self.element_classes = element_classes or dict()

        # We need this information to track whether or not the builder
        # was specified well enough that we can omit the 'you need to
        # specify a parser' warning.
        original_builder = builder
        original_features = features

        if isinstance(builder, type):
            # A builder class was passed in; it needs to be instantiated.
            builder_class = builder
            builder = None
        elif builder is None:
            if isinstance(features, str):
                features = [features]
            if features is None or len(features) == 0:
                features = self.DEFAULT_BUILDER_FEATURES
            builder_class = builder_registry.lookup(*features)
            if builder_class is None:
                raise FeatureNotFound(
                    "Couldn't find a tree builder with the features you "
                    "requested: %s. Do you need to install a parser library?"
                    % ",".join(features))

        # At this point either we have a TreeBuilder instance in
        # builder, or we have a builder_class that we can instantiate
        # with the remaining **kwargs.
        if builder is None:
            builder = builder_class(**kwargs)
            if not original_builder and not (
                    original_features == builder.NAME or
                    original_features in builder.ALTERNATE_NAMES
            ):
                if builder.is_xml:
                    markup_type = "XML"
                else:
                    markup_type = "HTML"

                # This code adapted from warnings.py so that we get the same line
                # of code as our warnings.warn() call gets, even if the answer is wrong
                # (as it may be in a multithreading situation).
                #
                # The frame depth (1) and stacklevel (2) both refer to the
                # caller of __init__, so this lookup must stay inline here
                # rather than move into a helper.
                caller = None
                try:
                    caller = sys._getframe(1)
                except ValueError:
                    pass
                if caller:
                    caller_globals = caller.f_globals
                    line_number = caller.f_lineno
                else:
                    caller_globals = sys.__dict__
                    line_number = 1
                filename = caller_globals.get('__file__')
                if filename:
                    fnl = filename.lower()
                    if fnl.endswith((".pyc", ".pyo")):
                        # Point at the .py source rather than the compiled file.
                        filename = filename[:-1]
                if filename:
                    # If there is no filename at all, the user is most likely in a REPL,
                    # and the warning is not necessary.
                    values = dict(
                        filename=filename,
                        line_number=line_number,
                        parser=builder.NAME,
                        markup_type=markup_type
                    )
                    warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % values, stacklevel=2)
        else:
            if kwargs:
                warnings.warn("Keyword arguments to the BeautifulSoup constructor will be ignored. These would normally be passed into the TreeBuilder constructor, but a TreeBuilder instance was passed in as `builder`.")

        self.builder = builder
        self.is_xml = builder.is_xml
        self.known_xml = self.is_xml
        self._namespaces = dict()
        self.parse_only = parse_only

        self.builder.initialize_soup(self)

        if hasattr(markup, 'read'):        # It's a file-type object.
            markup = markup.read()
        elif len(markup) <= 256 and (
                (isinstance(markup, bytes) and b'<' not in markup)
                or (isinstance(markup, str) and '<' not in markup)
        ):
            # Print out warnings for a couple beginner problems
            # involving passing non-markup to Beautiful Soup.
            # Beautiful Soup will still parse the input as markup,
            # just in case that's what the user really wants.
            if (isinstance(markup, str)
                    and not os.path.supports_unicode_filenames):
                possible_filename = markup.encode("utf8")
            else:
                possible_filename = markup
            is_file = False
            try:
                is_file = os.path.exists(possible_filename)
            except Exception:
                # This is almost certainly a problem involving
                # characters not valid in filenames on this
                # system. Just let it go: this check only exists to
                # produce a friendly warning.
                pass
            if is_file:
                warnings.warn(
                    '"%s" looks like a filename, not markup. You should'
                    ' probably open this file and pass the filehandle into'
                    ' Beautiful Soup.' % self._decode_markup(markup)
                )
            self._check_markup_is_url(markup)

        # Try each candidate (markup, encoding) combination offered by
        # the builder until one parses without being rejected.
        rejections = []
        success = False
        for (self.markup, self.original_encoding, self.declared_html_encoding,
             self.contains_replacement_characters) in (
                 self.builder.prepare_markup(
                     markup, from_encoding, exclude_encodings=exclude_encodings)):
            self.reset()
            try:
                self._feed()
                success = True
                break
            except ParserRejectedMarkup as e:
                rejections.append(e)

        if not success:
            other_exceptions = [str(e) for e in rejections]
            raise ParserRejectedMarkup(
                "The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.\n\nOriginal exception(s) from parser:\n " + "\n ".join(other_exceptions)
            )

        # Clear out the markup and remove the builder's circular
        # reference to this object.
        self.markup = None
        self.builder.soup = None

    def __copy__(self):
        """Copy a BeautifulSoup object by converting the document to a string and parsing it again."""
        copy = type(self)(
            self.encode('utf-8'), builder=self.builder, from_encoding='utf-8'
        )

        # Although we encoded the tree to UTF-8, that may not have
        # been the encoding of the original markup. Set the copy's
        # .original_encoding to reflect the original object's
        # .original_encoding.
        copy.original_encoding = self.original_encoding
        return copy

    def __getstate__(self):
        """Return a copy of the instance dict for pickling.

        The builder is replaced with None when it reports itself as
        not picklable.
        """
        # Frequently a tree builder can't be pickled.
        d = dict(self.__dict__)
        if 'builder' in d and not self.builder.picklable:
            d['builder'] = None
        return d

    @classmethod
    def _decode_markup(cls, markup):
        """Ensure `markup` is a Unicode string so it's safe to send into
        warnings.warn.

        Bytes are decoded as UTF-8 with undecodable sequences replaced;
        anything else is returned unchanged.

        TODO: warnings.warn had this problem back in 2010 but it might not
        anymore.
        """
        if isinstance(markup, bytes):
            return markup.decode('utf-8', 'replace')
        return markup

    @classmethod
    def _check_markup_is_url(cls, markup):
        """Error-handling method to raise a warning if incoming markup looks
        like a URL.

        :param markup: A string or bytestring. Other types are ignored.
        """
        if isinstance(markup, bytes):
            space = b' '
            cant_start_with = (b"http:", b"https:")
        elif isinstance(markup, str):
            space = ' '
            cant_start_with = ("http:", "https:")
        else:
            return

        # startswith() accepts a tuple of candidate prefixes.
        if markup.startswith(cant_start_with) and space not in markup:
            warnings.warn(
                '"%s" looks like a URL. Beautiful Soup is not an'
                ' HTTP client. You should probably use an HTTP client like'
                ' requests to get the document behind the URL, and feed'
                ' that document to Beautiful Soup.' % cls._decode_markup(
                    markup
                )
            )

    def _feed(self):
        """Internal method that parses previously set markup, creating a large
        number of Tag and NavigableString objects.
        """
        # Put the builder back into its initial state before parsing.
        self.builder.reset()

        self.builder.feed(self.markup)
        # Close out any unfinished strings and close all the open tags.
        self.endData()
        while self.currentTag.name != self.ROOT_TAG_NAME:
            self.popTag()

    def reset(self):
        """Reset this object to a state as though it had never parsed any
        markup.
        """
        Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME)
        self.hidden = 1
        self.builder.reset()
        self.current_data = []
        self.currentTag = None
        self.tagStack = []
        self.preserve_whitespace_tag_stack = []
        self.string_container_stack = []
        # The constructor calls reset() again after a rejected parse
        # attempt. Without this, the next attempt would link its first
        # node to an element left over from the rejected tree.
        self._most_recent_element = None
        self.pushTag(self)

    def new_tag(self, name, namespace=None, nsprefix=None, attrs=None,
                sourceline=None, sourcepos=None, **kwattrs):
        """Create a new Tag associated with this BeautifulSoup object.

        :param name: The name of the new Tag.
        :param namespace: The URI of the new Tag's XML namespace, if any.
        :param nsprefix: The prefix for the new Tag's XML namespace, if any.
        :param attrs: A dictionary of this Tag's attribute values; can
         be used instead of `kwattrs` for attributes like 'class'
         that are reserved words in Python. Entries here override
         same-named entries in `kwattrs`.
        :param sourceline: The line number where this tag was
         (purportedly) found in its source document.
        :param sourcepos: The character position within `sourceline` where this
         tag was (purportedly) found.
        :param kwattrs: Keyword arguments for the new Tag's attribute values.
        """
        if attrs:
            kwattrs.update(attrs)
        return self.element_classes.get(Tag, Tag)(
            None, self.builder, name, namespace, nsprefix, kwattrs,
            sourceline=sourceline, sourcepos=sourcepos
        )

    def string_container(self, base_class=None):
        """Pick the class used to hold the next string added to the tree.

        :param base_class: The preferred string class; defaults to
         NavigableString.
        :return: `base_class` after applying any `element_classes`
         override and, when inside a tag registered in the builder's
         `string_containers`, that tag's container class.
        """
        container = base_class or NavigableString

        # There may be a general override of NavigableString.
        container = self.element_classes.get(
            container, container
        )

        # On top of that, we may be inside a tag that needs a special
        # container class.
        if self.string_container_stack:
            container = self.builder.string_containers.get(
                self.string_container_stack[-1].name, container
            )
        return container

    def new_string(self, s, subclass=None):
        """Create a new NavigableString associated with this BeautifulSoup
        object.
        """
        container = self.string_container(subclass)
        return container(s)

    def insert_before(self, successor):
        """This method is part of the PageElement API, but `BeautifulSoup` doesn't implement
        it because there is nothing before or after it in the parse tree.
        """
        raise NotImplementedError("BeautifulSoup objects don't support insert_before().")

    def insert_after(self, successor):
        """This method is part of the PageElement API, but `BeautifulSoup` doesn't implement
        it because there is nothing before or after it in the parse tree.
        """
        raise NotImplementedError("BeautifulSoup objects don't support insert_after().")

    def popTag(self):
        """Internal method called by _popToTag when a tag is closed.

        :return: The tag that is current *after* the pop (the new top
         of the stack), not the tag that was removed.
        """
        tag = self.tagStack.pop()
        if self.preserve_whitespace_tag_stack and tag == self.preserve_whitespace_tag_stack[-1]:
            self.preserve_whitespace_tag_stack.pop()
        if self.string_container_stack and tag == self.string_container_stack[-1]:
            self.string_container_stack.pop()
        if self.tagStack:
            self.currentTag = self.tagStack[-1]
        return self.currentTag

    def pushTag(self, tag):
        """Internal method called by handle_starttag when a tag is opened."""
        if self.currentTag is not None:
            self.currentTag.contents.append(tag)
        self.tagStack.append(tag)
        self.currentTag = self.tagStack[-1]
        if tag.name in self.builder.preserve_whitespace_tags:
            self.preserve_whitespace_tag_stack.append(tag)
        if tag.name in self.builder.string_containers:
            self.string_container_stack.append(tag)

    def endData(self, containerClass=None):
        """Method called by the TreeBuilder when the end of a data segment
        occurs.

        :param containerClass: The preferred class for the resulting
         string object; resolved through string_container().
        """
        containerClass = self.string_container(containerClass)

        if self.current_data:
            current_data = ''.join(self.current_data)
            # If whitespace is not preserved, and this string contains
            # nothing but ASCII spaces, replace it with a single space
            # or newline.
            if not self.preserve_whitespace_tag_stack:
                strippable = all(
                    ch in self.ASCII_SPACES for ch in current_data
                )
                if strippable:
                    if '\n' in current_data:
                        current_data = '\n'
                    else:
                        current_data = ' '

            # Reset the data collector.
            self.current_data = []

            # Should we add this string to the tree at all?
            if self.parse_only and len(self.tagStack) <= 1 and \
                   (not self.parse_only.text or \
                    not self.parse_only.search(current_data)):
                return

            o = containerClass(current_data)
            self.object_was_parsed(o)

    def object_was_parsed(self, o, parent=None, most_recent_element=None):
        """Method called by the TreeBuilder to integrate an object into the parse tree.

        :param o: The object to append.
        :param parent: The tag to append to; defaults to the current tag.
        :param most_recent_element: The element to treat as `o`'s
         predecessor; defaults to the most recently added element.
        """
        if parent is None:
            parent = self.currentTag
        if most_recent_element is not None:
            previous_element = most_recent_element
        else:
            previous_element = self._most_recent_element

        next_element = previous_sibling = next_sibling = None
        if isinstance(o, Tag):
            # A Tag may already carry linkage; keep it.
            next_element = o.next_element
            next_sibling = o.next_sibling
            previous_sibling = o.previous_sibling
            if previous_element is None:
                previous_element = o.previous_element

        # Must be computed before setup()/append below change the links.
        fix = parent.next_element is not None

        o.setup(parent, previous_element, next_element, previous_sibling, next_sibling)

        self._most_recent_element = o
        parent.contents.append(o)

        # Check if we are inserting into an already parsed node.
        if fix:
            self._linkage_fixer(parent)

    def _linkage_fixer(self, el):
        """Make sure linkage of this fragment is sound.

        :param el: A tag whose last child was just appended.
        """

        first = el.contents[0]
        child = el.contents[-1]
        descendant = child

        if child is first and el.parent is not None:
            # Parent should be linked to first child
            el.next_element = child
            # We are no longer linked to whatever this element is
            prev_el = child.previous_element
            if prev_el is not None and prev_el is not el:
                prev_el.next_element = None
            # First child should be linked to the parent, and no previous siblings.
            child.previous_element = el
            child.previous_sibling = None

        # We have no sibling as we've been appended as the last.
        child.next_sibling = None

        # This index is a tag, dig deeper for a "last descendant"
        if isinstance(child, Tag) and child.contents:
            descendant = child._last_descendant(False)

        # As the final step, link last descendant. It should be linked
        # to the parent's next sibling (if found), else walk up the chain
        # and find a parent with a sibling. It should have no next sibling.
        descendant.next_element = None
        descendant.next_sibling = None
        target = el
        while target is not None:
            if target.next_sibling is not None:
                descendant.next_element = target.next_sibling
                # NOTE(review): this back-link points at `child`, not
                # `descendant`; kept as in the original - confirm intent.
                target.next_sibling.previous_element = child
                break
            target = target.parent

    def _popToTag(self, name, nsprefix=None, inclusivePop=True):
        """Pops the tag stack up to and including the most recent
        instance of the given tag.

        :param name: Pop up to the most recent tag with this name.
        :param nsprefix: The namespace prefix that goes with `name`.
        :param inclusivePop: If this is false, pops the tag stack up
          to but *not* including the most recent instance of the
          given tag.
        :return: The value returned by the last popTag() call, or None
          if nothing was popped.
        """
        if name == self.ROOT_TAG_NAME:
            # The BeautifulSoup object itself can never be popped.
            return

        most_recently_popped = None

        # Walk from the top of the stack down to (but excluding) the
        # root. Each iteration pops at most once, so index i is always
        # the current top of the stack.
        stack_size = len(self.tagStack)
        for i in range(stack_size - 1, 0, -1):
            t = self.tagStack[i]
            if (name == t.name and nsprefix == t.prefix):
                if inclusivePop:
                    most_recently_popped = self.popTag()
                break
            most_recently_popped = self.popTag()

        return most_recently_popped

    def handle_starttag(self, name, namespace, nsprefix, attrs, sourceline=None,
                        sourcepos=None):
        """Called by the tree builder when a new tag is encountered.

        :param name: Name of the tag.
        :param namespace: Namespace of the tag, passed through to the
         Tag constructor.
        :param nsprefix: Namespace prefix for the tag.
        :param attrs: A dictionary of attribute values.
        :param sourceline: The line number where this tag was found in its
         source document.
        :param sourcepos: The character position within `sourceline` where this
         tag was found.

        If this method returns None, the tag was rejected by an active
        SoupStrainer. You should proceed as if the tag had not occurred
        in the document. For instance, if this was a self-closing tag,
        don't call handle_endtag.
        """
        self.endData()

        if (self.parse_only and len(self.tagStack) <= 1
            and (self.parse_only.text
                 or not self.parse_only.search_tag(name, attrs))):
            return None

        tag = self.element_classes.get(Tag, Tag)(
            self, self.builder, name, namespace, nsprefix, attrs,
            self.currentTag, self._most_recent_element,
            sourceline=sourceline, sourcepos=sourcepos
        )
        if tag is None:
            return tag
        if self._most_recent_element is not None:
            self._most_recent_element.next_element = tag
        self._most_recent_element = tag
        self.pushTag(tag)
        return tag

    def handle_endtag(self, name, nsprefix=None):
        """Called by the tree builder when an ending tag is encountered.

        :param name: Name of the tag.
        :param nsprefix: Namespace prefix for the tag.
        """
        self.endData()
        self._popToTag(name, nsprefix)

    def handle_data(self, data):
        """Called by the tree builder when a chunk of textual data is encountered."""
        self.current_data.append(data)

    def decode(self, pretty_print=False,
               eventual_encoding=DEFAULT_OUTPUT_ENCODING,
               formatter="minimal"):
        """Returns a string or Unicode representation of the parse tree
            as an HTML or XML document.

        :param pretty_print: If this is True, indentation will be used to
            make the document more readable.
        :param eventual_encoding: The encoding of the final document.
            If this is None, the document will be a Unicode string.
        :param formatter: Passed through unchanged to Tag.decode().
        """
        if self.is_xml:
            # Print the XML declaration
            encoding_part = ''
            if eventual_encoding is not None:
                encoding_part = ' encoding="%s"' % eventual_encoding
            prefix = '<?xml version="1.0"%s?>\n' % encoding_part
        else:
            prefix = ''
        if not pretty_print:
            indent_level = None
        else:
            indent_level = 0
        return prefix + super(BeautifulSoup, self).decode(
            indent_level, eventual_encoding, formatter)
705
# Short aliases for quick interactive use, e.g. 'from bs4 import _soup'.
_s = _soup = BeautifulSoup
709
class BeautifulStoneSoup(BeautifulSoup):
    """Deprecated XML-parsing entry point kept for backwards compatibility.

    Behaves like BeautifulSoup with `features` forced to 'xml', and emits
    a deprecation warning on construction.
    """

    def __init__(self, *args, **kwargs):
        deprecation_message = (
            'The BeautifulStoneSoup class is deprecated. Instead of using '
            'it, pass features="xml" into the BeautifulSoup constructor.')
        # Any caller-supplied `features` value is overridden.
        kwargs['features'] = 'xml'
        warnings.warn(deprecation_message)
        super(BeautifulStoneSoup, self).__init__(*args, **kwargs)
719
720
class StopParsing(Exception):
    """Signal from a TreeBuilder that it cannot continue parsing."""
724
class FeatureNotFound(ValueError):
    """Raised by the BeautifulSoup constructor when no registered parser
    provides the requested features.
    """
730
731
# When executed as a script, read markup from stdin and pretty-print it.
if __name__ == '__main__':
    import sys
    print(BeautifulSoup(sys.stdin).prettify())