annotate env/lib/python3.7/site-packages/bleach/__init__.py @ 2:6af9afd405e9 draft

"planemo upload commit 0a63dd5f4d38a1f6944587f52a8cd79874177fc1"
author shellac
date Thu, 14 May 2020 14:56:58 -0400
parents 26e78fe6e8c4
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1 # -*- coding: utf-8 -*-
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
2
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
3 from __future__ import unicode_literals
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
4
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
5 from pkg_resources import parse_version
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
6
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
7 from bleach.linkifier import (
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
8 DEFAULT_CALLBACKS,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
9 Linker,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
10 )
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
11 from bleach.sanitizer import (
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
12 ALLOWED_ATTRIBUTES,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
13 ALLOWED_PROTOCOLS,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
14 ALLOWED_STYLES,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
15 ALLOWED_TAGS,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
16 Cleaner,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
17 )
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
18
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
19
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
20 # yyyymmdd
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
21 __releasedate__ = '20200324'
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
22 # x.y.z or x.y.z.dev0 -- semver
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
23 __version__ = '3.1.4'
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
24 VERSION = parse_version(__version__)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
25
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
26
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
27 __all__ = ['clean', 'linkify']
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
28
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
29
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
30 def clean(text, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
31 styles=ALLOWED_STYLES, protocols=ALLOWED_PROTOCOLS, strip=False,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
32 strip_comments=True):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
33 """Clean an HTML fragment of malicious content and return it
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
34
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
35 This function is a security-focused function whose sole purpose is to
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
36 remove malicious content from a string such that it can be displayed as
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
37 content in a web page.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
38
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
39 This function is not designed to use to transform content to be used in
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
40 non-web-page contexts.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
41
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
42 Example::
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
43
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
44 import bleach
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
45
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
46 better_text = bleach.clean(yucky_text)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
47
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
48
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
49 .. Note::
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
50
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
51 If you're cleaning a lot of text and passing the same argument values or
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
52 you want more configurability, consider using a
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
53 :py:class:`bleach.sanitizer.Cleaner` instance.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
54
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
55 :arg str text: the text to clean
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
56
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
57 :arg list tags: allowed list of tags; defaults to
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
58 ``bleach.sanitizer.ALLOWED_TAGS``
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
59
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
60 :arg dict attributes: allowed attributes; can be a callable, list or dict;
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
61 defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
62
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
63 :arg list styles: allowed list of css styles; defaults to
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
64 ``bleach.sanitizer.ALLOWED_STYLES``
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
65
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
66 :arg list protocols: allowed list of protocols for links; defaults
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
67 to ``bleach.sanitizer.ALLOWED_PROTOCOLS``
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
68
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
69 :arg bool strip: whether or not to strip disallowed elements
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
70
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
71 :arg bool strip_comments: whether or not to strip HTML comments
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
72
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
73 :returns: cleaned text as unicode
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
74
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
75 """
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
76 cleaner = Cleaner(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
77 tags=tags,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
78 attributes=attributes,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
79 styles=styles,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
80 protocols=protocols,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
81 strip=strip,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
82 strip_comments=strip_comments,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
83 )
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
84 return cleaner.clean(text)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
85
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
86
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
87 def linkify(text, callbacks=DEFAULT_CALLBACKS, skip_tags=None, parse_email=False):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
88 """Convert URL-like strings in an HTML fragment to links
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
89
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
90 This function converts strings that look like URLs, domain names and email
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
91 addresses in text that may be an HTML fragment to links, while preserving:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
92
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
93 1. links already in the string
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
94 2. urls found in attributes
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
95 3. email addresses
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
96
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
97 linkify does a best-effort approach and tries to recover from bad
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
98 situations due to crazy text.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
99
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
100 .. Note::
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
101
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
102 If you're linking a lot of text and passing the same argument values or
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
103 you want more configurability, consider using a
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
104 :py:class:`bleach.linkifier.Linker` instance.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
105
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
106 .. Note::
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
107
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
108 If you have text that you want to clean and then linkify, consider using
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
109 the :py:class:`bleach.linkifier.LinkifyFilter` as a filter in the clean
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
110 pass. That way you're not parsing the HTML twice.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
111
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
112 :arg str text: the text to linkify
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
113
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
114 :arg list callbacks: list of callbacks to run when adjusting tag attributes;
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
115 defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
116
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
117 :arg list skip_tags: list of tags that you don't want to linkify the
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
118 contents of; for example, you could set this to ``['pre']`` to skip
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
119 linkifying contents of ``pre`` tags
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
120
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
121 :arg bool parse_email: whether or not to linkify email addresses
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
122
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
123 :returns: linkified text as unicode
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
124
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
125 """
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
126 linker = Linker(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
127 callbacks=callbacks,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
128 skip_tags=skip_tags,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
129 parse_email=parse_email
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
130 )
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
131 return linker.linkify(text)