Skip to content

Commit

Permalink
Merge pull request #693 from willkg/649-tags-fix
Browse files Browse the repository at this point in the history
Change elements to tags (#649)
  • Loading branch information
willkg authored Jan 21, 2023
2 parents 4971efa + 05d3a4a commit b2a0d57
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 44 deletions.
2 changes: 1 addition & 1 deletion bleach/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def clean(
:arg str text: the text to clean
:arg list tags: allowed list of tags; defaults to
:arg set tags: set of allowed tags; defaults to
``bleach.sanitizer.ALLOWED_TAGS``
:arg dict attributes: allowed attributes; can be a callable, list or dict;
Expand Down
3 changes: 3 additions & 0 deletions bleach/html5lib_shim.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@
allowed_protocols,
allowed_css_properties,
allowed_svg_properties,
attr_val_is_uri,
svg_attr_val_allows_ref,
svg_allow_local_href,
) # noqa: E402 module level import not at top of file
from bleach._vendor.html5lib.filters.sanitizer import (
Filter as SanitizerFilter,
Expand Down
96 changes: 53 additions & 43 deletions bleach/sanitizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,23 @@
from bleach import parse_shim


#: List of allowed tags
ALLOWED_TAGS = [
"a",
"abbr",
"acronym",
"b",
"blockquote",
"code",
"em",
"i",
"li",
"ol",
"strong",
"ul",
]
#: Set of allowed tags
ALLOWED_TAGS = frozenset(
(
"a",
"abbr",
"acronym",
"b",
"blockquote",
"code",
"em",
"i",
"li",
"ol",
"strong",
"ul",
)
)


#: Map of allowed attributes by tag
Expand Down Expand Up @@ -93,7 +95,7 @@ def __init__(
):
"""Initializes a Cleaner
:arg list tags: allowed list of tags; defaults to
:arg set tags: set of allowed tags; defaults to
``bleach.sanitizer.ALLOWED_TAGS``
:arg dict attributes: allowed attributes; can be a callable, list or dict;
Expand Down Expand Up @@ -190,13 +192,11 @@ def clean(self, text):
dom = self.parser.parseFragment(text)
filtered = BleachSanitizerFilter(
source=self.walker(dom),
# Bleach-sanitizer-specific things
allowed_tags=self.tags,
attributes=self.attributes,
strip_disallowed_elements=self.strip,
strip_disallowed_tags=self.strip,
strip_html_comments=self.strip_comments,
css_sanitizer=self.css_sanitizer,
# html5lib-sanitizer things
allowed_elements=self.tags,
allowed_protocols=self.protocols,
)

Expand Down Expand Up @@ -260,19 +260,21 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
def __init__(
self,
source,
allowed_elements=ALLOWED_TAGS,
allowed_tags=ALLOWED_TAGS,
attributes=ALLOWED_ATTRIBUTES,
allowed_protocols=ALLOWED_PROTOCOLS,
strip_disallowed_elements=False,
attr_val_is_uri=html5lib_shim.attr_val_is_uri,
svg_attr_val_allows_ref=html5lib_shim.svg_attr_val_allows_ref,
svg_allow_local_href=html5lib_shim.svg_allow_local_href,
strip_disallowed_tags=False,
strip_html_comments=True,
css_sanitizer=None,
**kwargs,
):
"""Creates a BleachSanitizerFilter instance
:arg source: html5lib TreeWalker stream as an html5lib TreeWalker
:arg list allowed_elements: allowed list of tags; defaults to
:arg set allowed_tags: set of allowed tags; defaults to
``bleach.sanitizer.ALLOWED_TAGS``
:arg dict attributes: allowed attributes; can be a callable, list or dict;
Expand All @@ -281,33 +283,41 @@ def __init__(
:arg list allowed_protocols: allowed list of protocols for links; defaults
to ``bleach.sanitizer.ALLOWED_PROTOCOLS``
:arg bool strip_disallowed_elements: whether or not to strip disallowed
elements
:arg attr_val_is_uri: set of attributes that have URI values
:arg svg_attr_val_allows_ref: set of SVG attributes that can have
references
:arg svg_allow_local_href: set of SVG elements that can have local
hrefs
:arg bool strip_disallowed_tags: whether or not to strip disallowed
tags
:arg bool strip_html_comments: whether or not to strip HTML comments
:arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
sanitizing style attribute values and style text; defaults to None
"""
# NOTE(willkg): This is the superclass of
# html5lib.filters.sanitizer.Filter. We call this directly skipping the
# __init__ for html5lib.filters.sanitizer.Filter because that does
# things we don't need to do and kicks up the deprecation warning for
# using Sanitizer.
html5lib_shim.Filter.__init__(self, source)

self.allowed_tags = frozenset(allowed_tags)
self.allowed_protocols = allowed_protocols

self.attr_filter = attribute_filter_factory(attributes)
self.strip_disallowed_elements = strip_disallowed_elements
self.strip_disallowed_tags = strip_disallowed_tags
self.strip_html_comments = strip_html_comments
self.css_sanitizer = css_sanitizer

# filter out html5lib deprecation warnings to use bleach from BleachSanitizerFilter init
warnings.filterwarnings(
"ignore",
message="html5lib's sanitizer is deprecated",
category=DeprecationWarning,
module="bleach._vendor.html5lib",
)
return super().__init__(
source,
allowed_elements=allowed_elements,
allowed_protocols=allowed_protocols,
**kwargs,
)
self.attr_val_is_uri = attr_val_is_uri
self.svg_attr_val_allows_ref = svg_attr_val_allows_ref
self.css_sanitizer = css_sanitizer
self.svg_allow_local_href = svg_allow_local_href

def sanitize_stream(self, token_iterator):
for token in token_iterator:
Expand Down Expand Up @@ -377,10 +387,10 @@ def sanitize_token(self, token):
"""
token_type = token["type"]
if token_type in ["StartTag", "EndTag", "EmptyTag"]:
if token["name"] in self.allowed_elements:
if token["name"] in self.allowed_tags:
return self.allow_token(token)

elif self.strip_disallowed_elements:
elif self.strip_disallowed_tags:
return None

else:
Expand Down

0 comments on commit b2a0d57

Please sign in to comment.