From 8ff913b8adc052481cfbecf004e59fc35e96c97d Mon Sep 17 00:00:00 2001 From: David Arnold Date: Sun, 23 Jul 2023 19:32:04 -0500 Subject: [PATCH] build(deps): update bleach 3.3.0 -> 6.0.0 --- frappe/utils/html_utils.py | 92 ++++++++++++++++++++------------------ pyproject.toml | 2 +- 2 files changed, 50 insertions(+), 44 deletions(-) diff --git a/frappe/utils/html_utils.py b/frappe/utils/html_utils.py index a109af798b..d3c101349f 100644 --- a/frappe/utils/html_utils.py +++ b/frappe/utils/html_utils.py @@ -19,13 +19,14 @@ EMOJI_PATTERN = re.compile( def clean_html(html): import bleach + from bleach.css_sanitizer import CSSSanitizer if not isinstance(html, str): return html return bleach.clean( clean_script_and_style(html), - tags=[ + tags={ "div", "p", "br", @@ -42,9 +43,9 @@ def clean_html(html): "tbody", "td", "tr", - ], + }, attributes=[], - styles=["color", "border", "border-color"], + css_sanitizer=CSSSanitizer(allowed_css_properties=["color", "border", "border-color"]), strip=True, strip_comments=True, ) @@ -52,44 +53,13 @@ def clean_html(html): def clean_email_html(html): import bleach + from bleach.css_sanitizer import CSSSanitizer if not isinstance(html, str): return html - return bleach.clean( - clean_script_and_style(html), - tags=[ - "div", - "p", - "br", - "ul", - "ol", - "li", - "strong", - "b", - "em", - "i", - "u", - "a", - "table", - "thead", - "tbody", - "td", - "tr", - "th", - "pre", - "code", - "h1", - "h2", - "h3", - "h4", - "h5", - "h6", - "button", - "img", - ], - attributes=["border", "colspan", "rowspan", "src", "href", "style", "id"], - styles=[ + css_sanitizer = CSSSanitizer( + allowed_css_properties=[ "color", "border-color", "width", @@ -121,7 +91,43 @@ def clean_email_html(html): "text-align", "vertical-align", "display", - ], + ] + ) + + return bleach.clean( + clean_script_and_style(html), + tags={ + "div", + "p", + "br", + "ul", + "ol", + "li", + "strong", + "b", + "em", + "i", + "u", + "a", + "table", + "thead", + "tbody", + "td", + "tr", + "th", + "pre", + "code", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "button", + "img", + }, + attributes=["border", "colspan", "rowspan", "src", "href", "style", "id"], + css_sanitizer=css_sanitizer, protocols=["cid", "http", "https", "mailto", "data"], strip=True, strip_comments=True, @@ -146,6 +152,7 @@ def sanitize_html(html, linkify=False): Does not sanitize JSON, as it could lead to future problems """ import bleach + from bleach.css_sanitizer import CSSSanitizer from bs4 import BeautifulSoup if not isinstance(html, str): @@ -170,17 +177,16 @@ def sanitize_html(html, linkify=False): return name in acceptable_attributes attributes = {"*": attributes_filter, "svg": svg_attributes} - styles = bleach_allowlist.all_styles - strip_comments = False + css_sanitizer = CSSSanitizer(allowed_css_properties=bleach_allowlist.all_styles) # returns html with escaped tags, escaped orphan >, <, etc. escaped_html = bleach.clean( html, tags=tags, attributes=attributes, - styles=styles, - strip_comments=strip_comments, - protocols=["cid", "http", "https", "mailto"], + css_sanitizer=css_sanitizer, + strip_comments=False, + protocols={"cid", "http", "https", "mailto"}, ) return escaped_html diff --git a/pyproject.toml b/pyproject.toml index b2b890c24c..33e0ff9f1a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ dependencies = [ "Whoosh~=2.7.4", "beautifulsoup4~=4.12.2", "bleach-allowlist~=1.0.3", - "bleach~=3.3.0", + "bleach[css]~=6.0.0", "cairocffi==1.5.1", "chardet~=5.1.0", "croniter~=1.3.15",