From 82cd08afd20af6bde424c2c7b64e0c5c6a90cf3d Mon Sep 17 00:00:00 2001 From: Akhil Narang Date: Mon, 19 Aug 2024 10:49:21 +0530 Subject: [PATCH] fix(sanitize_html): sanitize all string inputs No reason to exclude JSON and what bs4 *thinks* is invalid HTML. The browser is happy to render both of those if it's remotely valid. Signed-off-by: Akhil Narang --- frappe/core/doctype/comment/comment.py | 2 +- frappe/utils/html_utils.py | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/frappe/core/doctype/comment/comment.py b/frappe/core/doctype/comment/comment.py index f2393427eb..e9fdbbca96 100644 --- a/frappe/core/doctype/comment/comment.py +++ b/frappe/core/doctype/comment/comment.py @@ -62,7 +62,7 @@ class Comment(Document): def validate(self): if not self.comment_email: self.comment_email = frappe.session.user - self.content = frappe.utils.sanitize_html(self.content) + self.content = frappe.utils.sanitize_html(self.content, always_sanitize=True) def on_update(self): update_comment_in_doc(self) diff --git a/frappe/utils/html_utils.py b/frappe/utils/html_utils.py index ca1cbc3a74..7b6fc83149 100644 --- a/frappe/utils/html_utils.py +++ b/frappe/utils/html_utils.py @@ -142,12 +142,12 @@ def clean_script_and_style(html): return frappe.as_unicode(soup) -def sanitize_html(html, linkify=False): +def sanitize_html(html, linkify=False, always_sanitize=False): """ Sanitize HTML tags, attributes and style to prevent XSS attacks Based on bleach clean, bleach whitelist and html5lib's Sanitizer defaults - Does not sanitize JSON, as it could lead to future problems + Does not sanitize JSON unless explicitly specified, as it could lead to future problems """ import bleach from bleach.css_sanitizer import CSSSanitizer @@ -156,11 +156,12 @@ def sanitize_html(html, linkify=False): if not isinstance(html, str): return html - elif is_json(html): - return html + if not always_sanitize: + if is_json(html): + return html - if not bool(BeautifulSoup(html, 
"html.parser").find()): - return html + if not bool(BeautifulSoup(html, "html.parser").find()): + return html tags = ( acceptable_elements