From 82cd08afd20af6bde424c2c7b64e0c5c6a90cf3d Mon Sep 17 00:00:00 2001
From: Akhil Narang <me@akhilnarang.dev>
Date: Mon, 19 Aug 2024 10:49:21 +0530
Subject: [PATCH] fix(sanitize_html): sanitize all string inputs

No reason to exclude JSON and what bs4 *thinks* is invalid HTML.
The browser is happy to render both of those if it's remotely valid.

Signed-off-by: Akhil Narang <me@akhilnarang.dev>
---
 frappe/core/doctype/comment/comment.py |  2 +-
 frappe/utils/html_utils.py             | 13 +++++++------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/frappe/core/doctype/comment/comment.py b/frappe/core/doctype/comment/comment.py
index f2393427eb..e9fdbbca96 100644
--- a/frappe/core/doctype/comment/comment.py
+++ b/frappe/core/doctype/comment/comment.py
@@ -62,7 +62,7 @@ class Comment(Document):
 	def validate(self):
 		if not self.comment_email:
 			self.comment_email = frappe.session.user
-		self.content = frappe.utils.sanitize_html(self.content)
+		self.content = frappe.utils.sanitize_html(self.content, always_sanitize=True)
 
 	def on_update(self):
 		update_comment_in_doc(self)
diff --git a/frappe/utils/html_utils.py b/frappe/utils/html_utils.py
index ca1cbc3a74..7b6fc83149 100644
--- a/frappe/utils/html_utils.py
+++ b/frappe/utils/html_utils.py
@@ -142,12 +142,12 @@ def clean_script_and_style(html):
 	return frappe.as_unicode(soup)
 
 
-def sanitize_html(html, linkify=False):
+def sanitize_html(html, linkify=False, always_sanitize=False):
 	"""
 	Sanitize HTML tags, attributes and style to prevent XSS attacks
 	Based on bleach clean, bleach whitelist and html5lib's Sanitizer defaults
 
-	Does not sanitize JSON, as it could lead to future problems
+	Does not sanitize JSON unless explicitly specified, as it could lead to future problems
 	"""
 	import bleach
 	from bleach.css_sanitizer import CSSSanitizer
@@ -156,11 +156,12 @@ def sanitize_html(html, linkify=False):
 	if not isinstance(html, str):
 		return html
 
-	elif is_json(html):
-		return html
+	if not always_sanitize:
+		if is_json(html):
+			return html
 
-	if not bool(BeautifulSoup(html, "html.parser").find()):
-		return html
+		if not bool(BeautifulSoup(html, "html.parser").find()):
+			return html
 
 	tags = (
 		acceptable_elements