fix(sanitize_html): sanitize all string inputs

There is no reason to exclude JSON or what bs4 *thinks* is invalid HTML.
The browser is happy to render both of those if they're even remotely valid.

Signed-off-by: Akhil Narang <me@akhilnarang.dev>
This commit is contained in:
Akhil Narang 2024-08-19 10:49:21 +05:30
parent d70190f8d3
commit 82cd08afd2
No known key found for this signature in database
GPG key ID: 9DCC61E211BF645F
2 changed files with 8 additions and 7 deletions

View file

@ -62,7 +62,7 @@ class Comment(Document):
def validate(self):
if not self.comment_email:
self.comment_email = frappe.session.user
self.content = frappe.utils.sanitize_html(self.content)
self.content = frappe.utils.sanitize_html(self.content, always_sanitize=True)
def on_update(self):
update_comment_in_doc(self)

View file

@ -142,12 +142,12 @@ def clean_script_and_style(html):
return frappe.as_unicode(soup)
def sanitize_html(html, linkify=False):
def sanitize_html(html, linkify=False, always_sanitize=False):
"""
Sanitize HTML tags, attributes and style to prevent XSS attacks
Based on bleach clean, bleach whitelist and html5lib's Sanitizer defaults
Does not sanitize JSON, as it could lead to future problems
Does not sanitize JSON unless explicitly specified, as it could lead to future problems
"""
import bleach
from bleach.css_sanitizer import CSSSanitizer
@ -156,11 +156,12 @@ def sanitize_html(html, linkify=False):
if not isinstance(html, str):
return html
elif is_json(html):
return html
if not always_sanitize:
if is_json(html):
return html
if not bool(BeautifulSoup(html, "html.parser").find()):
return html
if not bool(BeautifulSoup(html, "html.parser").find()):
return html
tags = (
acceptable_elements