seitime-frappe/frappe/utils/html_utils.py
2023-07-23 20:00:28 -05:00

757 lines
10 KiB
Python

import json
import re
from bleach_allowlist import bleach_allowlist
import frappe
from frappe.utils.data import escape_html
# Matches text that starts with an emoji from the regional-indicator (flag),
# pictograph/emoticon, or transport-symbol blocks.
#
# The first five alternatives spell each emoji as a UTF-16 surrogate pair.
# A Python 3 ``str`` stores such a character as ONE code point, so those
# alternatives only ever match text that literally contains lone surrogates.
# They are kept unchanged (same five capture groups) for backward
# compatibility.  The final character class covers the same ranges expressed
# as real code points, so that actual emoji match:
#   U+1F1E0-U+1F1FF  (was \ud83c[\udde0-\uddff])
#   U+1F300-U+1F64F  (was \ud83c[\udf00-...], \ud83d[...-\uddff], \ud83d[\ude00-\ude4f])
#   U+1F680-U+1F6FF  (was \ud83d[\ude80-\udeff])
EMOJI_PATTERN = re.compile(
    "(\ud83d[\ude00-\ude4f])|"
    "(\ud83c[\udf00-\uffff])|"
    "(\ud83d[\u0000-\uddff])|"
    "(\ud83d[\ude80-\udeff])|"
    "(\ud83c[\udde0-\uddff])+|"
    "[\U0001f1e0-\U0001f1ff\U0001f300-\U0001f64f\U0001f680-\U0001f6ff]+",
    flags=re.UNICODE,
)
def clean_html(html):
    """Reduce *html* to a small set of structural and inline formatting tags.

    Non-string input is returned unchanged.  For strings, ``<script>`` and
    ``<style>`` elements are removed with ``clean_script_and_style`` and the
    result is passed through ``bleach.clean`` with an empty attribute
    allowlist, ``strip=True`` and ``strip_comments=True``.
    """
    import bleach
    from bleach.css_sanitizer import CSSSanitizer

    if not isinstance(html, str):
        return html

    without_scripts = clean_script_and_style(html)
    permitted_tags = {
        # block level
        "div", "p", "br",
        # lists
        "ul", "ol", "li",
        # inline emphasis
        "strong", "b", "em", "i", "u",
        # tables
        "table", "thead", "tbody", "td", "tr",
    }
    style_rules = CSSSanitizer(allowed_css_properties=["color", "border", "border-color"])

    return bleach.clean(
        without_scripts,
        tags=permitted_tags,
        attributes=[],
        css_sanitizer=style_rules,
        strip=True,
        strip_comments=True,
    )
def clean_email_html(html):
    """Sanitise *html* with the allowlists used for email content.

    Non-string input is returned unchanged.  For strings, ``<script>`` and
    ``<style>`` elements are removed with ``clean_script_and_style`` and the
    result is passed through ``bleach.clean`` with the tag, attribute, CSS
    property and URL protocol allowlists defined below, ``strip=True`` and
    ``strip_comments=True``.
    """
    import bleach
    from bleach.css_sanitizer import CSSSanitizer

    if not isinstance(html, str):
        return html

    email_styles = CSSSanitizer(
        allowed_css_properties=[
            # colour and sizing
            "color", "border-color", "width", "height", "max-width", "background-color",
            # borders
            "border-collapse", "border-radius",
            "border", "border-top", "border-bottom", "border-left", "border-right",
            # spacing
            "margin", "margin-top", "margin-bottom", "margin-left", "margin-right",
            "padding", "padding-top", "padding-bottom", "padding-left", "padding-right",
            # typography and layout
            "font-size", "font-weight", "font-family", "text-decoration",
            "line-height", "text-align", "vertical-align", "display",
        ]
    )
    email_tags = {
        "div", "p", "br",
        "ul", "ol", "li",
        "strong", "b", "em", "i", "u", "a",
        "table", "thead", "tbody", "td", "tr", "th",
        "pre", "code",
        "h1", "h2", "h3", "h4", "h5", "h6",
        "button", "img",
    }
    email_attributes = ["border", "colspan", "rowspan", "src", "href", "style", "id"]
    url_schemes = ["cid", "http", "https", "mailto", "data"]

    return bleach.clean(
        clean_script_and_style(html),
        tags=email_tags,
        attributes=email_attributes,
        css_sanitizer=email_styles,
        protocols=url_schemes,
        strip=True,
        strip_comments=True,
    )
def clean_script_and_style(html):
    """Return *html* with every ``<script>`` and ``<style>`` element removed.

    The markup is parsed by BeautifulSoup with the ``html5lib`` parser, the
    matching elements are decomposed, and the remaining tree is converted
    back to text with ``frappe.as_unicode``.
    """
    from bs4 import BeautifulSoup

    document = BeautifulSoup(html, "html5lib")
    for unwanted in document.find_all(["script", "style"]):
        unwanted.decompose()
    return frappe.as_unicode(document)
def sanitize_html(html, linkify=False):
    """
    Sanitize HTML tags, attributes and style to prevent XSS attacks.

    Based on bleach clean, bleach whitelist and html5lib's Sanitizer defaults.
    The input is returned unchanged when it is not a string, when it parses
    as JSON (JSON is deliberately not sanitized, as it could lead to future
    problems), or when ``html.parser`` finds no element in it.

    ``linkify`` is accepted but not used by this function.
    """
    import bleach
    from bleach.css_sanitizer import CSSSanitizer
    from bs4 import BeautifulSoup

    if not isinstance(html, str) or is_json(html):
        return html
    if not BeautifulSoup(html, "html.parser").find():
        # plain text without any markup: nothing to sanitize
        return html

    def attributes_filter(tag, name, value):
        # every data-* attribute is accepted; anything else must be allowlisted
        return name.startswith("data-") or name in acceptable_attributes

    document_level_tags = ["html", "head", "meta", "link", "body", "style", "o:p"]
    allowed_tags = acceptable_elements + svg_elements + mathml_elements + document_level_tags

    # returns html with escaped tags, escaped orphan >, <, etc.
    return bleach.clean(
        html,
        tags=allowed_tags,
        attributes={"*": attributes_filter, "svg": svg_attributes},
        css_sanitizer=CSSSanitizer(allowed_css_properties=bleach_allowlist.all_styles),
        strip_comments=False,
        protocols={"cid", "http", "https", "mailto"},
    )
def is_json(text):
    """Return True when ``json.loads`` accepts *text*, False on ``ValueError``."""
    try:
        json.loads(text)
        return True
    except ValueError:
        return False
def get_icon_html(icon, small=False):
    """Build the HTML snippet used to display *icon*.

    - an icon matching ``EMOJI_PATTERN`` is wrapped in a muted ``<span>``;
    - an icon for which ``is_image`` is true becomes an ``<img>`` (16x16 when
      *small* is set);
    - anything else, including an empty or missing icon, becomes an ``<i>``
      element with the icon as its class.

    Attribute values are the ``repr`` of the HTML-escaped icon, which is what
    supplies the surrounding quotes.
    """
    from frappe.utils import is_image

    icon = icon or ""

    if icon and EMOJI_PATTERN.match(icon):
        return f'<span class="text-muted">{icon}</span>'

    if not is_image(icon):
        return f"<i class={escape_html(icon)!r}></i>"

    escaped_src = escape_html(icon)
    if small:
        return f"<img style='width: 16px; height: 16px;' src={escaped_src!r}>"
    return f"<img src={escaped_src!r}>"
def unescape_html(value):
    """Return *value* with HTML character references decoded via ``html.unescape``."""
    import html

    return html.unescape(value)
# adapted from https://raw.githubusercontent.com/html5lib/html5lib-python/4aa79f113e7486c7ec5d15a6e1777bfe546d3259/html5lib/sanitizer.py
# HTML element names that sanitize_html adds to its tag allowlist.
# One row per initial letter; the original ordering is preserved.
acceptable_elements = [
    "a", "abbr", "acronym", "address", "area", "article", "aside", "audio",
    "b", "big", "blockquote", "br", "button",
    "canvas", "caption", "center", "cite", "code", "col", "colgroup", "command",
    "datagrid", "datalist", "dd", "del", "details", "dfn", "dialog", "dir", "div", "dl", "dt",
    "em", "event-source",
    "fieldset", "figcaption", "figure", "footer", "font", "form",
    "header", "h1", "h2", "h3", "h4", "h5", "h6", "hr",
    "i", "img", "input", "ins",
    "keygen", "kbd",
    "label", "legend", "li",
    "m", "map", "mark", "menu", "meter", "multicol",
    "nav", "nextid",
    "ol", "output", "optgroup", "option",
    "p", "pre", "progress",
    "q",
    "s", "samp", "section", "select", "small", "sound", "source",
    "spacer", "span", "strike", "strong", "sub", "sup",
    "table", "tbody", "td", "textarea", "time", "tfoot", "th", "thead", "tr", "tt",
    "u", "ul",
    "var", "video",
]
# MathML element names that sanitize_html adds to its tag allowlist.
mathml_elements = [
    "maction", "math", "merror", "mfrac", "mi", "mmultiscripts", "mn", "mo", "mover",
    "mpadded", "mphantom", "mprescripts", "mroot", "mrow",
    "mspace", "msqrt", "mstyle", "msub", "msubsup", "msup",
    "mtable", "mtd", "mtext", "mtr", "munder", "munderover",
    "none",
]
# SVG element names that sanitize_html adds to its tag allowlist.
# One row per initial letter; the original ordering is preserved.
svg_elements = [
    "a", "animate", "animateColor", "animateMotion", "animateTransform",
    "clipPath", "circle",
    "defs", "desc",
    "ellipse",
    "font-face", "font-face-name", "font-face-src",
    "g", "glyph",
    "hkern",
    "linearGradient", "line",
    "marker", "metadata", "missing-glyph", "mpath",
    "path", "polygon", "polyline",
    "radialGradient", "rect",
    "set", "stop", "svg", "switch",
    "text", "title", "tspan",
    "use",
]
# Attribute names accepted on any tag by the attribute filter in sanitize_html
# (which additionally accepts every name starting with "data-").
# The alphabetical part comes first, followed by later additions; the original
# ordering, including the repeated "datetime" entry, is preserved.
acceptable_attributes = [
    "abbr", "accept", "accept-charset", "accesskey", "action", "align", "alt",
    "autocomplete", "autofocus", "axis",
    "background", "balance", "bgcolor", "bgproperties", "border", "bordercolor",
    "bordercolordark", "bordercolorlight", "bottompadding",
    "cellpadding", "cellspacing", "ch", "challenge", "char", "charoff", "choff",
    "charset", "checked", "cite", "class", "clear", "color", "cols", "colspan",
    "compact", "content", "contenteditable", "controls", "coords",
    "data", "datafld", "datapagesize", "datasrc", "datetime", "default", "delay",
    "dir", "disabled", "draggable", "dynsrc",
    "enctype", "end",
    "face", "for", "form", "frame",
    "galleryimg", "gutter",
    "headers", "height", "hidefocus", "hidden", "high", "href", "hreflang", "hspace",
    "icon", "id", "inputmode", "ismap",
    "keytype",
    "label", "leftspacing", "lang", "list", "longdesc", "loop", "loopcount",
    "loopend", "loopstart", "low", "lowsrc",
    "max", "maxlength", "media", "method", "min", "multiple",
    "name", "nohref", "noshade", "nowrap",
    "open", "optimum",
    "pattern", "ping", "point-size", "poster", "pqg", "preload", "prompt",
    "radiogroup", "readonly", "rel", "repeat-max", "repeat-min", "replace",
    "required", "rev", "rightspacing", "rows", "rowspan", "rules",
    "scope", "selected", "shape", "size", "span", "src", "start", "step", "style",
    "summary", "suppress",
    "tabindex", "target", "template", "title", "toppadding", "type",
    "unselectable", "usemap", "urn",
    "valign", "value", "variable", "volume", "vspace", "vrml",
    "width", "wrap",
    "xml:lang",
    # entries appended after the alphabetical list
    "data-row", "data-list", "data-language", "data-value",
    "role", "frameborder", "allowfullscreen", "spellcheck",
    "data-mode", "data-gramm", "data-placeholder", "data-comment", "data-id",
    "data-denotation-char",
    "itemprop", "itemscope", "itemtype", "itemid", "itemref",
    "datetime", "data-is-group",
]
# MathML attribute names.  A few names occur more than once in this list
# ("columnalign", "mathvariant", "rowalign", "width"); the repeats are
# preserved so the list contents stay exactly as before.
mathml_attributes = [
    "actiontype", "align",
    "columnalign", "columnalign", "columnalign", "columnlines", "columnspacing", "columnspan",
    "depth", "display", "displaystyle",
    "equalcolumns", "equalrows",
    "fence", "fontstyle", "fontweight", "frame",
    "height",
    "linethickness", "lspace",
    "mathbackground", "mathcolor", "mathvariant", "mathvariant", "maxsize", "minsize",
    "other",
    "rowalign", "rowalign", "rowalign", "rowlines", "rowspacing", "rowspan", "rspace",
    "scriptlevel", "selection", "separator", "stretchy",
    "width", "width",
    "xlink:href", "xlink:show", "xlink:type", "xmlns", "xmlns:xlink",
]
# Attribute names that sanitize_html allows on the "svg" tag.
# One group per initial letter; the original ordering is preserved.
svg_attributes = [
    "accent-height", "accumulate", "additive", "alphabetic", "arabic-form", "ascent",
    "attributeName", "attributeType",
    "baseProfile", "bbox", "begin", "by",
    "calcMode", "cap-height", "class", "clip-path", "color", "color-rendering",
    "content", "colwidth", "cx", "cy",
    "d", "dx", "dy", "descent", "display", "dur",
    "end",
    "fill", "fill-opacity", "fill-rule", "font-family", "font-size", "font-stretch",
    "font-style", "font-variant", "font-weight", "from", "fx", "fy",
    "g1", "g2", "glyph-name", "gradientUnits",
    "hanging", "height", "horiz-adv-x", "horiz-origin-x",
    "id", "ideographic",
    "k", "keyPoints", "keySplines", "keyTimes",
    "lang",
    "marker-end", "marker-mid", "marker-start", "markerHeight", "markerUnits",
    "markerWidth", "mathematical", "max", "min",
    "name",
    "offset", "opacity", "orient", "origin", "overline-position", "overline-thickness",
    "panose-1", "path", "pathLength", "points", "preserveAspectRatio",
    "r", "refX", "refY", "repeatCount", "repeatDur", "requiredExtensions",
    "requiredFeatures", "restart", "rotate", "rx", "ry",
    "slope", "stemh", "stemv", "stop-color", "stop-opacity",
    "strikethrough-position", "strikethrough-thickness",
    "stroke", "stroke-dasharray", "stroke-dashoffset", "stroke-linecap",
    "stroke-linejoin", "stroke-miterlimit", "stroke-opacity", "stroke-width",
    "systemLanguage",
    "target", "text-anchor", "to", "transform", "type",
    "u1", "u2", "underline-position", "underline-thickness", "unicode",
    "unicode-range", "units-per-em",
    "values", "version", "viewBox", "visibility",
    "width", "widths",
    "x", "x-height", "x1", "x2",
    "xlink:actuate", "xlink:arcrole", "xlink:href", "xlink:role", "xlink:show",
    "xlink:title", "xlink:type",
    "xml:base", "xml:lang", "xml:space", "xmlns", "xmlns:xlink",
    "y", "y1", "y2",
    "zoomAndPan",
]