import json
import re
import nh3
from bleach_allowlist import bleach_allowlist
import frappe
from frappe.utils.data import escape_html
# Matches text that *starts* with an emoji (callers use ``EMOJI_PATTERN.match``).
#
# A Python 3 ``str`` stores whole code points, so an emoji such as U+1F600 is a
# single character and never the UTF-16 surrogate pair the first five
# alternatives spell out. Those alternatives are kept unchanged for backward
# compatibility with strings that really contain lone surrogates; the final
# alternative covers the same ranges expressed as real code points:
#   U+1F1E0-U+1F1FF  regional indicators (flags)
#   U+1F300-U+1F5FF  miscellaneous symbols and pictographs
#   U+1F600-U+1F64F  emoticons
#   U+1F680-U+1F6FF  transport and map symbols
EMOJI_PATTERN = re.compile(
    "(\ud83d[\ude00-\ude4f])|"
    "(\ud83c[\udf00-\uffff])|"
    "(\ud83d[\u0000-\uddff])|"
    "(\ud83d[\ude80-\udeff])|"
    "(\ud83c[\udde0-\uddff])+|"
    "([\U0001f1e0-\U0001f1ff\U0001f300-\U0001f5ff\U0001f600-\U0001f64f\U0001f680-\U0001f6ff])+",
    flags=re.UNICODE,
)
# Tags whose content (not only the tag itself) needs to be removed from the output.
# Passed as ``clean_content_tags`` to nh3.clean by clean_html and clean_email_html.
REMOVE_CONTENT_TAGS = {"script", "style"}
def clean_html(html):
    """Clean *html* with nh3, keeping only basic formatting, list and table tags.

    Anything that is not a ``str`` is handed back untouched. Content of the tags
    listed in REMOVE_CONTENT_TAGS is dropped and HTML comments are stripped.
    """
    if isinstance(html, str):
        basic_tags = {
            # block / line structure
            "div", "p", "br",
            # lists
            "ul", "ol", "li",
            # inline emphasis
            "strong", "b", "em", "i", "u",
            # tables
            "table", "thead", "tbody", "tr", "td",
        }
        return nh3.clean(
            html,
            tags=basic_tags,
            clean_content_tags=REMOVE_CONTENT_TAGS,
            strip_comments=True,
        )

    return html
def clean_email_html(html):
    """Clean *html* for use in e-mails with nh3.

    Anything that is not a ``str`` is handed back untouched. Compared with
    clean_html, links, images, headings, ``pre``/``code`` and buttons are also
    kept, a handful of attributes is allowed on every tag, inline styles are
    filtered down to an allowlist of CSS properties, and ``cid:`` / ``data:``
    URLs are accepted in addition to nh3's default schemes.
    """
    if isinstance(html, str):
        email_tags = {
            # block / line structure
            "div", "p", "br", "pre", "code",
            # headings
            "h1", "h2", "h3", "h4", "h5", "h6",
            # lists
            "ul", "ol", "li",
            # inline emphasis and links
            "strong", "b", "em", "i", "u", "a",
            # tables
            "table", "thead", "tbody", "tr", "th", "td",
            # media / controls
            "img", "button",
        }
        shared_attributes = {"border", "colspan", "rowspan", "src", "href", "style", "id"}
        style_allowlist = {
            # colours
            "color", "background-color", "border-color",
            # sizing
            "width", "height", "max-width",
            # borders
            "border", "border-top", "border-bottom", "border-left", "border-right",
            "border-collapse", "border-radius",
            # spacing
            "margin", "margin-top", "margin-bottom", "margin-left", "margin-right",
            "padding", "padding-top", "padding-bottom", "padding-left", "padding-right",
            # typography
            "font-size", "font-weight", "font-family", "text-decoration", "line-height",
            # alignment / layout
            "text-align", "vertical-align", "display",
        }
        return nh3.clean(
            html,
            tags=email_tags,
            attributes={"*": shared_attributes},
            clean_content_tags=REMOVE_CONTENT_TAGS,
            filter_style_properties=style_allowlist,
            strip_comments=True,
            url_schemes=nh3.ALLOWED_URL_SCHEMES.union({"cid", "data"}),
        )

    return html
def clean_script_and_style(html):
    """
    Strip every ``<script>`` and ``<style>`` element from *html*.

    The markup is parsed with BeautifulSoup (html5lib parser), the matching
    elements are decomposed, and the remaining document is returned through
    ``frappe.as_unicode``.

    DEPRECATED: prefer nh3.clean's clean_content_tags parameter.
    """
    from bs4 import BeautifulSoup

    document = BeautifulSoup(html, "html5lib")
    unwanted = document.find_all(["script", "style"])
    for element in unwanted:
        element.decompose()

    return frappe.as_unicode(document)
def sanitize_html(html, linkify=False, always_sanitize=False, disallowed_tags=None):
    """
    Sanitize HTML tags, attributes and inline styles to prevent XSS attacks.

    Built on nh3.clean, the bleach allowlist of styles and the element/attribute
    sets adapted from html5lib's sanitizer.

    Non-string input is returned as-is. Unless *always_sanitize* is set, input
    that parses as JSON, or in which BeautifulSoup finds no tag at all, is also
    returned untouched (sanitizing JSON could lead to future problems).
    *disallowed_tags* lets the caller remove entries from the allowed tag set.
    *linkify* is accepted but not used by this function.
    """
    from bs4 import BeautifulSoup

    if not isinstance(html, str):
        return html

    if not always_sanitize:
        if is_json(html):
            return html
        contains_markup = bool(BeautifulSoup(html, "html.parser").find())
        if not contains_markup:
            return html

    # union() builds a fresh set, so the module-level sets are never mutated below
    allowed_tags = acceptable_elements.union(
        svg_elements,
        mathml_elements,
        ["html", "head", "meta", "link", "body", "o:p"],
    )

    # Allow caller to explicitly disallow some tags
    if disallowed_tags:
        allowed_tags.difference_update(disallowed_tags)

    allowed_attributes = {"*": acceptable_attributes, "svg": svg_attributes}

    # nh3 does the actual cleaning; HTML comments are kept (strip_comments=False)
    return nh3.clean(
        html,
        tags=allowed_tags,
        attributes=allowed_attributes,
        generic_attribute_prefixes={"data-"},
        strip_comments=False,
        filter_style_properties=set(bleach_allowlist.all_styles),
        url_schemes=nh3.ALLOWED_URL_SCHEMES.union({"cid"}),
    )
def is_json(text):
    """Return True when *text* parses as a JSON document, False otherwise.

    The parsed value is discarded; only whether ``json.loads`` succeeds matters.
    Input that ``json.loads`` cannot even accept (``None``, numbers, lists, ...)
    is reported as "not JSON" instead of raising.
    """
    try:
        json.loads(text)
    except (TypeError, ValueError):
        # ValueError: malformed JSON (json.JSONDecodeError is a subclass).
        # TypeError: input is not str / bytes / bytearray, e.g. None.
        return False
    return True
def get_icon_html(icon, small=False):
    """Return the markup used to display *icon*.

    Three cases are distinguished, in this order: an icon whose start matches
    EMOJI_PATTERN, an icon for which ``frappe.utils.is_image`` is truthy (with a
    separate variant when *small* is set), and everything else.

    NOTE(review): the string literals in this function carry no markup at all --
    the two image variants are even split across lines and the fallback is an
    empty f-string. This looks like HTML that was stripped from the source, not
    intended behavior; restore the literals from upstream before relying on
    this function. The module-level ``escape_html`` import is unused in the
    visible code and was presumably applied to *icon* here -- confirm.
    """
    from frappe.utils import is_image
    # Normalize None / other falsy values to an empty string.
    icon = icon or ""
    # Emoji icon: the icon text itself is returned.
    if icon and EMOJI_PATTERN.match(icon):
        return f'{icon}'
    # Image icon: one literal for the small variant, another for the regular one.
    if is_image(icon):
        return (
            f"
"
            if small
            else f"
"
        )
    else:
        # Neither emoji nor image.
        return f""
def unescape_html(value):
    """Convert HTML character references in *value* back to plain characters.

    Thin wrapper around the standard library's ``html.unescape``.
    """
    import html as html_lib

    return html_lib.unescape(value)
# adapted from https://raw.githubusercontent.com/html5lib/html5lib-python/4aa79f113e7486c7ec5d15a6e1777bfe546d3259/html5lib/sanitizer.py

# HTML element names allowed by sanitize_html, which unions this set with
# svg_elements, mathml_elements and a few document-level tags.
acceptable_elements = {
    "a",
    "abbr",
    "acronym",
    "address",
    "area",
    "article",
    "aside",
    "audio",
    "b",
    "big",
    "blockquote",
    "br",
    "button",
    "canvas",
    "caption",
    "center",
    "cite",
    "code",
    "col",
    "colgroup",
    "command",
    "datagrid",
    "datalist",
    "dd",
    "del",
    "details",
    "dfn",
    "dialog",
    "dir",
    "div",
    "dl",
    "dt",
    "em",
    "event-source",
    "fieldset",
    "figcaption",
    "figure",
    "footer",
    "font",
    "form",
    "header",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "hr",
    "i",
    "img",
    "input",
    "ins",
    "keygen",
    "kbd",
    "label",
    "legend",
    "li",
    "m",
    "map",
    "mark",
    "menu",
    "meter",
    "multicol",
    "nav",
    "nextid",
    "ol",
    "output",
    "optgroup",
    "option",
    "p",
    "pre",
    "progress",
    "q",
    "s",
    "samp",
    "section",
    "select",
    "small",
    "sound",
    "source",
    "spacer",
    "span",
    "strike",
    "strong",
    "sub",
    "summary",
    "sup",
    "table",
    "tbody",
    "td",
    "textarea",
    "time",
    "tfoot",
    "th",
    "thead",
    "tr",
    "tt",
    "u",
    "ul",
    "var",
    "video",
}
# MathML element names; merged into the allowed tag set by sanitize_html.
mathml_elements = {
    "maction",
    "math",
    "merror",
    "mfrac",
    "mi",
    "mmultiscripts",
    "mn",
    "mo",
    "mover",
    "mpadded",
    "mphantom",
    "mprescripts",
    "mroot",
    "mrow",
    "mspace",
    "msqrt",
    "mstyle",
    "msub",
    "msubsup",
    "msup",
    "mtable",
    "mtd",
    "mtext",
    "mtr",
    "munder",
    "munderover",
    "none",
}
# SVG element names; merged into the allowed tag set by sanitize_html.
svg_elements = {
    "a",
    "animate",
    "animateColor",
    "animateMotion",
    "animateTransform",
    "clipPath",
    "circle",
    "defs",
    "desc",
    "ellipse",
    "font-face",
    "font-face-name",
    "font-face-src",
    "g",
    "glyph",
    "hkern",
    "linearGradient",
    "line",
    "marker",
    "metadata",
    "missing-glyph",
    "mpath",
    "path",
    "polygon",
    "polyline",
    "radialGradient",
    "rect",
    "set",
    "stop",
    "svg",
    "switch",
    "text",
    "title",
    "tspan",
    "use",
}
# Attribute names sanitize_html passes to nh3.clean under the "*" key of its
# attributes mapping. Attributes starting with "data-" are additionally let
# through there via generic_attribute_prefixes.
acceptable_attributes = {
    "abbr",
    "accept",
    "accept-charset",
    "accesskey",
    "action",
    "align",
    "alt",
    "autocomplete",
    "autofocus",
    "axis",
    "background",
    "balance",
    "bgcolor",
    "bgproperties",
    "border",
    "bordercolor",
    "bordercolordark",
    "bordercolorlight",
    "bottompadding",
    "cellpadding",
    "cellspacing",
    "ch",
    "challenge",
    "char",
    "charoff",
    "choff",
    "charset",
    "checked",
    "cite",
    "class",
    "clear",
    "color",
    "cols",
    "colspan",
    "compact",
    "content",
    "contenteditable",
    "controls",
    "coords",
    "data",
    "datafld",
    "datapagesize",
    "datasrc",
    "datetime",
    "default",
    "delay",
    "dir",
    "disabled",
    "draggable",
    "dynsrc",
    "enctype",
    "end",
    "face",
    "for",
    "form",
    "frame",
    "galleryimg",
    "gutter",
    "headers",
    "height",
    "hidefocus",
    "hidden",
    "high",
    "href",
    "hreflang",
    "hspace",
    "icon",
    "id",
    "inputmode",
    "ismap",
    "keytype",
    "label",
    "leftspacing",
    "lang",
    "list",
    "longdesc",
    "loop",
    "loopcount",
    "loopend",
    "loopstart",
    "low",
    "lowsrc",
    "max",
    "maxlength",
    "media",
    "method",
    "min",
    "multiple",
    "name",
    "nohref",
    "noshade",
    "nowrap",
    "open",
    "optimum",
    "pattern",
    "ping",
    "point-size",
    "poster",
    "pqg",
    "preload",
    "prompt",
    "radiogroup",
    "readonly",
    "repeat-max",
    "repeat-min",
    "replace",
    "required",
    "rev",
    "rightspacing",
    "rows",
    "rowspan",
    "rules",
    "scope",
    "selected",
    "shape",
    "size",
    "span",
    "src",
    "start",
    "step",
    "style",
    "summary",
    "suppress",
    "tabindex",
    "target",
    "template",
    "title",
    "toppadding",
    "type",
    "unselectable",
    "usemap",
    "urn",
    "valign",
    "value",
    "variable",
    "volume",
    "vspace",
    "vrml",
    "width",
    "wrap",
    "xml:lang",
    "data-row",
    "data-list",
    "data-language",
    "data-value",
    "role",
    "frameborder",
    "allowfullscreen",
    "spellcheck",
    "data-mode",
    "data-gramm",
    "data-placeholder",
    "data-comment",
    "data-id",
    "data-denotation-char",
    "itemprop",
    "itemscope",
    "itemtype",
    "itemid",
    "itemref",
    "data-is-group",
}
# MathML attribute names.
# NOTE(review): nothing in the visible part of this file references this set
# (sanitize_html only passes acceptable_attributes and svg_attributes) --
# confirm whether it is used elsewhere before relying on or removing it.
mathml_attributes = {
    "actiontype",
    "align",
    "columnalign",
    "columnlines",
    "columnspacing",
    "columnspan",
    "depth",
    "display",
    "displaystyle",
    "equalcolumns",
    "equalrows",
    "fence",
    "fontstyle",
    "fontweight",
    "frame",
    "height",
    "linethickness",
    "lspace",
    "mathbackground",
    "mathcolor",
    "mathvariant",
    "maxsize",
    "minsize",
    "other",
    "rowalign",
    "rowlines",
    "rowspacing",
    "rowspan",
    "rspace",
    "scriptlevel",
    "selection",
    "separator",
    "stretchy",
    "width",
    "xlink:href",
    "xlink:show",
    "xlink:type",
    "xmlns",
    "xmlns:xlink",
}
# SVG attribute names; sanitize_html passes this set to nh3.clean under the
# "svg" key of its attributes mapping.
svg_attributes = {
    "accent-height",
    "accumulate",
    "additive",
    "alphabetic",
    "arabic-form",
    "ascent",
    "attributeName",
    "attributeType",
    "baseProfile",
    "bbox",
    "begin",
    "by",
    "calcMode",
    "cap-height",
    "class",
    "clip-path",
    "color",
    "color-rendering",
    "content",
    "colwidth",
    "cx",
    "cy",
    "d",
    "dx",
    "dy",
    "descent",
    "display",
    "dur",
    "end",
    "fill",
    "fill-opacity",
    "fill-rule",
    "font-family",
    "font-size",
    "font-stretch",
    "font-style",
    "font-variant",
    "font-weight",
    "from",
    "fx",
    "fy",
    "g1",
    "g2",
    "glyph-name",
    "gradientUnits",
    "hanging",
    "height",
    "horiz-adv-x",
    "horiz-origin-x",
    "id",
    "ideographic",
    "k",
    "keyPoints",
    "keySplines",
    "keyTimes",
    "lang",
    "marker-end",
    "marker-mid",
    "marker-start",
    "markerHeight",
    "markerUnits",
    "markerWidth",
    "mathematical",
    "max",
    "min",
    "name",
    "offset",
    "opacity",
    "orient",
    "origin",
    "overline-position",
    "overline-thickness",
    "panose-1",
    "path",
    "pathLength",
    "points",
    "preserveAspectRatio",
    "r",
    "refX",
    "refY",
    "repeatCount",
    "repeatDur",
    "requiredExtensions",
    "requiredFeatures",
    "restart",
    "rotate",
    "rx",
    "ry",
    "slope",
    "stemh",
    "stemv",
    "stop-color",
    "stop-opacity",
    "strikethrough-position",
    "strikethrough-thickness",
    "stroke",
    "stroke-dasharray",
    "stroke-dashoffset",
    "stroke-linecap",
    "stroke-linejoin",
    "stroke-miterlimit",
    "stroke-opacity",
    "stroke-width",
    "systemLanguage",
    "target",
    "text-anchor",
    "to",
    "transform",
    "type",
    "u1",
    "u2",
    "underline-position",
    "underline-thickness",
    "unicode",
    "unicode-range",
    "units-per-em",
    "values",
    "version",
    "viewBox",
    "visibility",
    "width",
    "widths",
    "x",
    "x-height",
    "x1",
    "x2",
    "xlink:actuate",
    "xlink:arcrole",
    "xlink:href",
    "xlink:role",
    "xlink:show",
    "xlink:title",
    "xlink:type",
    "xml:base",
    "xml:lang",
    "xml:space",
    "xmlns",
    "xmlns:xlink",
    "y",
    "y1",
    "y2",
    "zoomAndPan",
}