seitime-frappe/frappe/utils/html_utils.py
dependabot[bot] bdb77ba521
chore(deps): bump bleach from 2.1.4 to 3.1.2 (#9779)
* chore(deps): bump bleach from 2.1.4 to 3.1.2

Bumps [bleach](https://github.com/mozilla/bleach) from 2.1.4 to 3.1.2.
- [Release notes](https://github.com/mozilla/bleach/releases)
- [Changelog](https://github.com/mozilla/bleach/blob/master/CHANGES)
- [Commits](https://github.com/mozilla/bleach/compare/v2.1.4...v3.1.2)

Signed-off-by: dependabot[bot] <support@github.com>

* chore: use html.parser instead of html5lib

bleach no longer ships html5lib as a requirement and instead has
included it in the system. we can switch to using html.parser instead;
which is not an external dependency unlike html5lib.

source: https://github.com/mozilla/bleach/blob/master/CHANGES#L206

Signed-off-by: Chinmay D. Pai <chinmaydpai@gmail.com>

* chore: use html5lib from bleach

Signed-off-by: Chinmay D. Pai <chinmaydpai@gmail.com>

* chore: import html5lib from bleach._vendor

fixes issue where bs4 is unable to find html5lib in the tree:

bs4.FeatureNotFound: Couldn't find a tree builder with the features you requested: bleach._vendor.html5lib. Do you need to install a parser library?

Signed-off-by: Chinmay D. Pai <chinmaydpai@gmail.com>

* chore: just give up and install html5lib as a dependency

Signed-off-by: Chinmay D. Pai <chinmaydpai@gmail.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Chinmay D. Pai <chinmaydpai@gmail.com>
2020-03-25 00:20:57 +05:30

213 lines
9.1 KiB
Python

from __future__ import unicode_literals
import frappe
import json
import re
import bleach
import bleach_whitelist.bleach_whitelist as bleach_whitelist
from six import string_types
from bs4 import BeautifulSoup
def clean_html(html):
if not isinstance(html, string_types):
return html
return bleach.clean(clean_script_and_style(html),
tags=['div', 'p', 'br', 'ul', 'ol', 'li', 'strong', 'b', 'em', 'i', 'u',
'table', 'thead', 'tbody', 'td', 'tr'],
attributes=[],
styles=['color', 'border', 'border-color'],
strip=True, strip_comments=True)
def clean_email_html(html):
if not isinstance(html, string_types):
return html
return bleach.clean(clean_script_and_style(html),
tags=['div', 'p', 'br', 'ul', 'ol', 'li', 'strong', 'b', 'em', 'i', 'u', 'a',
'table', 'thead', 'tbody', 'td', 'tr', 'th', 'pre', 'code',
'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'button', 'img'],
attributes=['border', 'colspan', 'rowspan',
'src', 'href', 'style', 'id'],
styles=['color', 'border-color', 'width', 'height', 'max-width',
'background-color', 'border-collapse', 'border-radius',
'border', 'border-top', 'border-bottom', 'border-left', 'border-right',
'margin', 'margin-top', 'margin-bottom', 'margin-left', 'margin-right',
'padding', 'padding-top', 'padding-bottom', 'padding-left', 'padding-right',
'font-size', 'font-weight', 'font-family', 'text-decoration',
'line-height', 'text-align', 'vertical-align'
],
protocols=['cid', 'http', 'https', 'mailto', 'data'],
strip=True, strip_comments=True)
def clean_script_and_style(html):
# remove script and style
soup = BeautifulSoup(html, 'html5lib')
for s in soup(['script', 'style']):
s.decompose()
return frappe.as_unicode(soup)
def sanitize_html(html, linkify=False):
"""
Sanitize HTML tags, attributes and style to prevent XSS attacks
Based on bleach clean, bleach whitelist and html5lib's Sanitizer defaults
Does not sanitize JSON, as it could lead to future problems
"""
if not isinstance(html, string_types):
return html
elif is_json(html):
return html
tags = (acceptable_elements + svg_elements + mathml_elements
+ ["html", "head", "meta", "link", "body", "style", "o:p"])
attributes = {"*": acceptable_attributes, 'svg': svg_attributes}
styles = bleach_whitelist.all_styles
strip_comments = False
# retuns html with escaped tags, escaped orphan >, <, etc.
escaped_html = bleach.clean(html, tags=tags, attributes=attributes, styles=styles,
strip_comments=strip_comments, protocols=['cid', 'http', 'https', 'mailto'])
if linkify:
escaped_html = bleach.linkify(escaped_html, callbacks=[])
return escaped_html
def is_json(text):
try:
json.loads(text)
except ValueError:
return False
else:
return True
def get_icon_html(icon, small=False):
from frappe.utils import is_image
emoji_pattern = re.compile(
u"(\ud83d[\ude00-\ude4f])|"
u"(\ud83c[\udf00-\uffff])|"
u"(\ud83d[\u0000-\uddff])|"
u"(\ud83d[\ude80-\udeff])|"
u"(\ud83c[\udde0-\uddff])"
"+", flags=re.UNICODE)
icon = icon or ""
if icon and emoji_pattern.match(icon):
return '<span class="text-muted">' + icon + '</span>'
if is_image(icon):
return \
'<img style="width: 16px; height: 16px;" src="{icon}">'.format(icon=icon) \
if small else \
'<img src="{icon}">'.format(icon=icon)
else:
return "<i class='{icon}'></i>".format(icon=icon)
# adapted from https://raw.githubusercontent.com/html5lib/html5lib-python/4aa79f113e7486c7ec5d15a6e1777bfe546d3259/html5lib/sanitizer.py
acceptable_elements = [
'a', 'abbr', 'acronym', 'address', 'area',
'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button',
'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn',
'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1',
'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins',
'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter',
'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option',
'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select',
'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong',
'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video'
]
mathml_elements = [
'maction', 'math', 'merror', 'mfrac', 'mi',
'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom',
'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub',
'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder',
'munderover', 'none'
]
svg_elements = [
'a', 'animate', 'animateColor', 'animateMotion',
'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse',
'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern',
'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use'
]
acceptable_attributes = [
'abbr', 'accept', 'accept-charset', 'accesskey',
'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
'background', 'balance', 'bgcolor', 'bgproperties', 'border',
'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding',
'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords',
'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default',
'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end',
'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace',
'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing',
'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend',
'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method',
'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open',
'optimum', 'pattern', 'ping', 'point-size', 'poster', 'pqg', 'preload',
'prompt', 'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
'width', 'wrap', 'xml:lang', 'data-row', 'data-list', 'data-language',
'data-value', 'role', 'frameborder', 'allowfullscreen', 'spellcheck',
'data-mode', 'data-gramm', 'data-placeholder', 'data-comment',
'data-id', 'data-denotation-char', 'itemprop', 'itemscope',
'itemtype', 'itemid', 'itemref', 'datetime'
]
mathml_attributes = [
'actiontype', 'align', 'columnalign', 'columnalign',
'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth',
'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence',
'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace',
'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize',
'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines',
'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show',
'xlink:type', 'xmlns', 'xmlns:xlink'
]
svg_attributes = [
'accent-height', 'accumulate', 'additive', 'alphabetic',
'arabic-form', 'ascent', 'attributeName', 'attributeType',
'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
'fill-opacity', 'fill-rule', 'font-family', 'font-size',
'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging',
'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end',
'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset',
'opacity', 'orient', 'origin', 'overline-position',
'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
'stop-opacity', 'strikethrough-position', 'strikethrough-thickness',
'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
'transform', 'type', 'u1', 'u2', 'underline-position',
'underline-thickness', 'unicode', 'unicode-range', 'units-per-em',
'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x',
'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole',
'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type',
'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
'y1', 'y2', 'zoomAndPan'
]