diff --git a/frappe/core/utils.py b/frappe/core/utils.py index 8581f30f89..b445257b7d 100644 --- a/frappe/core/utils.py +++ b/frappe/core/utils.py @@ -1,6 +1,8 @@ # Copyright (c) 2015, Frappe Technologies Pvt. Ltd. and Contributors # License: MIT. See LICENSE +from markdownify import markdownify as md + import frappe @@ -86,3 +88,8 @@ def ljust_list(_list, length, fill_word=None): _list.extend([fill_word] * fill_length) return _list + + +def html2text(html, strip_links=False, wrap=True): + strip = ["a"] if strip_links else None + return md(html, heading_style="ATX", strip=strip, wrap=wrap) diff --git a/frappe/desk/form/document_follow.py b/frappe/desk/form/document_follow.py index 86bd712926..2e4bcedf5a 100644 --- a/frappe/desk/form/document_follow.py +++ b/frappe/desk/form/document_follow.py @@ -183,7 +183,7 @@ def get_version(doctype, doc_name, frequency, user): def get_comments(doctype, doc_name, frequency, user): - from html2text import html2text + from frappe.core.utils import html2text timeline = [] filters = get_filters("reference_name", doc_name, frequency, user) @@ -225,7 +225,7 @@ def get_follow_users(doctype, doc_name): def get_row_changed(row_changed, time, doctype, doc_name, v): - from html2text import html2text + from frappe.core.utils import html2text items = [] for d in row_changed: @@ -269,7 +269,7 @@ def get_added_row(added, time, doctype, doc_name, v): def get_field_changed(changed, time, doctype, doc_name, v): - from html2text import html2text + from frappe.core.utils import html2text items = [] for d in changed: diff --git a/frappe/email/doctype/email_queue/email_queue.py b/frappe/email/doctype/email_queue/email_queue.py index 7b57adf6fb..221f3fbb31 100644 --- a/frappe/email/doctype/email_queue/email_queue.py +++ b/frappe/email/doctype/email_queue/email_queue.py @@ -8,11 +8,11 @@ import traceback from email.parser import Parser from email.policy import SMTPUTF8 -from html2text import html2text from rq.timeouts import JobTimeoutException import frappe from frappe import _, safe_encode, task +from frappe.core.utils import html2text from frappe.email.doctype.email_account.email_account import EmailAccount from frappe.email.email_body import add_attachment, get_email, get_formatted_html from frappe.email.queue import get_unsubcribed_url, get_unsubscribe_message diff --git a/frappe/integrations/frappe_providers/frappecloud.py b/frappe/integrations/frappe_providers/frappecloud.py index 64aa847a0e..bae811d41d 100644 --- a/frappe/integrations/frappe_providers/frappecloud.py +++ b/frappe/integrations/frappe_providers/frappecloud.py @@ -1,8 +1,8 @@ import click import requests -from html2text import html2text import frappe +from frappe.core.utils import html2text def frappecloud_migrator(local_site): diff --git a/frappe/utils/data.py b/frappe/utils/data.py index da6f590a8f..7a1f05220c 100644 --- a/frappe/utils/data.py +++ b/frappe/utils/data.py @@ -1911,7 +1911,7 @@ def get_string_between(start: str, string: str, end: str) -> str: def to_markdown(html: str) -> str: from html.parser import HTMLParser - from html2text import html2text + from frappe.core.utils import html2text try: return html2text(html or "") diff --git a/frappe/utils/safe_exec.py b/frappe/utils/safe_exec.py index 9136df1062..03f5d041ce 100644 --- a/frappe/utils/safe_exec.py +++ b/frappe/utils/safe_exec.py @@ -4,7 +4,6 @@ import json import mimetypes import RestrictedPython.Guards -from html2text import html2text from RestrictedPython import compile_restricted, safe_globals import frappe @@ -13,6 +12,7 @@ import frappe.integrations.utils import frappe.utils import frappe.utils.data from frappe import _ +from frappe.core.utils import html2text from frappe.frappeclient import FrappeClient from frappe.handler import execute_cmd from frappe.model.delete_doc import delete_doc diff --git a/frappe/utils/xlsxutils.py b/frappe/utils/xlsxutils.py index 1b898f69a2..fc4ef33e88 100644 --- a/frappe/utils/xlsxutils.py +++ b/frappe/utils/xlsxutils.py @@ -52,7 +52,7 @@ def make_xlsx(data, sheet_name, wb=None, column_widths=None): def handle_html(data): - from html2text import HTML2Text + from frappe.core.utils import html2text # return if no html tags found data = frappe.as_unicode(data) @@ -62,12 +62,8 @@ def handle_html(data): h = unescape_html(data or "") - obj = HTML2Text() - obj.ignore_links = True - obj.body_width = 0 - try: - value = obj.handle(h) + value = html2text(h, strip_links=True, wrap=False) except Exception: # unable to parse html, send it raw return data diff --git a/frappe/www/search.py b/frappe/www/search.py index 6463d8138f..d8a939cb15 100644 --- a/frappe/www/search.py +++ b/frappe/www/search.py @@ -1,8 +1,8 @@ -from html2text import html2text from jinja2 import utils import frappe from frappe import _ +from frappe.core.utils import html2text from frappe.utils import sanitize_html from frappe.utils.global_search import web_search diff --git a/pyproject.toml b/pyproject.toml index ad97b1eeea..a1706ac33e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,6 @@ dependencies = [ "git-url-parse~=1.2.2", "gitdb~=4.0.7", "gunicorn~=20.1.0", - "html2text==2020.1.16", "html5lib~=1.1", "ipython~=8.4.0", "ldap3~=2.9", @@ -73,6 +72,7 @@ dependencies = [ "urllib3~=1.26.4", "xlrd~=2.0.1", "zxcvbn-python~=4.4.24", + "markdownify~=0.11.2", # integration dependencies "boto3~=1.17.53",