diff --git a/frappe/core/utils.py b/frappe/core/utils.py
index 13b912b3aa..10a4edca96 100644
--- a/frappe/core/utils.py
+++ b/frappe/core/utils.py
@@ -1,6 +1,7 @@
 # Copyright (c) 2015, Frappe Technologies Pvt. Ltd. and Contributors
 # License: MIT. See LICENSE
 
+
 from markdownify import markdownify as md
 
 import frappe
@@ -88,3 +89,69 @@ def html2text(html: str, strip_links=False, wrap=True) -> str:
 	"""Return the given `html` as markdown text."""
 	strip = ["a"] if strip_links else None
 	return md(html, heading_style="ATX", strip=strip, wrap=wrap)
+
+
+def html_to_plain_text(html: str) -> str:
+	"""Return the given `html` as plain text.
+
+	Script and style content is dropped, `<br>` and block-level elements end the
+	current line, inline formatting tags are flattened so they add no stray
+	spaces, and link targets are kept as `text (url)` since plain text cannot
+	carry an anchor.
+
+	:param html: HTML markup; a falsy value (`None` or an empty string) returns
+		an empty string.
+	:return: Text with whitespace runs collapsed to a single space and
+		consecutive blank lines collapsed to one.
+	"""
+
+	if not html:
+		return ""
+
+	from bs4 import BeautifulSoup
+
+	block_tags = ["p", "div", "tr", "li", "h1", "h2", "h3", "h4", "h5", "h6"]
+	# Formatting-only tags: they never start a new line, so they must not split text.
+	inline_tags = ["a", "b", "strong", "i", "em", "u", "s", "span", "font", "small", "code", "mark"]
+
+	soup = BeautifulSoup(html, "html.parser")
+
+	for element in soup(["script", "style"]):
+		element.decompose()
+
+	# Plain text cannot carry an anchor, so spell the target out after the link text
+	# unless the text already is the target (bare URLs, e-mail addresses).
+	for anchor in soup.find_all("a", href=True):
+		href = anchor["href"].strip()
+		label = anchor.get_text().strip()
+		if not href or href.lower().startswith(("#", "javascript:")):
+			continue
+		if href not in (label, f"mailto:{label}"):
+			anchor.append(f" ({href})")
+
+	# Flatten inline tags; otherwise the separator used below puts a space at every
+	# tag boundary ("<b>Hi</b>, there" would become "Hi , there").
+	for inline in soup.find_all(inline_tags):
+		inline.unwrap()
+
+	# Introduce explicit newlines for line breaks and block-level elements.
+	for br in soup.find_all("br"):
+		br.replace_with("\n")
+
+	for block in soup.find_all(block_tags):
+		block.append("\n")
+
+	# Merge the adjacent strings left behind by the edits above so the separator
+	# only lands between strings that are still divided by a tag (e.g. table cells).
+	soup.smooth()
+	text = soup.get_text(separator=" ")
+
+	cleaned = []
+	for raw_line in text.splitlines():
+		# Collapse whitespace runs inside the line, not just at its ends.
+		line = " ".join(raw_line.split())
+		# Keep a single blank line between paragraphs, never several in a row.
+		if line or (cleaned and cleaned[-1]):
+			cleaned.append(line)
+
+	return "\n".join(cleaned).strip()
diff --git a/frappe/email/doctype/email_queue/email_queue.py b/frappe/email/doctype/email_queue/email_queue.py
index b0576dfe38..1eed638991 100644
--- a/frappe/email/doctype/email_queue/email_queue.py
+++ b/frappe/email/doctype/email_queue/email_queue.py
@@ -13,7 +13,7 @@ from typing import TYPE_CHECKING
 
 import frappe
 from frappe import _, are_emails_muted, safe_encode, task
-from frappe.core.utils import html2text
+from frappe.core.utils import html_to_plain_text
 from frappe.database.database import savepoint
 from frappe.email.doctype.email_account.email_account import EmailAccount
 from frappe.email.email_body import add_attachment, get_email, get_formatted_html
@@ -680,7 +680,7 @@ class QueueBuilder:
 			return self._text_content + unsubscribe_text_message
 
 		try:
-			text_content = html2text(self._message)
+			text_content = html_to_plain_text(self._message)
 		except Exception:
 			text_content = "See html attachment"
 		return text_content + unsubscribe_text_message