From 441379e7a8b1b7468cbd37c15c5174b4c0faa9a4 Mon Sep 17 00:00:00 2001
From: Akhil Narang
Date: Fri, 19 Apr 2024 17:36:30 +0530
Subject: [PATCH] refactor: don't modify email library's dictionary

Keep our own map of alternative character sets

Signed-off-by: Akhil Narang
---
 frappe/__init__.py      | 15 ++++++++++++++-
 frappe/email/receive.py | 26 ++++++++++++--------------
 2 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/frappe/__init__.py b/frappe/__init__.py
index 77b840412e..9c37d47d0c 100644
--- a/frappe/__init__.py
+++ b/frappe/__init__.py
@@ -2501,9 +2501,22 @@ def safe_encode(param, encoding="utf-8"):
 	return param
 
 
-def safe_decode(param, encoding="utf-8"):
+def safe_decode(param, encoding="utf-8", fallback_map: dict | None = None):
+	"""
+	Method to safely decode data into a string
+
+	:param param: The data to be decoded
+	:param encoding: The encoding to decode into
+	:param fallback_map: A fallback map to reference in case of a LookupError
+	:return:
+	"""
 	try:
 		param = param.decode(encoding)
+	except LookupError:
+		try:
+			param = param.decode((fallback_map or {}).get(encoding, "utf-8"))
+		except Exception:
+			pass
 	except Exception:
 		pass
 	return param
diff --git a/frappe/email/receive.py b/frappe/email/receive.py
index 82cc2c25a2..4d23a4d4b3 100644
--- a/frappe/email/receive.py
+++ b/frappe/email/receive.py
@@ -3,7 +3,6 @@
 
 import datetime
 import email
-import email.charset
 import email.utils
 import imaplib
 import json
@@ -39,19 +38,16 @@ from frappe.utils import (
 from frappe.utils.html_utils import clean_email_html
 from frappe.utils.user import is_system_user
 
-# use alias charset for python unknown charset
-email.charset.ALIASES.update(
-	{
-		"windows-874": "cp874",
-	}
-)
-
 # fix due to a python bug in poplib that limits it to 2048
 poplib._MAXLINE = 1_00_000
 
 THREAD_ID_PATTERN = re.compile(r"(?<=\[)[\w/-]+")
 WORDS_PATTERN = re.compile(r"\w+")
 
+ALTERNATE_CHARSET_MAP = {
+	"windows-874": "cp874",
+}
+
 
 class EmailSizeExceededError(frappe.ValidationError):
 	pass
@@ -413,12 +409,10 @@ class Email:
 		"""Parse and decode `Subject` header."""
 		_subject = decode_header(self.mail.get("Subject", "No Subject"))
 		self.subject = _subject[0][0] or ""
-		charset = _subject[0][1]
 
-		if charset:
+		if charset := _subject[0][1]:
 			# Encoding is known by decode_header (might also be unknown-8bit)
-			charset = email.charset.ALIASES.get(charset, charset)
-			self.subject = safe_decode(self.subject, charset)
+			self.subject = safe_decode(self.subject, charset, ALTERNATE_CHARSET_MAP)
 
 		if isinstance(self.subject, bytes):
 			# Fall back to utf-8 if the charset is unknown or decoding fails
@@ -512,11 +506,15 @@ class Email:
 
 	def get_payload(self, part):
 		charset = self.get_charset(part)
-		charset = email.charset.ALIASES.get(charset, charset)
 		try:
 			return str(part.get_payload(decode=True), str(charset), "ignore")
 		except LookupError:
-			return part.get_payload()
+			try:
+				return str(
+					part.get_payload(decode=True), ALTERNATE_CHARSET_MAP.get(charset, "utf-8"), "ignore"
+				)
+			except Exception:
+				return part.get_payload()
 
 	def get_attachment(self, part):
 		# charset = self.get_charset(part)