refactor: don't modify email library's dictionary
Keep our own map of alternative character sets.
Signed-off-by: Akhil Narang <me@akhilnarang.dev>
This commit is contained in:
parent
69f9db6751
commit
441379e7a8
2 changed files with 26 additions and 15 deletions
|
|
@ -2501,9 +2501,22 @@ def safe_encode(param, encoding="utf-8"):
|
|||
return param
|
||||
|
||||
|
||||
def safe_decode(param, encoding="utf-8"):
|
||||
def safe_decode(param, encoding="utf-8", fallback_map: dict | None = None):
|
||||
"""
|
||||
Method to safely decode data into a string
|
||||
|
||||
:param param: The data to be decoded
|
||||
:param encoding: The encoding to decode into
|
||||
:param fallback_map: A fallback map to reference in case of a LookupError
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
param = param.decode(encoding)
|
||||
except LookupError:
|
||||
try:
|
||||
param = param.decode((fallback_map or {}).get(encoding, "utf-8"))
|
||||
except Exception:
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
return param
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@
|
|||
|
||||
import datetime
|
||||
import email
|
||||
import email.charset
|
||||
import email.utils
|
||||
import imaplib
|
||||
import json
|
||||
|
|
@ -39,19 +38,16 @@ from frappe.utils import (
|
|||
from frappe.utils.html_utils import clean_email_html
|
||||
from frappe.utils.user import is_system_user
|
||||
|
||||
# use alias charset for python unknown charset
|
||||
email.charset.ALIASES.update(
|
||||
{
|
||||
"windows-874": "cp874",
|
||||
}
|
||||
)
|
||||
|
||||
# fix due to a python bug in poplib that limits it to 2048
|
||||
poplib._MAXLINE = 1_00_000
|
||||
|
||||
THREAD_ID_PATTERN = re.compile(r"(?<=\[)[\w/-]+")
|
||||
WORDS_PATTERN = re.compile(r"\w+")
|
||||
|
||||
ALTERNATE_CHARSET_MAP = {
|
||||
"windows-874": "cp874",
|
||||
}
|
||||
|
||||
|
||||
class EmailSizeExceededError(frappe.ValidationError):
|
||||
pass
|
||||
|
|
@ -413,12 +409,10 @@ class Email:
|
|||
"""Parse and decode `Subject` header."""
|
||||
_subject = decode_header(self.mail.get("Subject", "No Subject"))
|
||||
self.subject = _subject[0][0] or ""
|
||||
charset = _subject[0][1]
|
||||
|
||||
if charset:
|
||||
if charset := _subject[0][1]:
|
||||
# Encoding is known by decode_header (might also be unknown-8bit)
|
||||
charset = email.charset.ALIASES.get(charset, charset)
|
||||
self.subject = safe_decode(self.subject, charset)
|
||||
self.subject = safe_decode(self.subject, charset, ALTERNATE_CHARSET_MAP)
|
||||
|
||||
if isinstance(self.subject, bytes):
|
||||
# Fall back to utf-8 if the charset is unknown or decoding fails
|
||||
|
|
@ -512,11 +506,15 @@ class Email:
|
|||
|
||||
def get_payload(self, part):
    """Return the payload of ``part`` decoded to text.

    The payload is decoded with the charset reported by ``self.get_charset``,
    ignoring undecodable bytes. If Python does not recognise that charset
    (``LookupError``), the decode is retried with the codec named in
    ``ALTERNATE_CHARSET_MAP``, defaulting to ``"utf-8"``. If that also fails,
    the payload is returned as ``part.get_payload()`` gives it, without
    transfer-decoding.

    :param part: The message part whose payload should be extracted.
    :return: The decoded payload text, or the undecoded payload as a last resort.
    """
    charset = self.get_charset(part)
    raw_payload = part.get_payload(decode=True)
    try:
        return str(raw_payload, str(charset), "ignore")
    except LookupError:
        # Charset name unknown to Python: fall back to our own alias map
        # instead of patching the email library's ALIASES dictionary.
        try:
            return str(raw_payload, ALTERNATE_CHARSET_MAP.get(charset, "utf-8"), "ignore")
        except Exception:
            return part.get_payload()
|
||||
|
||||
def get_attachment(self, part):
|
||||
# charset = self.get_charset(part)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue