refactor: don't modify email library's dictionary

Keep our own map of alternative character sets

Signed-off-by: Akhil Narang <me@akhilnarang.dev>
This commit is contained in:
Akhil Narang 2024-04-19 17:36:30 +05:30
parent 69f9db6751
commit 441379e7a8
No known key found for this signature in database
GPG key ID: 9DCC61E211BF645F
2 changed files with 26 additions and 15 deletions

View file

@ -2501,9 +2501,22 @@ def safe_encode(param, encoding="utf-8"):
return param
def safe_decode(param, encoding="utf-8"):
def safe_decode(param, encoding="utf-8", fallback_map: dict | None = None):
"""
Method to safely decode data into a string
:param param: The data to be decoded
:param encoding: The encoding to decode into
:param fallback_map: A fallback map to reference in case of a LookupError
:return:
"""
try:
param = param.decode(encoding)
except LookupError:
try:
param = param.decode((fallback_map or {}).get(encoding, "utf-8"))
except Exception:
pass
except Exception:
pass
return param

View file

@ -3,7 +3,6 @@
import datetime
import email
import email.charset
import email.utils
import imaplib
import json
@ -39,19 +38,16 @@ from frappe.utils import (
from frappe.utils.html_utils import clean_email_html
from frappe.utils.user import is_system_user
# use alias charset for python unknown charset
email.charset.ALIASES.update(
{
"windows-874": "cp874",
}
)
# fix due to a python bug in poplib that limits it to 2048
# (raises poplib's module-level line-length cap to 100000)
poplib._MAXLINE = 1_00_000
# Matches the run of word characters, "/" and "-" that immediately follows a "[".
THREAD_ID_PATTERN = re.compile(r"(?<=\[)[\w/-]+")
# Matches each run of word characters.
WORDS_PATTERN = re.compile(r"\w+")
# Charset names that Python does not know, mapped to the codec name it does know.
# Kept as our own table so the email library's alias dictionary is not modified.
ALTERNATE_CHARSET_MAP = {
"windows-874": "cp874",
}
# Validation error specific to e-mail size limits; adds nothing to frappe.ValidationError.
# NOTE(review): presumably raised when a message exceeds the configured maximum size -
# confirm at the raise site.
class EmailSizeExceededError(frappe.ValidationError):
pass
@ -413,12 +409,10 @@ class Email:
"""Parse and decode `Subject` header."""
_subject = decode_header(self.mail.get("Subject", "No Subject"))
self.subject = _subject[0][0] or ""
charset = _subject[0][1]
if charset:
if charset := _subject[0][1]:
# Encoding is known by decode_header (might also be unknown-8bit)
charset = email.charset.ALIASES.get(charset, charset)
self.subject = safe_decode(self.subject, charset)
self.subject = safe_decode(self.subject, charset, ALTERNATE_CHARSET_MAP)
if isinstance(self.subject, bytes):
# Fall back to utf-8 if the charset is unknown or decoding fails
@ -512,11 +506,15 @@ class Email:
def get_payload(self, part):
    """Return the text of a message part, decoded with the part's charset.

    The transfer-decoded payload is converted to ``str`` using the charset
    reported by ``self.get_charset``. If Python has no codec under that name,
    the substitute listed in ``ALTERNATE_CHARSET_MAP`` is used (exact match
    first, then lower-cased), defaulting to "utf-8". Bytes that cannot be
    decoded are dropped ("ignore").

    :param part: The message part to read
    :return: The decoded text, or the undecoded ``part.get_payload()`` when the
        substitute codec cannot be used either
    """
    charset = self.get_charset(part)
    # Undo the transfer encoding once and reuse the result for both attempts.
    raw = part.get_payload(decode=True)

    try:
        return str(raw, str(charset), "ignore")
    except LookupError:
        # Python does not know this charset name; try a known substitute below.
        pass

    try:
        # Codec names are case-insensitive, so also try the lower-cased name.
        fallback = ALTERNATE_CHARSET_MAP.get(charset) or ALTERNATE_CHARSET_MAP.get(
            str(charset).lower(), "utf-8"
        )
        return str(raw, fallback, "ignore")
    except Exception:
        # Last resort: hand back the payload exactly as the message stores it.
        return part.get_payload()
def get_attachment(self, part):
# charset = self.get_charset(part)