fix: unknown charset windows-874 problem on incoming mail

When the sender's email uses the windows-874 charset (e.g., Outlook with Thai text), the incoming email (e.g., one sent to an Issue) is displayed with garbled characters.

This happens because Python does not recognize this charset name. The fix registers an alias that maps the problematic charset to a codec Python knows (windows-874 -> cp874).
This commit is contained in:
Kitti U. @ Ecosoft 2024-04-17 17:00:27 +07:00 committed by Kitti U
parent e020862b34
commit 69f9db6751

View file

@ -3,6 +3,7 @@
import datetime
import email
import email.charset
import email.utils
import imaplib
import json
@ -38,6 +39,13 @@ from frappe.utils import (
from frappe.utils.html_utils import clean_email_html
from frappe.utils.user import is_system_user
# Alias charset names that Python does not recognize to codecs it knows
email.charset.ALIASES.update(
{
"windows-874": "cp874",
}
)
# fix due to a python bug in poplib that limits it to 2048
poplib._MAXLINE = 1_00_000
@ -405,10 +413,12 @@ class Email:
"""Parse and decode `Subject` header."""
_subject = decode_header(self.mail.get("Subject", "No Subject"))
self.subject = _subject[0][0] or ""
charset = _subject[0][1]
if _subject[0][1]:
if charset:
# Encoding is known by decode_header (might also be unknown-8bit)
self.subject = safe_decode(self.subject, _subject[0][1])
charset = email.charset.ALIASES.get(charset, charset)
self.subject = safe_decode(self.subject, charset)
if isinstance(self.subject, bytes):
# Fall back to utf-8 if the charset is unknown or decoding fails
@ -502,7 +512,7 @@ class Email:
def get_payload(self, part):
charset = self.get_charset(part)
charset = email.charset.ALIASES.get(charset, charset)
try:
return str(part.get_payload(decode=True), str(charset), "ignore")
except LookupError: