From 69f9db6751220298294fd4d2dbd54ed2622b38d3 Mon Sep 17 00:00:00 2001 From: "Kitti U. @ Ecosoft" Date: Wed, 17 Apr 2024 17:00:27 +0700 Subject: [PATCH] fix: unknown charset windows-874 problem on incoming mail When the sender uses email with the windows-874 charset (e.g., Outlook / Thai), the incoming email (e.g., one sent to an Issue) ends up with garbled characters. This happens because Python does not know about this charset. This fix uses an alias charset (cp874) for the problematic charset. --- frappe/email/receive.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/frappe/email/receive.py b/frappe/email/receive.py index f0b49de7eb..82cc2c25a2 100644 --- a/frappe/email/receive.py +++ b/frappe/email/receive.py @@ -3,6 +3,7 @@ import datetime import email +import email.charset import email.utils import imaplib import json @@ -38,6 +39,13 @@ from frappe.utils import ( from frappe.utils.html_utils import clean_email_html from frappe.utils.user import is_system_user +# use alias charset for python unknown charset +email.charset.ALIASES.update( + { + "windows-874": "cp874", + } +) + # fix due to a python bug in poplib that limits it to 2048 poplib._MAXLINE = 1_00_000 @@ -405,10 +413,12 @@ class Email: """Parse and decode `Subject` header.""" _subject = decode_header(self.mail.get("Subject", "No Subject")) self.subject = _subject[0][0] or "" + charset = _subject[0][1] - if _subject[0][1]: + if charset: # Encoding is known by decode_header (might also be unknown-8bit) - self.subject = safe_decode(self.subject, _subject[0][1]) + charset = email.charset.ALIASES.get(charset, charset) + self.subject = safe_decode(self.subject, charset) if isinstance(self.subject, bytes): # Fall back to utf-8 if the charset is unknown or decoding fails @@ -502,7 +512,7 @@ class Email: def get_payload(self, part): charset = self.get_charset(part) - + charset = email.charset.ALIASES.get(charset, charset) try: return str(part.get_payload(decode=True), str(charset), "ignore") except 
LookupError: