fix: properly decode encoded Subject header (#37016)

* fix: properly decode encoded `Subject` header

* test(email): add RFC2047 subject decoding tests for InboundMail
This commit is contained in:
s-aga-r 2026-02-16 20:42:52 +05:30 committed by GitHub
parent 06c8217c99
commit f951445e82
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 79 additions and 11 deletions

View file

@ -651,6 +651,68 @@ class TestInboundMail(IntegrationTestCase):
reference_doc = inbound_mail.reference_document()
self.assertEqual(todo.name, reference_doc.name)
def test_inbound_mail_decodes_rfc2047_subject(self):
    """Check that RFC 2047 encoded `Subject` headers are fully decoded.

    Each case is a pair of (raw header value placed into the fixture mail,
    expected `InboundMail.subject`). The expected values below all carry a
    leading "RE: " in front of the decoded header text.
    """
    subjects = [
        # UTF-8 Quoted-Printable (English)
        (
            "=?UTF-8?Q?New_Notifications?=",
            "RE: New Notifications",
        ),
        # UTF-8 Base64 (English)
        (
            "=?UTF-8?B?TmV3IE5vdGlmaWNhdGlvbnM=?=",
            "RE: New Notifications",
        ),
        # FWD prefix + Base64 (Russian)
        (
            "FWD: =?UTF-8?B?0J/RgNC40LLQtdGCINC80LjRgA==?=",
            "RE: FWD: Привет мир",
        ),
        # RE prefix + Quoted-Printable (Russian)
        (
            "RE: =?UTF-8?Q?=D0=9E=D1=82=D1=87=D0=B5=D1=82_=D0=B3=D0=BE=D1=82=D0=BE=D0=B2?=",
            "RE: RE: Отчет готов",
        ),
        # Mixed plain + encoded (number symbol)
        (
            "Invoice =?UTF-8?Q?=E2=84=96_1234?=",
            "RE: Invoice № 1234",
        ),
        # Multiple encoded words (split header)
        (
            "=?UTF-8?B?TmV3?= =?UTF-8?B?IE5vdGlmaWNhdGlvbnM=?=",
            "RE: New Notifications",
        ),
        # Emoji (Quoted-Printable)
        (
            "=?UTF-8?Q?Deployment_complete_=F0=9F=9A=80?=",
            "RE: Deployment complete 🚀",
        ),
        # Lowercase encoding markers
        (
            "=?utf-8?b?TmV3IE5vdGlmaWNhdGlvbnM=?=",
            "RE: New Notifications",
        ),
        # ISO-8859-1 Quoted-Printable
        (
            "=?ISO-8859-1?Q?Ol=E1_Mundo?=",
            "RE: Olá Mundo",
        ),
        # Encoded word inside sentence
        (
            "Meeting about =?UTF-8?B?0L/RgNC+0LXQutGC?= tomorrow",
            "RE: Meeting about проект tomorrow",
        ),
    ]

    # Loop-invariant setup: read the fixture and load the account once.
    mail_template = self.get_test_mail(fname="incoming-subject-placeholder.raw")
    email_account = frappe.get_doc("Email Account", "_Test Email Account 1")

    for subject, expected in subjects:
        # subTest reports the failing header and lets the remaining cases run.
        with self.subTest(subject=subject):
            mail_content = mail_template.replace("{{ subject }}", subject)
            inbound_mail = InboundMail(mail_content, email_account, 12345, 1)
            self.assertEqual(inbound_mail.subject, expected)
def test_create_communication_from_mail(self):
# Create email queue record
mail_content = self.get_test_mail(fname="incoming-2.raw")

View file

@ -424,21 +424,27 @@ class Email:
def set_subject(self):
    """Parse and decode the `Subject` header into `self.subject`.

    A header may contain several RFC 2047 encoded words, each with its own
    charset, mixed with plain text. Every fragment returned by
    `decode_header` is decoded on its own and the pieces are concatenated,
    so no part of the subject is dropped or left as raw bytes.

    Sets `self.subject` to the decoded text, stripped and truncated to 140
    characters, or to "No Subject" when the header is missing or empty.
    """
    from email.errors import HeaderParseError

    raw_subject = self.mail.get("Subject")
    if not raw_subject:
        self.subject = "No Subject"
        return

    try:
        fragments = decode_header(raw_subject)
    except HeaderParseError:
        # A malformed encoded word (e.g. broken base64) must not abort
        # parsing of the whole mail; keep the raw header text instead.
        fragments = [(str(raw_subject), None)]

    decoded_fragments = []
    for fragment, charset in fragments:
        if isinstance(fragment, bytes):
            try:
                # Invalid byte sequences are replaced rather than raising.
                fragment = fragment.decode(charset or "utf-8", errors="replace")
            except (LookupError, ValueError):
                # Unknown or malformed charset label: fall back to utf-8.
                fragment = fragment.decode("utf-8", errors="replace")
        decoded_fragments.append(fragment)

    subject = "".join(decoded_fragments).strip()
    # Truncate to 140 chars (can be used as a document name)
    self.subject = subject[:140] if subject else "No Subject"
def set_from(self):
# gmail mailing-list compatibility