fix: Abort flushing email queue if >50% fail.

When email queue batch failes >33% with >10 count, frappe will now abort
sending emails.

We already notify users via system notification so this
assumes that user will notice it and fix it in sometime.

With previous commits we also prioritize fresh emails over retries.
This commit is contained in:
Ankush Menat 2023-11-02 18:36:11 +05:30
parent 050c0b26f8
commit 4e318a0280
4 changed files with 22 additions and 17 deletions

View file

@ -86,7 +86,7 @@ class EmailQueue(Document):
return duplicate
@classmethod
def new(cls, doc_data, ignore_permissions=False):
def new(cls, doc_data, ignore_permissions=False) -> "EmailQueue":
data = doc_data.copy()
if not data.get("recipients"):
return
@ -99,7 +99,7 @@ class EmailQueue(Document):
return doc
@classmethod
def find(cls, name):
def find(cls, name) -> "EmailQueue":
return frappe.get_doc(cls.DOCTYPE, name)
@classmethod

View file

@ -151,7 +151,7 @@ class TestNewsletter(TestNewsletterMixin, FrappeTestCase):
"Newsletter Email Group", filters={"parent": name}, fields=["email_group"]
)
flush(from_test=True)
flush()
confirmed_unsubscribe(to_unsubscribe, group[0].email_group)
name = self.send_newsletter()

View file

@ -7,6 +7,11 @@ from frappe.utils import cint, cstr, get_url, now_datetime
from frappe.utils.data import getdate
from frappe.utils.verified_command import get_signed_params, verify_request
# After this percent of failures in every batch, entire batch is aborted.
# This usually indicates a systemic failure so we shouldn't keep trying to send emails.
EMAIL_QUEUE_BATCH_FAILURE_THRESHOLD_PERCENT = 0.33
EMAIL_QUEUE_BATCH_FAILURE_THRESHOLD_COUNT = 10
def get_emails_sent_this_month(email_account=None):
"""Get count of emails sent from a specific email account.
@ -124,33 +129,33 @@ def return_unsubscribed_page(email, doctype, name):
)
def flush(from_test=False):
def flush():
"""flush email queue, every time: called from scheduler"""
from frappe.email.doctype.email_queue.email_queue import send_mail
# To avoid running jobs inside unit tests
if frappe.are_emails_muted():
msgprint(_("Emails are muted"))
from_test = True
if cint(frappe.db.get_default("suspend_email_queue")) == 1:
return
email_queue_batch = get_queue()
if not email_queue_batch:
return
failed_email_queues = []
for row in email_queue_batch:
try:
send_mail(
email_queue_name=row.name,
now=from_test,
job_id=f"email_queue_sendmail_{row.name}",
queue="short",
deduplicate=True,
)
send_mail(email_queue_name=row.name)
except Exception:
frappe.db.rollback()
frappe.get_doc("Email Queue", row.name).log_error()
finally:
frappe.db.commit()
failed_email_queues.append(row.name)
if (
len(failed_email_queues) / len(email_queue_batch) > EMAIL_QUEUE_BATCH_FAILURE_THRESHOLD_PERCENT
and len(failed_email_queues) > EMAIL_QUEUE_BATCH_FAILURE_THRESHOLD_COUNT
):
frappe.throw(_("Email Queue flushing aborted due to too many failures."))
def get_queue():

View file

@ -52,7 +52,7 @@ class TestEmail(FrappeTestCase):
self.test_email_queue(send_after=1)
from frappe.email.queue import flush
flush(from_test=True)
flush()
email_queue = frappe.db.sql(
"""select name from `tabEmail Queue` where status='Sent'""", as_dict=1
)
@ -62,7 +62,7 @@ class TestEmail(FrappeTestCase):
self.test_email_queue()
from frappe.email.queue import flush
flush(from_test=True)
flush()
email_queue = frappe.db.sql(
"""select name from `tabEmail Queue` where status='Sent'""", as_dict=1
)