From de2e0e6b5f1a2e36c4908da11a50f5c995ce7f71 Mon Sep 17 00:00:00 2001
From: Ankush Menat
Date: Mon, 3 Mar 2025 20:40:53 +0530
Subject: [PATCH 1/2] fix: CSS Styles of health report heatmap

---
 .../desk/doctype/system_health_report/system_health_report.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/frappe/desk/doctype/system_health_report/system_health_report.js b/frappe/desk/doctype/system_health_report/system_health_report.js
index fcf5d52289..83fcec6716 100644
--- a/frappe/desk/doctype/system_health_report/system_health_report.js
+++ b/frappe/desk/doctype/system_health_report/system_health_report.js
@@ -67,8 +67,8 @@ frappe.ui.form.on("System Health Report", {
 		const style = document.createElement("style");
 		style.innerText = `.health-check-failed {
 			font-weight: bold;
-			color: var(--text-colour);
-			background-color: var(--bg-red);
+			color: var(--text-colour) !important;
+			background-color: var(--bg-red) !important;
 		}`;
 		document.head.appendChild(style);
 
From 35667e758de6a6f7c84b733c001905efe6e7539c Mon Sep 17 00:00:00 2001
From: Ankush Menat
Date: Mon, 3 Mar 2025 17:19:21 +0530
Subject: [PATCH 2/2] fix: Implement backpressure for background jobs

---
 frappe/exceptions.py            |  5 +++++
 frappe/utils/background_jobs.py | 41 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/frappe/exceptions.py b/frappe/exceptions.py
index 18c935fe76..210408422a 100644
--- a/frappe/exceptions.py
+++ b/frappe/exceptions.py
@@ -263,6 +263,11 @@ class SessionBootFailed(ValidationError):
 	http_status_code = 500
 
 
+class QueueOverloaded(ValidationError):
+	"""Raised when a background job is refused because its queue is already too long."""
+	http_status_code = 503
+
+
 class PrintFormatError(ValidationError):
 	pass
 
diff --git a/frappe/utils/background_jobs.py b/frappe/utils/background_jobs.py
index a9d65e9f39..4310c6d270 100644
--- a/frappe/utils/background_jobs.py
+++ b/frappe/utils/background_jobs.py
@@ -27,7 +27,8 @@ from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fi
 import frappe
 import frappe.monitor
 from frappe import _
-from frappe.utils import CallbackManager, cint, get_bench_id
+from frappe.utils import CallbackManager, cint, get_bench_id, get_sites
+from frappe.utils.caching import site_cache
 from frappe.utils.commands import log
 from frappe.utils.data import sbool
 from frappe.utils.redis_queue import RedisQueue
@@ -40,6 +41,8 @@ RQ_RESULTS_TTL = 10 * 60
 RQ_MAX_JOBS = 5000  # Restart NOFORK workers after every N number of jobs
 RQ_MAX_JOBS_JITTER = 50  # Random difference in max jobs to avoid restarting at same time
 
+MAX_QUEUED_JOBS = 500  # frappe.enqueue will start failing when these many jobs exist in queue.
+
 _redis_queue_conn = None
 
 
@@ -154,6 +157,8 @@ def enqueue(
 
 		raise
 
+	_check_queue_size(q)
+
 	if not timeout:
 		timeout = get_queues_timeout().get(queue) or 300
 
@@ -723,6 +728,40 @@ def flush_telemetry():
 	ph and ph.flush()
 
 
+def _check_queue_size(q: Queue):
+	"""Apply backpressure: refuse to enqueue when `q` already holds too many jobs.
+
+	The limit is `frappe.conf.max_queued_jobs` (falling back to `MAX_QUEUED_JOBS`)
+	plus 50 for every site counted by `_site_count()`.
+
+	Raises:
+		frappe.QueueOverloaded: if `q.count` has reached the limit.
+	"""
+	max_jobs = cint(frappe.conf.max_queued_jobs) or MAX_QUEUED_JOBS
+	# Workaround for arbitrarily sized benches,
+	# TODO: Some concept of site-based fairness on consumption of queue
+	max_jobs += _site_count() * 50
+
+	if cint(q.count) >= max_jobs:
+		primary_action = {
+			"label": _("Monitor System Health"),
+			"client_action": "frappe.set_route",
+			"args": ["Form", "System Health Report"],
+		}
+		frappe.throw(
+			_("Too many queued background jobs ({0}). Please retry after some time.").format(max_jobs),
+			title=_("Queue Overloaded"),
+			exc=frappe.QueueOverloaded,
+			primary_action=primary_action if frappe.has_permission("System Health Report") else None,
+		)
+
+
+@site_cache(ttl=10 * 60)
+def _site_count() -> int:
+	"""Return `len(get_sites())`; the result is cached via `site_cache(ttl=10 * 60)`."""
+	return len(get_sites())
+
+
 def _start_sentry():
 	sentry_dsn = os.getenv("FRAPPE_SENTRY_DSN")
 	if not sentry_dsn: