From d0c3a8ee5675d8dc323192ac3772ecbfaf345012 Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Mon, 17 Feb 2025 10:34:07 +0530 Subject: [PATCH] fix: check scheduler process status in health report (#31284) Currently it's just checking if the scheduler is enabled or not. This PR also adds a check to see if the process is running or not. --- .../system_health_report.py | 5 +++-- frappe/utils/scheduler.py | 21 ++++++++++++++++++- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/frappe/desk/doctype/system_health_report/system_health_report.py b/frappe/desk/doctype/system_health_report/system_health_report.py index abdc1b0272..9cd74546af 100644 --- a/frappe/desk/doctype/system_health_report/system_health_report.py +++ b/frappe/desk/doctype/system_health_report/system_health_report.py @@ -27,7 +27,7 @@ from frappe.model.document import Document from frappe.utils.background_jobs import get_queue, get_queue_list, get_redis_conn from frappe.utils.caching import redis_cache from frappe.utils.data import add_to_date -from frappe.utils.scheduler import get_scheduler_status, get_scheduler_tick +from frappe.utils.scheduler import get_scheduler_status, get_scheduler_tick, is_schduler_process_running @contextmanager @@ -185,7 +185,8 @@ class SystemHealthReport(Document): lower_threshold = add_to_date(None, days=-7, as_datetime=True) # Exclude "maybe" curently executing job upper_threshold = add_to_date(None, minutes=-30, as_datetime=True) - self.scheduler_status = get_scheduler_status().get("status") + scheduler_running = get_scheduler_status().get("status") == "active" and is_schduler_process_running() + self.scheduler_status = "Active" if scheduler_running else "Inactive" mariadb_query = """ SELECT scheduled_job_type, diff --git a/frappe/utils/scheduler.py b/frappe/utils/scheduler.py index be3fec64fd..d48506a305 100644 --- a/frappe/utils/scheduler.py +++ b/frappe/utils/scheduler.py @@ -47,7 +47,7 @@ def start_scheduler() -> NoReturn: tick = get_scheduler_tick() 
set_niceness() - lock_path = os.path.abspath(os.path.join(get_bench_path(), "config", "scheduler_process")) + lock_path = _get_scheduler_lock_file() try: lock = FileLock(lock_path) @@ -62,6 +62,25 @@ def start_scheduler() -> NoReturn: enqueue_events_for_all_sites() +def _get_scheduler_lock_file() -> True: + return os.path.abspath(os.path.join(get_bench_path(), "config", "scheduler_process")) + + +def is_schduler_process_running() -> bool: + """Checks if any other process is holding the lock. + + Note: FLOCK is held by process until it exits, this function just checks if process is + running or not. We can't determine if process is stuck somewhere. + """ + try: + lock = FileLock(_get_scheduler_lock_file()) + lock.acquire(blocking=False) + lock.release() + return False + except Timeout: + return True + + def sleep_duration(tick): if tick != DEFAULT_SCHEDULER_TICK: # Assuming user knows what they want.