fix: check scheduler process status in health report (#31284)

Currently it's just checking if the scheduler is enabled or not. This PR
also adds a check to see if the process is running or not.
This commit is contained in:
Ankush Menat 2025-02-17 10:34:07 +05:30 committed by GitHub
parent 5fe0742ab9
commit d0c3a8ee56
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 23 additions and 3 deletions

View file

@ -27,7 +27,7 @@ from frappe.model.document import Document
from frappe.utils.background_jobs import get_queue, get_queue_list, get_redis_conn
from frappe.utils.caching import redis_cache
from frappe.utils.data import add_to_date
from frappe.utils.scheduler import get_scheduler_status, get_scheduler_tick
from frappe.utils.scheduler import get_scheduler_status, get_scheduler_tick, is_schduler_process_running
@contextmanager
@ -185,7 +185,8 @@ class SystemHealthReport(Document):
lower_threshold = add_to_date(None, days=-7, as_datetime=True)
# Exclude "maybe" currently executing job
upper_threshold = add_to_date(None, minutes=-30, as_datetime=True)
self.scheduler_status = get_scheduler_status().get("status")
scheduler_running = get_scheduler_status().get("status") == "active" and is_schduler_process_running()
self.scheduler_status = "Active" if scheduler_running else "Inactive"
mariadb_query = """
SELECT scheduled_job_type,

View file

@ -47,7 +47,7 @@ def start_scheduler() -> NoReturn:
tick = get_scheduler_tick()
set_niceness()
lock_path = os.path.abspath(os.path.join(get_bench_path(), "config", "scheduler_process"))
lock_path = _get_scheduler_lock_file()
try:
lock = FileLock(lock_path)
@ -62,6 +62,25 @@ def start_scheduler() -> NoReturn:
enqueue_events_for_all_sites()
def _get_scheduler_lock_file() -> str:
    """Return the absolute path of the scheduler lock file.

    The path is ``config/scheduler_process`` under the directory returned by
    ``get_bench_path()``.
    """
    return os.path.abspath(os.path.join(get_bench_path(), "config", "scheduler_process"))
def is_schduler_process_running() -> bool:
    """Report whether another process currently holds the scheduler lock.

    A non-blocking acquire of the scheduler lock file is attempted. If it
    succeeds, the lock is released again and ``False`` is returned; if the
    attempt raises ``Timeout``, ``True`` is returned.

    Note: FLOCK is held by a process until it exits, so this only checks
    whether the process is running. It cannot tell whether that process is
    stuck somewhere.
    """
    scheduler_lock = FileLock(_get_scheduler_lock_file())
    try:
        scheduler_lock.acquire(blocking=False)
    except Timeout:
        # Lock could not be taken without waiting: it is held elsewhere.
        return True

    # We only probed the lock; give it back immediately.
    scheduler_lock.release()
    return False
def sleep_duration(tick):
if tick != DEFAULT_SCHEDULER_TICK:
# Assuming user knows what they want.