fix: granular status in system health report

Differentiate between a dead scheduler process, a manually disabled scheduler, and a dormant one.
This commit is contained in:
Ankush Menat 2025-02-21 22:52:08 +05:30
parent 6db7cb096d
commit 967d3e828c
4 changed files with 29 additions and 11 deletions

View file

@ -27,7 +27,12 @@ from frappe.model.document import Document
from frappe.utils.background_jobs import get_queue, get_queue_list, get_redis_conn
from frappe.utils.caching import redis_cache
from frappe.utils.data import add_to_date
from frappe.utils.scheduler import get_scheduler_status, get_scheduler_tick, is_schduler_process_running
from frappe.utils.scheduler import (
get_scheduler_status,
get_scheduler_tick,
is_dormant,
is_schduler_process_running,
)
@contextmanager
@ -151,7 +156,6 @@ class SystemHealthReport(Document):
# This just checks connection life
self.test_job_id = frappe.enqueue("frappe.ping", at_front=True).id
self.background_jobs_check = "queued"
self.scheduler_status = get_scheduler_status().get("status")
workers = frappe.get_all("RQ Worker")
self.total_background_workers = len(workers)
queue_summary = defaultdict(list)
@ -182,11 +186,20 @@ class SystemHealthReport(Document):
@health_check("Scheduler")
def fetch_scheduler(self):
scheduler_enabled = get_scheduler_status().get("status") == "active"
if not is_schduler_process_running():
self.scheduler_status = "Process Not Found"
elif is_dormant():
self.scheduler_status = "Dormant"
elif scheduler_enabled:
self.scheduler_status = "Active"
else:
self.scheduler_status = "Inactive"
lower_threshold = add_to_date(None, days=-7, as_datetime=True)
# Exclude "maybe" currently executing job
upper_threshold = add_to_date(None, minutes=-30, as_datetime=True)
scheduler_running = get_scheduler_status().get("status") == "active" and is_schduler_process_running()
self.scheduler_status = "Active" if scheduler_running else "Inactive"
mariadb_query = """
SELECT scheduled_job_type,

View file

@ -1,18 +1,15 @@
import os
import time
from datetime import datetime, timedelta
from unittest import TestCase
from unittest.mock import patch
import frappe
from frappe.core.doctype.scheduled_job_type.scheduled_job_type import ScheduledJobType, sync_jobs
from frappe.tests import IntegrationTestCase
from frappe.utils import add_days, get_datetime
from frappe.utils.data import now_datetime
from frappe.utils.doctor import purge_pending_jobs
from frappe.utils.scheduler import (
DEFAULT_SCHEDULER_TICK,
_get_last_creation_timestamp,
enqueue_events,
is_dormant,
schedule_jobs_based_on_activity,
@ -64,7 +61,9 @@ class TestScheduler(IntegrationTestCase):
@patch.object(frappe.utils.frappecloud, "on_frappecloud", return_value=True)
@patch.dict(frappe.conf, {"developer_mode": 0})
def test_is_dormant(self, _mock):
last_activity = frappe.db.get_value("User", filters={}, fieldname="max(last_active)")
last_activity = frappe.db.get_value(
"User", filters={}, fieldname="last_active", order_by="last_active desc"
)
self.assertTrue(is_dormant(check_time=get_datetime("2100-01-01 00:00:00")))
self.assertTrue(is_dormant(check_time=add_days(last_activity, 5)))
self.assertFalse(is_dormant(check_time=last_activity))
@ -72,7 +71,9 @@ class TestScheduler(IntegrationTestCase):
@patch.object(frappe.utils.frappecloud, "on_frappecloud", return_value=True)
@patch.dict(frappe.conf, {"developer_mode": 0})
def test_once_a_day_for_dormant(self, _mocks):
last_activity = frappe.db.get_value("User", filters={}, fieldname="max(last_active)")
last_activity = frappe.db.get_value(
"User", filters={}, fieldname="last_active", order_by="last_active desc"
)
frappe.db.truncate("Scheduled Job Log")
self.assertTrue(schedule_jobs_based_on_activity(check_time=get_datetime("2100-01-01 00:00:00")))
self.assertTrue(schedule_jobs_based_on_activity(check_time=add_days(last_activity, 5)))
@ -83,6 +84,7 @@ class TestScheduler(IntegrationTestCase):
job_log = frappe.get_doc("Scheduled Job Log", dict(scheduled_job_type=job.name))
job_log.db_set("creation", add_days(last_activity, 5), update_modified=False)
schedule_jobs_based_on_activity.clear_cache()
is_dormant.clear_cache()
# inactive site with recent job, don't run
self.assertFalse(schedule_jobs_based_on_activity(check_time=add_days(last_activity, 5)))

View file

@ -1,6 +1,6 @@
import frappe
FRAPPE_CLOUD_DOMAINS = ("frappe.cloud", "erpnext.com", "frappehr.com")
FRAPPE_CLOUD_DOMAINS = ("frappe.cloud", "erpnext.com", "frappehr.com", "frappe.dev")
def on_frappecloud() -> bool:

View file

@ -219,6 +219,7 @@ def schedule_jobs_based_on_activity(check_time=None):
return True
@redis_cache(ttl=60 * 60)
def is_dormant(check_time=None):
from frappe.utils.frappecloud import on_frappecloud
@ -228,7 +229,9 @@ def is_dormant(check_time=None):
if not threshold:
return False
last_activity = frappe.db.get_value("User", filters={}, fieldname="max(last_active)")
last_activity = frappe.db.get_value(
"User", filters={}, fieldname="last_active", order_by="last_active desc"
)
if not last_activity:
return True