From b5bc8b308db9b5b682c199c5e1d1609fdd7fe96b Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Tue, 30 Apr 2024 20:02:51 +0530 Subject: [PATCH] fix: system health ergonomics - if redis is down it takes 10 seconds and doesn't indicate that correctly - full traceback is shared in the debug log if some step fails --- .../system_health_report.py | 25 +++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/frappe/desk/doctype/system_health_report/system_health_report.py b/frappe/desk/doctype/system_health_report/system_health_report.py index 8d3de087a7..a76baa97b7 100644 --- a/frappe/desk/doctype/system_health_report/system_health_report.py +++ b/frappe/desk/doctype/system_health_report/system_health_report.py @@ -19,15 +19,29 @@ import functools import os from collections import defaultdict from collections.abc import Callable +from contextlib import contextmanager import frappe from frappe.model.document import Document -from frappe.utils.background_jobs import get_queue, get_queue_list +from frappe.utils.background_jobs import get_queue, get_queue_list, get_redis_conn from frappe.utils.caching import redis_cache from frappe.utils.data import add_to_date from frappe.utils.scheduler import get_scheduler_status +@contextmanager +def no_wait(func): + "Disable tenacity waiting on some function" + from tenacity import stop_after_attempt + + try: + original_stop = func.retry.stop + func.retry.stop = stop_after_attempt(1) + yield + finally: + func.retry.stop = original_stop + + def health_check(step: str): assert isinstance(step, str), "Invalid usage of decorator, Usage: @health_check('step name')" @@ -37,8 +51,11 @@ def health_check(step: str): try: return func(*args, **kwargs) except Exception as e: + frappe.log(frappe.get_traceback()) # nosemgrep - frappe.msgprint(f"System Health check step {frappe.bold(step)} failed: {e}", alert=True) + frappe.msgprint( + f"System Health check step {frappe.bold(step)} failed: {e}", alert=True, indicator="red" + ) 
return wrapper @@ -126,7 +143,10 @@ class SystemHealthReport(Document): self.fetch_user_stats() @health_check("Background Jobs") + @no_wait(get_redis_conn) def fetch_background_jobs(self): + self.background_jobs_check = "failed" + # This just checks connection life self.test_job_id = frappe.enqueue("frappe.ping", at_front=True).id self.background_jobs_check = "queued" self.scheduler_status = get_scheduler_status().get("status") @@ -292,6 +312,7 @@ class SystemHealthReport(Document): @frappe.whitelist() +@no_wait(get_redis_conn) def get_job_status(job_id: str | None = None): frappe.only_for("System Manager") try: