From 658e6c5e731876da860deb40dec7186768ecfebf Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Fri, 24 Jan 2025 23:26:28 +0530 Subject: [PATCH 1/3] perf: Hard-limit `get_count` runtime to 1 second We have limited count to 1000 by default but this can still easily read entire table if: - There are filters and filters don't use index and there aren't 1000 matching rows. This unknown tail latency by default is not acceptable, so by default any count query that fails to return results in 2 seconds will be terminated and user will have to explicitly ask for the count. --- frappe/desk/reportview.py | 48 ++++++++++++++--------- frappe/public/js/frappe/list/list_view.js | 6 ++- 2 files changed, 34 insertions(+), 20 deletions(-) diff --git a/frappe/desk/reportview.py b/frappe/desk/reportview.py index 250a5af698..8b571cb46b 100644 --- a/frappe/desk/reportview.py +++ b/frappe/desk/reportview.py @@ -50,31 +50,43 @@ def get_list(): @frappe.whitelist() @frappe.read_only() -def get_count() -> int: +def get_count() -> int | None: args = get_form_params() if is_virtual_doctype(args.doctype): controller = get_controller(args.doctype) - count = frappe.call(controller.get_count, args=args, **args) - else: - args.distinct = sbool(args.distinct) - distinct = "distinct " if args.distinct else "" - args.limit = cint(args.limit) - fieldname = f"{distinct}`tab{args.doctype}`.name" - args.order_by = None + return frappe.call(controller.get_count, args=args, **args) - if args.limit: - args.fields = [fieldname] - partial_query = execute(**args, run=0) - count = frappe.db.sql(f"""select count(*) from ( {partial_query} ) p""")[0][0] + args.distinct = sbool(args.distinct) + distinct = "distinct " if args.distinct else "" + args.limit = cint(args.limit) + fieldname = f"{distinct}`tab{args.doctype}`.name" + args.order_by = None - if count == args.limit: - frappe.local.response_headers.set( - "Cache-Control", "private,max-age=300,stale-while-revalidate=10800" - ) + # args.limit is specified to avoid getting accurate count. + if not args.limit: + args.fields = [f"count({fieldname}) as total_count"] + return execute(**args)[0].get("total_count") + + args.fields = [fieldname] + partial_query = execute(**args, run=0) + + # Count queries are notoriously unpredictable based on the type of filters used. + # We should not attempt to fetch accurate count for 2 entire minutes! (default timeout) + # Very short timeout is used to here to set an upper bound on damage a bad request can do. + # Users can request accurate count by dropping limit from arguments. + frappe.db.set_execution_timeout(1) + + try: + count = frappe.db.sql(f"""select count(*) from ( {partial_query} ) p""")[0][0] + except Exception as e: + if frappe.db.is_statement_timeout(e): # Skip fetching accurate count + count = None else: - args.fields = [f"count({fieldname}) as total_count"] - count = execute(**args)[0].get("total_count") + raise + + if count == args.limit or count is None: + frappe.local.response_headers.set("Cache-Control", "private,max-age=300,stale-while-revalidate=10800") return count diff --git a/frappe/public/js/frappe/list/list_view.js b/frappe/public/js/frappe/list/list_view.js index e8dff11cf1..88c422ef52 100644 --- a/frappe/public/js/frappe/list/list_view.js +++ b/frappe/public/js/frappe/list/list_view.js @@ -620,7 +620,7 @@ frappe.views.ListView = class ListView extends frappe.views.BaseList { let $count = this.get_count_element(); this.get_count_str().then((count) => { $count.html(`${count}`); - if (this.count_upper_bound && this.count_upper_bound == this.total_count) { + if (this.count_upper_bound) { $count.attr( "title", __( @@ -1017,13 +1017,15 @@ frappe.views.ListView = class ListView extends frappe.views.BaseList { Boolean(this.count_upper_bound) ) .then((total_count) => { - this.total_count = total_count || current_count; + this.total_count = total_count; this.count_without_children = count_without_children !== current_count ? count_without_children : undefined; let count_str; if (this.total_count === this.count_upper_bound) { count_str = `${format_number(this.total_count - 1, null, 0)}+`; + } else if (this.total_count == null) { + count_str = "??"; } else { count_str = format_number(this.total_count, null, 0); } From 6ea200a48f8f5fb635d9b9d07b381b340d3e8281 Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Sat, 25 Jan 2025 10:11:56 +0530 Subject: [PATCH 2/3] fix: handle typical client tracking errors --- frappe/utils/redis_wrapper.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/frappe/utils/redis_wrapper.py b/frappe/utils/redis_wrapper.py index 4ae597c18f..5bd4278cc8 100644 --- a/frappe/utils/redis_wrapper.py +++ b/frappe/utils/redis_wrapper.py @@ -10,6 +10,7 @@ from contextlib import suppress import redis from redis.commands.search import Search +from redis.exceptions import ResponseError import frappe from frappe.utils import cstr @@ -405,8 +406,17 @@ class _TrackedConnection(redis.Connection): self.register_connect_callback(self._enable_client_tracking) def _enable_client_tracking(self, conn): - conn.send_command("CLIENT", "TRACKING", "ON", "redirect", self._invalidator_id, "NOLOOP") - conn.read_response() + try: + conn.send_command("CLIENT", "TRACKING", "ON", "redirect", self._invalidator_id, "NOLOOP") + conn.read_response() + except ResponseError as e: + if "client ID" in str(e) and "does not exist" in str(e): + # Redis restarted, there's no easy way to recover from this. + frappe.client_cache.healthy = False + elif "unknown subcommand" in str(e).lower(): + raise Exception("Redis version is not supported, upgrade to Redis 6.0 or higher.") + else: + raise CachedValue = namedtuple("CachedValue", ["value", "expiry"]) From 67fafdd81276a8ac49636aaea8f5d22d9828f767 Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Sat, 25 Jan 2025 10:17:33 +0530 Subject: [PATCH 3/3] perf: Set timeout in same query / avoid side effects --- frappe/desk/reportview.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/frappe/desk/reportview.py b/frappe/desk/reportview.py index 8b571cb46b..4f907ecdf1 100644 --- a/frappe/desk/reportview.py +++ b/frappe/desk/reportview.py @@ -75,10 +75,10 @@ def get_count() -> int | None: # We should not attempt to fetch accurate count for 2 entire minutes! (default timeout) # Very short timeout is used to here to set an upper bound on damage a bad request can do. # Users can request accurate count by dropping limit from arguments. - frappe.db.set_execution_timeout(1) + timeout_clause = "SET STATEMENT max_statement_time=1 FOR" if frappe.db.db_type == "mariadb" else "" try: - count = frappe.db.sql(f"""select count(*) from ( {partial_query} ) p""")[0][0] + count = frappe.db.sql(f"{timeout_clause} select count(*) from ( {partial_query} ) p")[0][0] except Exception as e: if frappe.db.is_statement_timeout(e): # Skip fetching accurate count count = None @@ -86,7 +86,7 @@ def get_count() -> int | None: raise if count == args.limit or count is None: - frappe.local.response_headers.set("Cache-Control", "private,max-age=300,stale-while-revalidate=10800") + frappe.local.response_headers.set("Cache-Control", "private,max-age=600,stale-while-revalidate=10800") return count