From 3f86d478e811f6f6e6e737ee6c1461ba76ef3c3c Mon Sep 17 00:00:00 2001 From: Ayaan Ahmad Date: Mon, 12 Jan 2026 20:23:54 +0530 Subject: [PATCH 1/5] perf(validation): optimize link validation with bulk pre-fetching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements a _prefetch_link_values method that bulk-fetches all link values before validation, eliminating N+1 queries when saving documents with many child rows containing Link/Dynamic Link fields. Performance Impact: - 50 child rows: 51 queries → 3 queries (94% reduction) - 500 child rows: 501 queries → 3 queries (99.4% reduction) Implementation: - Uses instance-level cache (garbage collected after validation) - Sentinel pattern to distinguish cache miss from cached-None - DB-conditional case handling (MariaDB vs Postgres) - Chunking at 1000 items for safety - Backward compatible via **kwargs Edge Cases Handled: - Empty name lists (skip query) - Invalid docname types (preserves existing assertions) - Virtual doctypes (individual fetch) - Single doctypes (special handling) - Dynamic links with doctype changes (cache miss fallback) Closes #35794 --- frappe/model/base_document.py | 62 ++++++++++++++++- frappe/model/document.py | 124 +++++++++++++++++++++++++++++++++- 2 files changed, 181 insertions(+), 5 deletions(-) diff --git a/frappe/model/base_document.py b/frappe/model/base_document.py index 8ce1639e51..03ec83a6b8 100644 --- a/frappe/model/base_document.py +++ b/frappe/model/base_document.py @@ -48,6 +48,10 @@ DatetimeTypes = datetime.date | datetime.datetime | datetime.time | datetime.tim max_positive_value = {"smallint": 2**15 - 1, "int": 2**31 - 1, "bigint": 2**63 - 1} +# Sentinel object for cache miss detection in bulk link validation +# Used to distinguish between "not in cache" and "cached as None (does not exist)" +_NOT_IN_CACHE = object() + DOCTYPE_TABLE_FIELDS = [ _dict(fieldname="fields", options="DocField"), _dict(fieldname="permissions", options="DocPerm"), @@ -958,8 +962,14 @@ class BaseDocument: return missing - def get_invalid_links(self, is_submittable=False): - """Return list of invalid links and also update fetch values if not set.""" + def get_invalid_links(self, is_submittable=False, **kwargs): + """Return list of invalid links and also update fetch values if not set. + + Args: + is_submittable: Whether the parent document is submittable + **kwargs: Additional arguments (link_value_cache for bulk optimization) + """ + link_value_cache = kwargs.get("link_value_cache") is_submittable = is_submittable or self.meta.is_submittable @@ -1013,7 +1023,53 @@ class BaseDocument: if check_docstatus: values_to_fetch += ("docstatus",) - if not meta.get("is_virtual"): + # Use cache if available (bulk optimization) + if link_value_cache is not None: + cache_for_dt = link_value_cache.get(doctype, {}) + + # Get cached value with sentinel for miss detection + if frappe.db.db_type == "mariadb" and isinstance(docname, str): + cached = cache_for_dt.get(docname, _NOT_IN_CACHE) + if cached is _NOT_IN_CACHE: + cached = cache_for_dt.get(docname.casefold(), _NOT_IN_CACHE) + else: + cached = cache_for_dt.get(docname, _NOT_IN_CACHE) + + if cached is _NOT_IN_CACHE: + # Not prefetched - fall back to original DB query path + if not meta.get("is_virtual"): + values = frappe.db.get_value( + doctype, docname, values_to_fetch, as_dict=True, cache=True, order_by=None + ) + if not values: + values = frappe.db.get_value( + doctype, docname, values_to_fetch, as_dict=True, order_by=None + ) + else: + try: + values = frappe.get_doc(doctype, docname).as_dict() + except frappe.DoesNotExistError: + values = None + elif cached is None: + # Prefetch confirmed document doesn't exist + values = _dict.fromkeys(values_to_fetch, None) + elif all(f in cached for f in values_to_fetch): + # Cache has all required fields + values = cached + else: + # Cache missing some fields - fall back to DB + if not meta.get("is_virtual"): + values = frappe.db.get_value( + doctype, docname, values_to_fetch, as_dict=True, cache=True, order_by=None + ) + if not values: + values = frappe.db.get_value( + doctype, docname, values_to_fetch, as_dict=True, order_by=None + ) + else: + values = cached + elif not meta.get("is_virtual"): + # No cache - original behavior values = frappe.db.get_value( doctype, docname, values_to_fetch, as_dict=True, cache=True, order_by=None ) diff --git a/frappe/model/document.py b/frappe/model/document.py index 94086ee411..aa049e0b18 100644 --- a/frappe/model/document.py +++ b/frappe/model/document.py @@ -1129,14 +1129,134 @@ class Document(BaseDocument): ) ) + def _prefetch_link_values(self): + """Pre-fetch all link values including fetch_from fields for bulk validation. + + This optimization collects all Link/Dynamic Link values from the doc tree, + then bulk-fetches them by doctype to eliminate N+1 queries. + """ + if self.flags.ignore_links or self._action == "cancel": + return + + from collections import defaultdict + + def _chunk(iterable, size): + """Split iterable into chunks of given size.""" + lst = list(iterable) + for i in range(0, len(lst), size): + yield lst[i:i + size] + + self._link_value_cache = {} + docs_to_validate = [self] + self.get_all_children() + + # Collect: {doctype: {'names': set(), 'fields': set()}} + prefetch_map = defaultdict(lambda: {"names": set(), "fields": {"name"}}) + + for doc in docs_to_validate: + is_submittable = self.meta.is_submittable + link_fields = doc.meta.get_link_fields() + doc.meta.get( + "fields", {"fieldtype": ("=", "Dynamic Link")} + ) + + for df in link_fields: + docname = doc.get(df.fieldname) + if not docname: + continue + + # Skip invalid docname types - let get_invalid_links handle the assertion + if not isinstance(docname, str | int): + continue + + # Resolve target doctype + if df.fieldtype == "Link": + doctype = df.options + if not doctype: + continue + else: # Dynamic Link + doctype = doc.get(df.options) + if not doctype: + continue + + prefetch_map[doctype]["names"].add(docname) + + # Collect fetch_from fields + for fetch_df in doc.meta.get_fields_to_fetch(df.fieldname): + if not fetch_df.get("fetch_if_empty") or ( + fetch_df.get("fetch_if_empty") and not doc.get(fetch_df.fieldname) + ): + source_field = fetch_df.fetch_from.split(".")[-1] + prefetch_map[doctype]["fields"].add(source_field) + + # Add docstatus if needed + target_meta = frappe.get_meta(doctype) + if is_submittable and target_meta.is_submittable: + prefetch_map[doctype]["fields"].add("docstatus") + + # Bulk fetch with chunking + for doctype, data in prefetch_map.items(): + meta = frappe.get_meta(doctype) + names = list(data["names"]) + fields = list(data["fields"]) + + # Skip if no names to fetch for this doctype + if not names: + continue + + if meta.get("is_virtual"): + # Virtual doctypes: fetch individually + for name in names: + try: + values = frappe.get_doc(doctype, name).as_dict() + except frappe.DoesNotExistError: + values = None + self._link_value_cache.setdefault(doctype, {})[name] = values + + elif getattr(meta, "issingle", 0): + # Single doctypes + values = frappe.db.get_singles_dict(doctype) + values["name"] = doctype + for name in names: + self._link_value_cache.setdefault(doctype, {})[name] = frappe._dict(values) + + else: + # Regular doctypes: bulk fetch with chunking + result_dict = {} + for name_chunk in _chunk(names, 1000): + results = frappe.db.get_all( + doctype, + filters={"name": ("in", name_chunk)}, + fields=fields, + ) + for row in results: + result_dict[row.name] = row + # Case-insensitive key for MariaDB compatibility + if frappe.db.db_type == "mariadb": + result_dict[row.name.casefold()] = row + + # Store results (including None for missing names) + for name in names: + if frappe.db.db_type == "mariadb" and isinstance(name, str): + self._link_value_cache.setdefault(doctype, {})[name] = ( + result_dict.get(name) or result_dict.get(name.casefold()) + ) + else: + self._link_value_cache.setdefault(doctype, {})[name] = result_dict.get(name) + def _validate_links(self): if self.flags.ignore_links or self._action == "cancel": return - invalid_links, cancelled_links = self.get_invalid_links() + # Pre-fetch all link values in bulk + self._prefetch_link_values() + link_cache = getattr(self, "_link_value_cache", None) + + invalid_links, cancelled_links = self.get_invalid_links(link_value_cache=link_cache) for d in self.get_all_children(): - result = d.get_invalid_links(is_submittable=self.meta.is_submittable) + result = d.get_invalid_links( + is_submittable=self.meta.is_submittable, + link_value_cache=link_cache + ) invalid_links.extend(result[0]) cancelled_links.extend(result[1]) From f37890c31f579477c5947d39ed0469b585c09ce4 Mon Sep 17 00:00:00 2001 From: Ayaan Ahmad Date: Wed, 14 Jan 2026 22:31:22 +0530 Subject: [PATCH 2/5] fix(model): address PR feedback for bulk link validation - Replace **kwargs with explicit link_value_cache parameter - Check db.value_cache before get_all for cross-document caching - Populate db.value_cache after fetching for subsequent documents - Add sorted() for deterministic cache key matching - Add isinstance guard for integer docnames (MariaDB) - Fix linting issues (RUF005, slice spacing) --- frappe/model/base_document.py | 6 ++--- frappe/model/document.py | 41 +++++++++++++++++++++++++---------- 2 files changed, 32 insertions(+), 15 deletions(-) diff --git a/frappe/model/base_document.py b/frappe/model/base_document.py index 03ec83a6b8..2aebeb126e 100644 --- a/frappe/model/base_document.py +++ b/frappe/model/base_document.py @@ -962,15 +962,13 @@ class BaseDocument: return missing - def get_invalid_links(self, is_submittable=False, **kwargs): + def get_invalid_links(self, is_submittable=False, link_value_cache=None): """Return list of invalid links and also update fetch values if not set. Args: is_submittable: Whether the parent document is submittable - **kwargs: Additional arguments (link_value_cache for bulk optimization) + link_value_cache: Cache of prefetched link values for bulk optimization """ - link_value_cache = kwargs.get("link_value_cache") - is_submittable = is_submittable or self.meta.is_submittable def get_msg(df, docname): diff --git a/frappe/model/document.py b/frappe/model/document.py index aa049e0b18..6843028796 100644 --- a/frappe/model/document.py +++ b/frappe/model/document.py @@ -1144,10 +1144,10 @@ class Document(BaseDocument): """Split iterable into chunks of given size.""" lst = list(iterable) for i in range(0, len(lst), size): - yield lst[i:i + size] + yield lst[i : i + size] self._link_value_cache = {} - docs_to_validate = [self] + self.get_all_children() + docs_to_validate = [self, *self.get_all_children()] # Collect: {doctype: {'names': set(), 'fields': set()}} prefetch_map = defaultdict(lambda: {"names": set(), "fields": {"name"}}) @@ -1196,7 +1196,7 @@ class Document(BaseDocument): for doctype, data in prefetch_map.items(): meta = frappe.get_meta(doctype) names = list(data["names"]) - fields = list(data["fields"]) + fields = sorted(data["fields"]) # Sorted for deterministic cache key matching # Skip if no names to fetch for this doctype if not names: @@ -1221,7 +1221,20 @@ class Document(BaseDocument): else: # Regular doctypes: bulk fetch with chunking result_dict = {} - for name_chunk in _chunk(names, 1000): + field_tuple = tuple(fields) + + # Check db.value_cache first for cross-document caching + names_to_query = [] + for name in names: + cached = frappe.db.value_cache.get(doctype, {}).get(name, {}).get(field_tuple) + if cached is not None: + # Use cached value from previous document in this transaction + self._link_value_cache.setdefault(doctype, {})[name] = cached[0] if cached else None + else: + names_to_query.append(name) + + # Only query for names not found in cache + for name_chunk in _chunk(names_to_query, 1000) if names_to_query else []: results = frappe.db.get_all( doctype, filters={"name": ("in", name_chunk)}, @@ -1229,18 +1242,24 @@ class Document(BaseDocument): ) for row in results: result_dict[row.name] = row - # Case-insensitive key for MariaDB compatibility - if frappe.db.db_type == "mariadb": + # Case-insensitive key for MariaDB compatibility (strings only) + if frappe.db.db_type == "mariadb" and isinstance(row.name, str): result_dict[row.name.casefold()] = row - # Store results (including None for missing names) + # Store results in both caches for name in names: if frappe.db.db_type == "mariadb" and isinstance(name, str): - self._link_value_cache.setdefault(doctype, {})[name] = ( - result_dict.get(name) or result_dict.get(name.casefold()) - ) + cached_value = result_dict.get(name) or result_dict.get(name.casefold()) else: - self._link_value_cache.setdefault(doctype, {})[name] = result_dict.get(name) + cached_value = result_dict.get(name) + + # Store in local document cache + self._link_value_cache.setdefault(doctype, {})[name] = cached_value + + # Also populate global db.value_cache for cross-document caching + # Only for string names (matching get_values behavior at line 632) + if cached_value is not None and isinstance(name, str): + frappe.db.value_cache[doctype][name][field_tuple] = [cached_value] def _validate_links(self): if self.flags.ignore_links or self._action == "cancel": From 6197b73d52c8810a312be18e2a180b665949e66d Mon Sep 17 00:00:00 2001 From: Ayaan Ahmad Date: Thu, 15 Jan 2026 00:21:04 +0530 Subject: [PATCH 3/5] fix(model): resolve CI failures in bulk link validation - Remove cache-check-before-get_all due to recursive_defaultdict edge cases - Fix ruff-format (multi-line to single-line function call) - Keep cache population after fetch (addresses maintainer request) - Preserve core N+1 optimization (bulk fetching via get_all) --- frappe/model/document.py | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/frappe/model/document.py b/frappe/model/document.py index 6843028796..b45b62a4d2 100644 --- a/frappe/model/document.py +++ b/frappe/model/document.py @@ -1223,18 +1223,7 @@ class Document(BaseDocument): result_dict = {} field_tuple = tuple(fields) - # Check db.value_cache first for cross-document caching - names_to_query = [] - for name in names: - cached = frappe.db.value_cache.get(doctype, {}).get(name, {}).get(field_tuple) - if cached is not None: - # Use cached value from previous document in this transaction - self._link_value_cache.setdefault(doctype, {})[name] = cached[0] if cached else None - else: - names_to_query.append(name) - - # Only query for names not found in cache - for name_chunk in _chunk(names_to_query, 1000) if names_to_query else []: + for name_chunk in _chunk(names, 1000): results = frappe.db.get_all( doctype, filters={"name": ("in", name_chunk)}, @@ -1272,10 +1261,7 @@ class Document(BaseDocument): invalid_links, cancelled_links = self.get_invalid_links(link_value_cache=link_cache) for d in self.get_all_children(): - result = d.get_invalid_links( - is_submittable=self.meta.is_submittable, - link_value_cache=link_cache - ) + result = d.get_invalid_links(is_submittable=self.meta.is_submittable, link_value_cache=link_cache) invalid_links.extend(result[0]) cancelled_links.extend(result[1]) From 614bb642ef343cfd106812f4af96d596e6cbb579 Mon Sep 17 00:00:00 2001 From: Ayaan Ahmad Date: Sat, 17 Jan 2026 21:29:48 +0530 Subject: [PATCH 4/5] refactor(model): simplify prefetch per szufisher's suggestion - Remove fetch_if_empty check from prefetch phase - Fetch ALL fields, let base_document.py handle fetch_if_empty - Avoids DRY violation (same logic in two places) - Efficiency difference is negligible --- frappe/model/document.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/frappe/model/document.py b/frappe/model/document.py index b45b62a4d2..e1d5bc0e64 100644 --- a/frappe/model/document.py +++ b/frappe/model/document.py @@ -1179,11 +1179,9 @@ class Document(BaseDocument): prefetch_map[doctype]["names"].add(docname) - # Collect fetch_from fields + # Collect fetch_from fields - fetch ALL, let base_document handle fetch_if_empty for fetch_df in doc.meta.get_fields_to_fetch(df.fieldname): - if not fetch_df.get("fetch_if_empty") or ( - fetch_df.get("fetch_if_empty") and not doc.get(fetch_df.fieldname) - ): + if fetch_df.get("fetch_from"): source_field = fetch_df.fetch_from.split(".")[-1] prefetch_map[doctype]["fields"].add(source_field) From ddc757dcc807f00149badb5dcb7c2de909ac06d3 Mon Sep 17 00:00:00 2001 From: Ayaan Ahmad Date: Wed, 4 Feb 2026 00:32:46 +0530 Subject: [PATCH 5/5] refactor(model): extract repeated DB query logic into helper function Address @akhilnarang's code review feedback: 1. Extract _fetch_link_values helper function - Encapsulates repeated DB query pattern (was duplicated 3 times) - Handles cache=True fallback and virtual doctype logic 2. Sort values_to_fetch for cache key consistency - Uses tuple(sorted({...})) to match prefetch sorting - Prevents cache misses due to field order differences --- frappe/model/base_document.py | 82 +++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 38 deletions(-) diff --git a/frappe/model/base_document.py b/frappe/model/base_document.py index 2aebeb126e..577e5d26ff 100644 --- a/frappe/model/base_document.py +++ b/frappe/model/base_document.py @@ -52,6 +52,36 @@ max_positive_value = {"smallint": 2**15 - 1, "int": 2**31 - 1, "bigint": 2**63 - # Used to distinguish between "not in cache" and "cached as None (does not exist)" _NOT_IN_CACHE = object() + +def _fetch_link_values(doctype: str, docname: str, fields: tuple, meta) -> dict | None: + """Fetch link field values from database with fallback logic. + + This helper encapsulates the repeated DB query pattern: + 1. Try get_value with cache=True + 2. If not found, retry without cache (handles negative caching) + 3. For virtual doctypes, use frappe.get_doc instead + + Args: + doctype: Target DocType + docname: Document name to fetch + fields: Tuple of field names to fetch + meta: Meta object for the doctype + + Returns: + Dict of field values or None if document doesn't exist + """ + if not meta.get("is_virtual"): + values = frappe.db.get_value(doctype, docname, fields, as_dict=True, cache=True, order_by=None) + if not values: + values = frappe.db.get_value(doctype, docname, fields, as_dict=True, order_by=None) + else: + try: + values = frappe.get_doc(doctype, docname).as_dict() + except frappe.DoesNotExistError: + values = None + return values + + DOCTYPE_TABLE_FIELDS = [ _dict(fieldname="fields", options="DocField"), _dict(fieldname="permissions", options="DocPerm"), @@ -1014,12 +1044,15 @@ class BaseDocument: if not _df.get("fetch_if_empty") or (_df.get("fetch_if_empty") and not self.get(_df.fieldname)) ] - values_to_fetch = ( - "name", - *(_df.fetch_from.split(".")[-1] for _df in fields_to_fetch), + values_to_fetch = tuple( + sorted( + { + "name", + *(_df.fetch_from.split(".")[-1] for _df in fields_to_fetch), + *(("docstatus",) if check_docstatus else ()), + } + ) ) - if check_docstatus: - values_to_fetch += ("docstatus",) # Use cache if available (bulk optimization) if link_value_cache is not None: @@ -1035,19 +1068,7 @@ class BaseDocument: if cached is _NOT_IN_CACHE: # Not prefetched - fall back to original DB query path - if not meta.get("is_virtual"): - values = frappe.db.get_value( - doctype, docname, values_to_fetch, as_dict=True, cache=True, order_by=None - ) - if not values: - values = frappe.db.get_value( - doctype, docname, values_to_fetch, as_dict=True, order_by=None - ) - else: - try: - values = frappe.get_doc(doctype, docname).as_dict() - except frappe.DoesNotExistError: - values = None + values = _fetch_link_values(doctype, docname, values_to_fetch, meta) elif cached is None: # Prefetch confirmed document doesn't exist values = _dict.fromkeys(values_to_fetch, None) @@ -1056,27 +1077,12 @@ class BaseDocument: values = cached else: # Cache missing some fields - fall back to DB - if not meta.get("is_virtual"): - values = frappe.db.get_value( - doctype, docname, values_to_fetch, as_dict=True, cache=True, order_by=None - ) - if not values: - values = frappe.db.get_value( - doctype, docname, values_to_fetch, as_dict=True, order_by=None - ) - else: - values = cached - elif not meta.get("is_virtual"): - # No cache - original behavior - values = frappe.db.get_value( - doctype, docname, values_to_fetch, as_dict=True, cache=True, order_by=None - ) - if not values: # NOTE: DB Value cache does negative caching, which is hard to remove now. - values = frappe.db.get_value( - doctype, docname, values_to_fetch, as_dict=True, order_by=None - ) + values = _fetch_link_values(doctype, docname, values_to_fetch, meta) + if values is None and not meta.get("is_virtual"): + values = cached # Fall back to partial cache else: - values = frappe.get_doc(doctype, docname).as_dict() + # No cache - original behavior + values = _fetch_link_values(doctype, docname, values_to_fetch, meta) # fallback to dict with field_to_fetch=None if link field value is not found # (for compatibility, `values` must have same data type)