Merge pull request #35872 from czarflix/fix/perf-bulk-link-validation
perf(validation): optimize link validation with bulk pre-fetching
This commit is contained in:
commit
7c756aa811
2 changed files with 202 additions and 19 deletions
|
|
@ -48,6 +48,40 @@ DatetimeTypes = datetime.date | datetime.datetime | datetime.time | datetime.tim
|
|||
|
||||
# Upper bounds of signed integer column types, keyed by column type name.
max_positive_value = {"smallint": 2**15 - 1, "int": 2**31 - 1, "bigint": 2**63 - 1}

# Sentinel object for cache miss detection in bulk link validation.
# Used to distinguish between "not in cache" and "cached as None (does not exist)".
_NOT_IN_CACHE = object()
|
||||
|
||||
|
||||
def _fetch_link_values(doctype: str, docname: str, fields: tuple, meta) -> dict | None:
	"""Fetch link field values for *docname* of *doctype*, or None if it does not exist.

	Encapsulates the repeated DB lookup pattern used during link validation:

	- Virtual doctypes cannot be read through the database layer, so they are
	  loaded via ``frappe.get_doc`` and a missing document maps to ``None``.
	- Regular doctypes are first read with ``cache=True``; on a miss the read
	  is retried uncached, because the DB value cache performs negative
	  caching and could otherwise hide a freshly inserted document.

	Args:
		doctype: Target DocType name.
		docname: Name of the document to fetch.
		fields: Tuple of field names to fetch.
		meta: Meta object for *doctype*.

	Returns:
		Dict of field values, or None if the document doesn't exist.
	"""
	if meta.get("is_virtual"):
		try:
			return frappe.get_doc(doctype, docname).as_dict()
		except frappe.DoesNotExistError:
			return None

	values = frappe.db.get_value(doctype, docname, fields, as_dict=True, cache=True, order_by=None)
	if not values:
		# Cache miss may be a stale negative-cache entry; retry without cache.
		values = frappe.db.get_value(doctype, docname, fields, as_dict=True, order_by=None)
	return values
|
||||
|
||||
|
||||
DOCTYPE_TABLE_FIELDS = [
|
||||
_dict(fieldname="fields", options="DocField"),
|
||||
_dict(fieldname="permissions", options="DocPerm"),
|
||||
|
|
@ -971,9 +1005,13 @@ class BaseDocument:
|
|||
|
||||
return missing
|
||||
|
||||
def get_invalid_links(self, is_submittable=False):
|
||||
"""Return list of invalid links and also update fetch values if not set."""
|
||||
def get_invalid_links(self, is_submittable=False, link_value_cache=None):
|
||||
"""Return list of invalid links and also update fetch values if not set.
|
||||
|
||||
Args:
|
||||
is_submittable: Whether the parent document is submittable
|
||||
link_value_cache: Cache of prefetched link values for bulk optimization
|
||||
"""
|
||||
is_submittable = is_submittable or self.meta.is_submittable
|
||||
|
||||
def get_msg(df, docname):
|
||||
|
|
@ -1019,23 +1057,45 @@ class BaseDocument:
|
|||
if not _df.get("fetch_if_empty")
|
||||
or (_df.get("fetch_if_empty") and not self.get(_df.fieldname))
|
||||
]
|
||||
values_to_fetch = (
|
||||
"name",
|
||||
*(_df.fetch_from.split(".")[-1] for _df in fields_to_fetch),
|
||||
)
|
||||
if check_docstatus:
|
||||
values_to_fetch += ("docstatus",)
|
||||
|
||||
if not meta.get("is_virtual"):
|
||||
values = frappe.db.get_value(
|
||||
doctype, docname, values_to_fetch, as_dict=True, cache=True, order_by=None
|
||||
values_to_fetch = tuple(
|
||||
sorted(
|
||||
{
|
||||
"name",
|
||||
*(_df.fetch_from.split(".")[-1] for _df in fields_to_fetch),
|
||||
*(("docstatus",) if check_docstatus else ()),
|
||||
}
|
||||
)
|
||||
if not values: # NOTE: DB Value cache does negative caching, which is hard to remove now.
|
||||
values = frappe.db.get_value(
|
||||
doctype, docname, values_to_fetch, as_dict=True, order_by=None
|
||||
)
|
||||
)
|
||||
|
||||
# Use cache if available (bulk optimization)
|
||||
if link_value_cache is not None:
|
||||
cache_for_dt = link_value_cache.get(doctype, {})
|
||||
|
||||
# Get cached value with sentinel for miss detection
|
||||
if frappe.db.db_type == "mariadb" and isinstance(docname, str):
|
||||
cached = cache_for_dt.get(docname, _NOT_IN_CACHE)
|
||||
if cached is _NOT_IN_CACHE:
|
||||
cached = cache_for_dt.get(docname.casefold(), _NOT_IN_CACHE)
|
||||
else:
|
||||
cached = cache_for_dt.get(docname, _NOT_IN_CACHE)
|
||||
|
||||
if cached is _NOT_IN_CACHE:
|
||||
# Not prefetched - fall back to original DB query path
|
||||
values = _fetch_link_values(doctype, docname, values_to_fetch, meta)
|
||||
elif cached is None:
|
||||
# Prefetch confirmed document doesn't exist
|
||||
values = _dict.fromkeys(values_to_fetch, None)
|
||||
elif all(f in cached for f in values_to_fetch):
|
||||
# Cache has all required fields
|
||||
values = cached
|
||||
else:
|
||||
# Cache missing some fields - fall back to DB
|
||||
values = _fetch_link_values(doctype, docname, values_to_fetch, meta)
|
||||
if values is None and not meta.get("is_virtual"):
|
||||
values = cached # Fall back to partial cache
|
||||
else:
|
||||
values = frappe.get_doc(doctype, docname).as_dict()
|
||||
# No cache - original behavior
|
||||
values = _fetch_link_values(doctype, docname, values_to_fetch, meta)
|
||||
|
||||
# fallback to dict with field_to_fetch=None if link field value is not found
|
||||
# (for compatibility, `values` must have same data type)
|
||||
|
|
|
|||
|
|
@ -1129,14 +1129,137 @@ class Document(BaseDocument):
|
|||
)
|
||||
)
|
||||
|
||||
def _prefetch_link_values(self):
|
||||
"""Pre-fetch all link values including fetch_from fields for bulk validation.
|
||||
|
||||
This optimization collects all Link/Dynamic Link values from the doc tree,
|
||||
then bulk-fetches them by doctype to eliminate N+1 queries.
|
||||
"""
|
||||
if self.flags.ignore_links or self._action == "cancel":
|
||||
return
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
def _chunk(iterable, size):
|
||||
"""Split iterable into chunks of given size."""
|
||||
lst = list(iterable)
|
||||
for i in range(0, len(lst), size):
|
||||
yield lst[i : i + size]
|
||||
|
||||
self._link_value_cache = {}
|
||||
docs_to_validate = [self, *self.get_all_children()]
|
||||
|
||||
# Collect: {doctype: {'names': set(), 'fields': set()}}
|
||||
prefetch_map = defaultdict(lambda: {"names": set(), "fields": {"name"}})
|
||||
|
||||
for doc in docs_to_validate:
|
||||
is_submittable = self.meta.is_submittable
|
||||
link_fields = doc.meta.get_link_fields() + doc.meta.get(
|
||||
"fields", {"fieldtype": ("=", "Dynamic Link")}
|
||||
)
|
||||
|
||||
for df in link_fields:
|
||||
docname = doc.get(df.fieldname)
|
||||
if not docname:
|
||||
continue
|
||||
|
||||
# Skip invalid docname types - let get_invalid_links handle the assertion
|
||||
if not isinstance(docname, str | int):
|
||||
continue
|
||||
|
||||
# Resolve target doctype
|
||||
if df.fieldtype == "Link":
|
||||
doctype = df.options
|
||||
if not doctype:
|
||||
continue
|
||||
else: # Dynamic Link
|
||||
doctype = doc.get(df.options)
|
||||
if not doctype:
|
||||
continue
|
||||
|
||||
prefetch_map[doctype]["names"].add(docname)
|
||||
|
||||
# Collect fetch_from fields - fetch ALL, let base_document handle fetch_if_empty
|
||||
for fetch_df in doc.meta.get_fields_to_fetch(df.fieldname):
|
||||
if fetch_df.get("fetch_from"):
|
||||
source_field = fetch_df.fetch_from.split(".")[-1]
|
||||
prefetch_map[doctype]["fields"].add(source_field)
|
||||
|
||||
# Add docstatus if needed
|
||||
target_meta = frappe.get_meta(doctype)
|
||||
if is_submittable and target_meta.is_submittable:
|
||||
prefetch_map[doctype]["fields"].add("docstatus")
|
||||
|
||||
# Bulk fetch with chunking
|
||||
for doctype, data in prefetch_map.items():
|
||||
meta = frappe.get_meta(doctype)
|
||||
names = list(data["names"])
|
||||
fields = sorted(data["fields"]) # Sorted for deterministic cache key matching
|
||||
|
||||
# Skip if no names to fetch for this doctype
|
||||
if not names:
|
||||
continue
|
||||
|
||||
if meta.get("is_virtual"):
|
||||
# Virtual doctypes: fetch individually
|
||||
for name in names:
|
||||
try:
|
||||
values = frappe.get_doc(doctype, name).as_dict()
|
||||
except frappe.DoesNotExistError:
|
||||
values = None
|
||||
self._link_value_cache.setdefault(doctype, {})[name] = values
|
||||
|
||||
elif getattr(meta, "issingle", 0):
|
||||
# Single doctypes
|
||||
values = frappe.db.get_singles_dict(doctype)
|
||||
values["name"] = doctype
|
||||
for name in names:
|
||||
self._link_value_cache.setdefault(doctype, {})[name] = frappe._dict(values)
|
||||
|
||||
else:
|
||||
# Regular doctypes: bulk fetch with chunking
|
||||
result_dict = {}
|
||||
field_tuple = tuple(fields)
|
||||
|
||||
for name_chunk in _chunk(names, 1000):
|
||||
results = frappe.db.get_all(
|
||||
doctype,
|
||||
filters={"name": ("in", name_chunk)},
|
||||
fields=fields,
|
||||
)
|
||||
for row in results:
|
||||
result_dict[row.name] = row
|
||||
# Case-insensitive key for MariaDB compatibility (strings only)
|
||||
if frappe.db.db_type == "mariadb" and isinstance(row.name, str):
|
||||
result_dict[row.name.casefold()] = row
|
||||
|
||||
# Store results in both caches
|
||||
for name in names:
|
||||
if frappe.db.db_type == "mariadb" and isinstance(name, str):
|
||||
cached_value = result_dict.get(name) or result_dict.get(name.casefold())
|
||||
else:
|
||||
cached_value = result_dict.get(name)
|
||||
|
||||
# Store in local document cache
|
||||
self._link_value_cache.setdefault(doctype, {})[name] = cached_value
|
||||
|
||||
# Also populate global db.value_cache for cross-document caching
|
||||
# Only for string names (matching get_values behavior at line 632)
|
||||
if cached_value is not None and isinstance(name, str):
|
||||
frappe.db.value_cache[doctype][name][field_tuple] = [cached_value]
|
||||
|
||||
def _validate_links(self):
|
||||
if self.flags.ignore_links or self._action == "cancel":
|
||||
return
|
||||
|
||||
invalid_links, cancelled_links = self.get_invalid_links()
|
||||
# Pre-fetch all link values in bulk
|
||||
self._prefetch_link_values()
|
||||
link_cache = getattr(self, "_link_value_cache", None)
|
||||
|
||||
invalid_links, cancelled_links = self.get_invalid_links(link_value_cache=link_cache)
|
||||
|
||||
for d in self.get_all_children():
|
||||
result = d.get_invalid_links(is_submittable=self.meta.is_submittable)
|
||||
result = d.get_invalid_links(is_submittable=self.meta.is_submittable, link_value_cache=link_cache)
|
||||
invalid_links.extend(result[0])
|
||||
cancelled_links.extend(result[1])
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue