fix: get_docs - Always use iterator internally

When the number of documents `get_docs` will return is unknown, we might end up generating
child table queries with an `in (...)` clause containing thousands of doc names.

Databases don't handle such large `in (...)` lists well, so it's better to fetch in chunks of
1000 by default. This is an acceptable tradeoff IMO.
This commit is contained in:
Ankush Menat 2026-04-08 21:59:44 +05:30
parent 2364216fb1
commit 02510e506a

View file

@ -194,14 +194,17 @@ def get_docs(
if limit_start and limit is None: if limit_start and limit is None:
frappe.throw(_("limit cannot be None when limit_start is used")) frappe.throw(_("limit cannot be None when limit_start is used"))
if not order_by:
# Sort order is mandatory for iterator logic
order_by = "name asc"
child_tables = [ child_tables = [
(df.fieldname, df.options) for df in meta.get_table_fields() if not is_virtual_doctype(df.options) (df.fieldname, df.options) for df in meta.get_table_fields() if not is_virtual_doctype(df.options)
] ]
controller = get_controller(doctype) controller = get_controller(doctype)
for_update = for_update and frappe.db.db_type != "sqlite" for_update = for_update and frappe.db.db_type != "sqlite"
if as_iterator: iterator = _get_docs_generator(
return _get_docs_generator(
doctype, doctype,
controller, controller,
child_tables, child_tables,
@ -214,19 +217,9 @@ def get_docs(
distinct=distinct, distinct=distinct,
) )
# Eagerly fetch all docs if as_iterator:
all_data = _fetch_rows( return iterator
doctype, return list(iterator)
filters=filters,
order_by=order_by,
limit=limit,
offset=limit_start,
for_update=for_update,
child_tables=child_tables,
distinct=distinct,
)
return _build_document_objects(controller, all_data, for_update)
def _get_docs_generator( def _get_docs_generator(
@ -267,8 +260,7 @@ def _get_docs_generator(
if not chunk_data: if not chunk_data:
break break
built_docs = _build_document_objects(controller, chunk_data, for_update) yield from _build_document_objects(controller, chunk_data, for_update)
yield from built_docs
fetched_count += len(chunk_data) fetched_count += len(chunk_data)
current_offset += len(chunk_data) current_offset += len(chunk_data)
@ -323,17 +315,11 @@ def _fetch_rows(doctype, *, filters, order_by, limit, offset, for_update, child_
def _build_document_objects(controller, data: list, for_update: bool): def _build_document_objects(controller, data: list, for_update: bool):
if not data:
return []
built_docs = []
for row in data: for row in data:
doc = controller(row) doc = controller(row)
if for_update: if for_update:
doc.flags.for_update = True doc.flags.for_update = True
built_docs.append(doc) yield doc
return built_docs
def get_doc_permission_check(doc: "Document", check_permission: str | bool | None = None) -> "Document": def get_doc_permission_check(doc: "Document", check_permission: str | bool | None = None) -> "Document":