fix: get_docs - Always use iterator internally

When `get_docs` output is unknown, we might end up generating queries
for child table with `in (...)` containing thousands of doc names.

This doesn't fare well with databases, so it's better to chunk it to
1000 by default. This is an acceptable tradeoff IMO.
This commit is contained in:
Ankush Menat 2026-04-08 21:59:44 +05:30
parent 2364216fb1
commit 02510e506a

View file

@ -194,14 +194,17 @@ def get_docs(
if limit_start and limit is None:
frappe.throw(_("limit cannot be None when limit_start is used"))
if not order_by:
# Sort order is mandatory for iterator logic
order_by = "name asc"
child_tables = [
(df.fieldname, df.options) for df in meta.get_table_fields() if not is_virtual_doctype(df.options)
]
controller = get_controller(doctype)
for_update = for_update and frappe.db.db_type != "sqlite"
if as_iterator:
return _get_docs_generator(
iterator = _get_docs_generator(
doctype,
controller,
child_tables,
@ -214,19 +217,9 @@ def get_docs(
distinct=distinct,
)
# Eagerly fetch all docs
all_data = _fetch_rows(
doctype,
filters=filters,
order_by=order_by,
limit=limit,
offset=limit_start,
for_update=for_update,
child_tables=child_tables,
distinct=distinct,
)
return _build_document_objects(controller, all_data, for_update)
if as_iterator:
return iterator
return list(iterator)
def _get_docs_generator(
@ -267,8 +260,7 @@ def _get_docs_generator(
if not chunk_data:
break
built_docs = _build_document_objects(controller, chunk_data, for_update)
yield from built_docs
yield from _build_document_objects(controller, chunk_data, for_update)
fetched_count += len(chunk_data)
current_offset += len(chunk_data)
@ -323,17 +315,11 @@ def _fetch_rows(doctype, *, filters, order_by, limit, offset, for_update, child_
def _build_document_objects(controller, data: list, for_update: bool):
if not data:
return []
built_docs = []
for row in data:
doc = controller(row)
if for_update:
doc.flags.for_update = True
built_docs.append(doc)
return built_docs
yield doc
def get_doc_permission_check(doc: "Document", check_permission: str | bool | None = None) -> "Document":