From 02510e506ae7b9b571e5433c86a1aef2464eb13f Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Wed, 8 Apr 2026 21:59:44 +0530 Subject: [PATCH] fix: get_docs - Always use iterator internally When `get_docs` output is unknown, we might end up generating queries for child table with `in (...)` containing thousands of doc names. This doesn't fare well with databases, so it's better to chunk it to 1000 by default. This is an acceptable tradeoff IMO. --- frappe/model/document.py | 44 ++++++++++++++-------------------------- 1 file changed, 15 insertions(+), 29 deletions(-) diff --git a/frappe/model/document.py b/frappe/model/document.py index 6620884749..e1c60bbd78 100644 --- a/frappe/model/document.py +++ b/frappe/model/document.py @@ -194,39 +194,32 @@ def get_docs( if limit_start and limit is None: frappe.throw(_("limit cannot be None when limit_start is used")) + if not order_by: + # Sort order is mandatory for iterator logic + order_by = "name asc" + child_tables = [ (df.fieldname, df.options) for df in meta.get_table_fields() if not is_virtual_doctype(df.options) ] controller = get_controller(doctype) for_update = for_update and frappe.db.db_type != "sqlite" - if as_iterator: - return _get_docs_generator( - doctype, - controller, - child_tables, - filters=filters, - chunk_size=chunk_size, - limit=limit, - limit_start=limit_start, - order_by=order_by, - for_update=for_update, - distinct=distinct, - ) - - # Eagerly fetch all docs - all_data = _fetch_rows( + iterator = _get_docs_generator( doctype, + controller, + child_tables, filters=filters, - order_by=order_by, + chunk_size=chunk_size, limit=limit, - offset=limit_start, + limit_start=limit_start, + order_by=order_by, for_update=for_update, - child_tables=child_tables, distinct=distinct, ) - return _build_document_objects(controller, all_data, for_update) + if as_iterator: + return iterator + return list(iterator) def _get_docs_generator( @@ -267,8 +260,7 @@ def _get_docs_generator( if not chunk_data: break - built_docs = _build_document_objects(controller, chunk_data, for_update) - yield from built_docs + yield from _build_document_objects(controller, chunk_data, for_update) fetched_count += len(chunk_data) current_offset += len(chunk_data) @@ -323,17 +315,11 @@ def _fetch_rows(doctype, *, filters, order_by, limit, offset, for_update, child_ def _build_document_objects(controller, data: list, for_update: bool): - if not data: - return [] - - built_docs = [] for row in data: doc = controller(row) if for_update: doc.flags.for_update = True - built_docs.append(doc) - - return built_docs + yield doc def get_doc_permission_check(doc: "Document", check_permission: str | bool | None = None) -> "Document":