From 1f969716223cfb9c2572b1223be4002db15bcd14 Mon Sep 17 00:00:00 2001 From: Shrihari Mahabal Date: Mon, 9 Mar 2026 12:30:27 +0530 Subject: [PATCH 01/13] feat: get_docs to get multiple instantiated document objects --- frappe/__init__.py | 1 + frappe/model/document.py | 168 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 169 insertions(+) diff --git a/frappe/__init__.py b/frappe/__init__.py index 8996a7fbe3..2a9a508921 100644 --- a/frappe/__init__.py +++ b/frappe/__init__.py @@ -1573,6 +1573,7 @@ from frappe.config import get_common_site_config, get_conf, get_site_config from frappe.core.doctype.system_settings.system_settings import get_system_settings from frappe.model.document import ( get_doc, + get_docs, get_lazy_doc, copy_doc, new_doc, diff --git a/frappe/model/document.py b/frappe/model/document.py index b94cf61173..89e06a8a7f 100644 --- a/frappe/model/document.py +++ b/frappe/model/document.py @@ -156,6 +156,174 @@ def get_lazy_doc( raise ImportError(doctype) +def get_docs( + doctype: str, + filters: dict | None = None, + *, + chunk_size: int = 1000, + limit: int | None = None, + limit_start: int = 0, + order_by: str = "creation asc", + as_generator: bool = False, + for_update: bool = False, +) -> list["Document"] | Generator[list["Document"]]: + """Fetch fully instantiated Document objects from the database. + + Returns a list of Documents by default. Pass ``as_generator=True`` to get + a chunked generator that yields a list of Documents per chunk to reduce + peak memory usage. + """ + if is_virtual_doctype(doctype): + frappe.throw(_("Virtual DocType {0} cannot be fetched in bulk.").format(doctype)) + + meta = frappe.get_meta(doctype) + + if meta.issingle: + frappe.throw(_("Single DocType {0} cannot be fetched in bulk.").format(doctype)) + + if limit_start and limit is None: + frappe.throw(_("limit cannot be None when limit_start is used")) + + child_tables = [ + (df.fieldname, df.options) for df in meta.get_table_fields() if not is_virtual_doctype(df.options) + ] + controller = get_controller(doctype) + lock_rows = for_update and frappe.db.db_type != "sqlite" + + if as_generator: + return _get_docs_generator( + doctype, + controller, + child_tables, + filters=filters, + chunk_size=chunk_size, + limit=limit, + limit_start=limit_start, + order_by=order_by, + lock_rows=lock_rows, + for_update=for_update, + ) + + # Eagerly fetch all docs + all_data = _fetch_rows( + doctype, + filters=filters, + order_by=order_by, + limit=limit, + offset=limit_start, + lock_rows=lock_rows, + child_tables=child_tables, + ) + + return _build_document_objects(controller, all_data, for_update) + + +def _get_docs_generator( + doctype, + controller, + child_tables, + *, + filters, + chunk_size, + limit, + limit_start, + order_by, + lock_rows, + for_update, +) -> Generator[list["Document"]]: + fetched_count = 0 + current_offset = limit_start + + while True: + current_chunk_size = chunk_size + if limit is not None: + remaining = limit - fetched_count + if remaining <= 0: + break + current_chunk_size = min(chunk_size, remaining) + + chunk_data = _fetch_rows( + doctype, + filters=filters, + order_by=order_by, + limit=current_chunk_size, + offset=current_offset, + lock_rows=lock_rows, + child_tables=child_tables, + ) + + if not chunk_data: + break + + built_docs = _build_document_objects(controller, chunk_data, for_update) + yield built_docs + + fetched_count += len(chunk_data) + current_offset += len(chunk_data) + + +def _fetch_rows(doctype, *, filters, order_by, limit, offset, lock_rows, child_tables): + kwargs = {} + if limit is not None: + kwargs["limit"] = limit + if offset: + kwargs["offset"] = offset + + data = frappe.qb.get_query( + table=doctype, + filters=filters or {}, + fields=["*"], + order_by=order_by, + for_update=lock_rows, + **kwargs, + ).run(as_dict=True) + + if not data: + return [] + + for row in data: + row["doctype"] = doctype + + fetched_docs_by_name = {row.name: row for row in data} + parent_names = list(fetched_docs_by_name.keys()) + + for fieldname, child_doctype in child_tables: + child_table_data = frappe.qb.get_query( + table=child_doctype, + filters={"parent": ("in", parent_names), "parenttype": doctype, "parentfield": fieldname}, + fields=["*"], + order_by="idx asc", + for_update=lock_rows, + ).run(as_dict=True) + + for child in child_table_data: + child["doctype"] = child_doctype + + for parent_doc in fetched_docs_by_name.values(): + parent_doc[fieldname] = [] + + for child in child_table_data: + if child.parent in fetched_docs_by_name: + fetched_docs_by_name[child.parent][fieldname].append(child) + + return list(fetched_docs_by_name.values()) + + +def _build_document_objects(controller, data: list, for_update: bool): + if not data: + return [] + + built_docs = [] + for row in data: + doc = controller(row) + if for_update: + doc.flags.for_update = True + doc.mask_fields() + built_docs.append(doc) + + return built_docs + + def get_doc_permission_check(doc: "Document", check_permission: str | bool | None = None) -> "Document": """ Checks permissions for the given document, if specified. From e2fef24a08ba4448500f7ea25d98a8117534573d Mon Sep 17 00:00:00 2001 From: Shrihari Mahabal Date: Mon, 9 Mar 2026 12:31:08 +0530 Subject: [PATCH 02/13] test: add tests for get_docs --- frappe/tests/test_document.py | 115 ++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) diff --git a/frappe/tests/test_document.py b/frappe/tests/test_document.py index 8c4ed6ac01..7486a3cef3 100644 --- a/frappe/tests/test_document.py +++ b/frappe/tests/test_document.py @@ -782,3 +782,118 @@ class TestLazyDocument(IntegrationTestCase): def test_for_update(self): guest = frappe.get_lazy_doc("User", "Guest", for_update=True) self.assertTrue(guest.flags.for_update) + + +class TestGetDocs(IntegrationTestCase): + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.child_dt = "Test Get Docs Child" + cls.parent_dt = "Test Get Docs Parent" + + if not frappe.db.exists("DocType", cls.child_dt): + new_doctype(name=cls.child_dt, istable=1).insert() + + if not frappe.db.exists("DocType", cls.parent_dt): + new_doctype( + name=cls.parent_dt, + fields=[ + {"fieldtype": "Data", "fieldname": "title", "label": "Title"}, + { + "fieldtype": "Table", + "fieldname": "child_table", + "options": cls.child_dt, + "label": "Child Table", + }, + ], + ).insert() + + if not frappe.db.count(cls.parent_dt): + for i in range(5): + frappe.get_doc( + { + "doctype": cls.parent_dt, + "title": f"Record {i}", + "child_table": [ + {"some_fieldname": f"child_{i}_0"}, + {"some_fieldname": f"child_{i}_1"}, + ], + } + ).insert() + + @classmethod + def tearDownClass(cls): + frappe.db.delete(cls.child_dt) + frappe.db.delete(cls.parent_dt) + frappe.delete_doc("DocType", cls.parent_dt, force=True) + frappe.delete_doc("DocType", cls.child_dt, force=True) + super().tearDownClass() + + def test_returns_document_instances(self): + docs = frappe.get_docs(self.parent_dt) + self.assertEqual(len(docs), 5) + self.assertIsInstance(docs[0], frappe.model.document.Document) + self.assertEqual(docs[0].doctype, self.parent_dt) + + def test_child_tables_populated(self): + docs = frappe.get_docs(self.parent_dt) + for doc in docs: + self.assertEqual(len(doc.child_table), 2) + for child in doc.child_table: + self.assertIsInstance(child, frappe.model.document.Document) + self.assertEqual(child.doctype, self.child_dt) + + def test_parity_with_get_doc(self): + docs = frappe.get_docs(self.parent_dt, limit=1) + doc_bulk = docs[0] + doc_single = frappe.get_doc(self.parent_dt, doc_bulk.name) + + self.assertEqual(doc_bulk.as_dict(), doc_single.as_dict()) + + def test_filters(self): + docs = frappe.get_docs(self.parent_dt, filters={"title": "Record 0"}) + self.assertEqual(len(docs), 1) + self.assertEqual(docs[0].title, "Record 0") + + def test_limit(self): + docs = frappe.get_docs(self.parent_dt, limit=2) + self.assertEqual(len(docs), 2) + + def test_limit_start(self): + all_docs = frappe.get_docs(self.parent_dt, order_by="creation asc") + offset_docs = frappe.get_docs(self.parent_dt, limit_start=2, limit=5, order_by="creation asc") + self.assertEqual(len(offset_docs), 3) + self.assertEqual(offset_docs[0].name, all_docs[2].name) + + def test_order_by(self): + docs_asc = frappe.get_docs(self.parent_dt, order_by="creation asc") + docs_desc = frappe.get_docs(self.parent_dt, order_by="creation desc") + self.assertEqual(docs_asc[0].name, docs_desc[-1].name) + + def test_generator_yields_chunks(self): + chunks = list(frappe.get_docs(self.parent_dt, as_generator=True, chunk_size=2)) + # 5 records with chunk_size=2 should give 3 chunks (2, 2, 1) + self.assertEqual(len(chunks), 3) + self.assertEqual(len(chunks[0]), 2) + self.assertEqual(len(chunks[1]), 2) + self.assertEqual(len(chunks[2]), 1) + + def test_generator_with_limit(self): + chunks = list(frappe.get_docs(self.parent_dt, as_generator=True, chunk_size=2, limit=3)) + total = sum(len(c) for c in chunks) + self.assertEqual(total, 3) + + def test_generator_parity(self): + eager = frappe.get_docs(self.parent_dt, order_by="creation asc") + gen_docs = [ + doc + for chunk in frappe.get_docs( + self.parent_dt, as_generator=True, chunk_size=2, order_by="creation asc" + ) + for doc in chunk + ] + self.assertEqual([d.name for d in eager], [d.name for d in gen_docs]) + + def test_for_update_sets_flag(self): + docs = frappe.get_docs(self.parent_dt, limit=1, for_update=True) + self.assertTrue(docs[0].flags.for_update) From 16efc5fa454bfe06d574330524bf7cb7930ef1ba Mon Sep 17 00:00:00 2001 From: Shrihari Mahabal Date: Mon, 9 Mar 2026 12:39:56 +0530 Subject: [PATCH 03/13] chore: add docstring for get_docs --- frappe/model/document.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/frappe/model/document.py b/frappe/model/document.py index 89e06a8a7f..65cb564fc2 100644 --- a/frappe/model/document.py +++ b/frappe/model/document.py @@ -169,9 +169,17 @@ def get_docs( ) -> list["Document"] | Generator[list["Document"]]: """Fetch fully instantiated Document objects from the database. - Returns a list of Documents by default. Pass ``as_generator=True`` to get - a chunked generator that yields a list of Documents per chunk to reduce - peak memory usage. + Returns a list of Documents by default. Pass `as_generator=True` to get + a chunked generator that yields a list of Documents per chunk to reduce memory usage. + + :param doctype: DocType of the records to fetch. + :param filters: Dict or list of filters to apply. + :param chunk_size: Number of records to yield per chunk if using `as_generator`. Default 1000. + :param limit: Maximum total number of records to fetch. + :param limit_start: Start results at record #. Default 0. + :param order_by: Order By string, e.g. `creation desc`. + :param as_generator: If True, returns a generator yielding lists of Documents. + :param for_update: If True, locks the fetched rows for update. """ if is_virtual_doctype(doctype): frappe.throw(_("Virtual DocType {0} cannot be fetched in bulk.").format(doctype)) From c174881534f89f2a8231de2c6b9c65f869bb820c Mon Sep 17 00:00:00 2001 From: Shrihari Mahabal Date: Tue, 10 Mar 2026 13:26:38 +0530 Subject: [PATCH 04/13] refactor: change existing functionality in framework to check if get_docs is working --- frappe/core/doctype/user/user.py | 10 ++++------ frappe/core/doctype/user_invitation/user_invitation.py | 3 +-- frappe/desk/doctype/event/event.py | 6 ++---- .../doctype/auto_email_report/auto_email_report.py | 4 ++-- frappe/utils/scheduler.py | 3 +-- 5 files changed, 10 insertions(+), 16 deletions(-) diff --git a/frappe/core/doctype/user/user.py b/frappe/core/doctype/user/user.py index b1831d59f9..81e1688bad 100644 --- a/frappe/core/doctype/user/user.py +++ b/frappe/core/doctype/user/user.py @@ -617,18 +617,16 @@ class User(Document): frappe.db.delete("List Filter", {"for_user": self.name}) # Remove user from Note's Seen By table - seen_notes = frappe.get_all("Note", filters=[["Note Seen By", "user", "=", self.name]], pluck="name") - for note_id in seen_notes: - note = frappe.get_doc("Note", note_id) + seen_notes = frappe.get_docs("Note", filters=[["Note Seen By", "user", "=", self.name]]) + for note in seen_notes: for row in note.seen_by: if row.user == self.name: note.remove(row) note.save(ignore_permissions=True) # Unlink user from all of its invitation docs - invites = frappe.db.get_all("User Invitation", filters={"email": self.name}, pluck="name") - for invite in invites: - invite_doc = frappe.get_doc("User Invitation", invite) + invites = frappe.get_docs("User Invitation", filters={"email": self.name}) + for invite_doc in invites: invite_doc.user = None invite_doc.save(ignore_permissions=True) diff --git a/frappe/core/doctype/user_invitation/user_invitation.py b/frappe/core/doctype/user_invitation/user_invitation.py index 4d098d5af7..582ee2dfbe 100644 --- a/frappe/core/doctype/user_invitation/user_invitation.py +++ b/frappe/core/doctype/user_invitation/user_invitation.py @@ -206,12 +206,11 @@ class UserInvitation(Document): def mark_expired_invitations() -> None: days = 3 - invitations_to_expire = frappe.db.get_all( + invitations_to_expire = frappe.get_docs( "User Invitation", filters={"status": "Pending", "creation": ["<", frappe.utils.add_days(frappe.utils.now(), -days)]}, ) for invitation in invitations_to_expire: - invitation = frappe.get_doc("User Invitation", invitation.name) invitation.expire() # to avoid losing work in case the job times out without finishing frappe.db.commit() # nosemgrep diff --git a/frappe/desk/doctype/event/event.py b/frappe/desk/doctype/event/event.py index 99af59a417..33e480bb8c 100644 --- a/frappe/desk/doctype/event/event.py +++ b/frappe/desk/doctype/event/event.py @@ -137,7 +137,7 @@ class Event(Document): return for participant in self.event_participants: - if communications := frappe.get_all( + if communications := frappe.get_docs( "Communication", filters=[ ["Communication", "reference_doctype", "=", self.doctype], @@ -145,11 +145,9 @@ class Event(Document): ["Communication Link", "link_doctype", "=", participant.reference_doctype], ["Communication Link", "link_name", "=", participant.reference_docname], ], - pluck="name", distinct=True, ): - for comm in communications: - communication = frappe.get_doc("Communication", comm) + for communication in communications: self.update_communication(participant, communication) else: meta = frappe.get_meta(participant.reference_doctype) diff --git a/frappe/email/doctype/auto_email_report/auto_email_report.py b/frappe/email/doctype/auto_email_report/auto_email_report.py index f3109511a2..96f3ae0c44 100644 --- a/frappe/email/doctype/auto_email_report/auto_email_report.py +++ b/frappe/email/doctype/auto_email_report/auto_email_report.py @@ -359,8 +359,8 @@ def process_auto_email_report(report): def send_monthly(): """Check reports to be sent monthly""" - for report in frappe.get_all("Auto Email Report", {"enabled": 1, "frequency": "Monthly"}): - frappe.get_doc("Auto Email Report", report.name).send() + for report in frappe.get_docs("Auto Email Report", filters={"enabled": 1, "frequency": "Monthly"}): + report.send() def make_links(columns, data): diff --git a/frappe/utils/scheduler.py b/frappe/utils/scheduler.py index 2d8075f8e9..8555bdd639 100644 --- a/frappe/utils/scheduler.py +++ b/frappe/utils/scheduler.py @@ -132,10 +132,9 @@ def enqueue_events_for_site(site: str) -> None: def enqueue_events() -> list[str] | None: if schedule_jobs_based_on_activity(): enqueued_jobs = [] - all_jobs = frappe.get_all("Scheduled Job Type", filters={"stopped": 0}, fields="*") + all_jobs = frappe.get_docs("Scheduled Job Type", filters={"stopped": 0}) random.shuffle(all_jobs) for job_type in all_jobs: - job_type = frappe.get_doc(doctype="Scheduled Job Type", **job_type) try: if job_type.enqueue(): enqueued_jobs.append(job_type.method) From 7ff564c227f0457113b8affde2408e875491a135 Mon Sep 17 00:00:00 2001 From: Shrihari Mahabal Date: Tue, 10 Mar 2026 14:00:08 +0530 Subject: [PATCH 05/13] refactor: add support for distinct in get_docs --- frappe/model/document.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/frappe/model/document.py b/frappe/model/document.py index 65cb564fc2..20db79d9c1 100644 --- a/frappe/model/document.py +++ b/frappe/model/document.py @@ -166,6 +166,7 @@ def get_docs( order_by: str = "creation asc", as_generator: bool = False, for_update: bool = False, + distinct: bool = False, ) -> list["Document"] | Generator[list["Document"]]: """Fetch fully instantiated Document objects from the database. @@ -180,6 +181,7 @@ def get_docs( :param order_by: Order By string, e.g. `creation desc`. :param as_generator: If True, returns a generator yielding lists of Documents. :param for_update: If True, locks the fetched rows for update. + :param distinct: If True, return distinct rows. """ if is_virtual_doctype(doctype): frappe.throw(_("Virtual DocType {0} cannot be fetched in bulk.").format(doctype)) @@ -210,6 +212,7 @@ def get_docs( order_by=order_by, lock_rows=lock_rows, for_update=for_update, + distinct=distinct, ) # Eagerly fetch all docs @@ -221,6 +224,7 @@ def get_docs( offset=limit_start, lock_rows=lock_rows, child_tables=child_tables, + distinct=distinct, ) return _build_document_objects(controller, all_data, for_update) @@ -238,6 +242,7 @@ def _get_docs_generator( order_by, lock_rows, for_update, + distinct, ) -> Generator[list["Document"]]: fetched_count = 0 current_offset = limit_start @@ -258,6 +263,7 @@ def _get_docs_generator( offset=current_offset, lock_rows=lock_rows, child_tables=child_tables, + distinct=distinct, ) if not chunk_data: @@ -270,7 +276,7 @@ def _get_docs_generator( current_offset += len(chunk_data) -def _fetch_rows(doctype, *, filters, order_by, limit, offset, lock_rows, child_tables): +def _fetch_rows(doctype, *, filters, order_by, limit, offset, lock_rows, child_tables, distinct=False): kwargs = {} if limit is not None: kwargs["limit"] = limit @@ -283,6 +289,7 @@ def _fetch_rows(doctype, *, filters, order_by, limit, offset, lock_rows, child_t fields=["*"], order_by=order_by, for_update=lock_rows, + distinct=distinct, **kwargs, ).run(as_dict=True) From 0ae52b051eb5cf8b3d6abd6b6b082493d21a6e4d Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Wed, 8 Apr 2026 10:50:47 +0530 Subject: [PATCH 06/13] test: use another doctype to avoid test pollution --- frappe/tests/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frappe/tests/test_api.py b/frappe/tests/test_api.py index 5132359b0c..080de33564 100644 --- a/frappe/tests/test_api.py +++ b/frappe/tests/test_api.py @@ -170,7 +170,7 @@ class TestResourceAPI(FrappeAPITestCase): def test_unauthorized_call(self): # test 1: fetch documents without auth - response = requests.get(self.resource(self.DOCTYPE)) + response = requests.get(self.resource("User")) self.assertEqual(response.status_code, 403) def test_get_list(self): From 0d833d658eda851c09fd094aeb9f0034d888adbf Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Wed, 8 Apr 2026 21:12:29 +0530 Subject: [PATCH 07/13] refactor: use `as_iterator` instead of `as_generator` Because it's already used in `db.sql`. So use consistent naming. --- frappe/model/document.py | 10 +++++----- frappe/tests/test_document.py | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/frappe/model/document.py b/frappe/model/document.py index 25538eb3e4..d0ad8106fa 100644 --- a/frappe/model/document.py +++ b/frappe/model/document.py @@ -164,22 +164,22 @@ def get_docs( limit: int | None = None, limit_start: int = 0, order_by: str = "creation asc", - as_generator: bool = False, + as_iterator: bool = False, for_update: bool = False, distinct: bool = False, ) -> list["Document"] | Generator[list["Document"]]: """Fetch fully instantiated Document objects from the database. - Returns a list of Documents by default. Pass `as_generator=True` to get + Returns a list of Documents by default. Pass `as_iterator=True` to get a chunked generator that yields a list of Documents per chunk to reduce memory usage. :param doctype: DocType of the records to fetch. :param filters: Dict or list of filters to apply. - :param chunk_size: Number of records to yield per chunk if using `as_generator`. Default 1000. + :param chunk_size: Number of records to yield per chunk if using `as_iterator`. Default 1000. :param limit: Maximum total number of records to fetch. :param limit_start: Start results at record #. Default 0. :param order_by: Order By string, e.g. `creation desc`. - :param as_generator: If True, returns a generator yielding lists of Documents. + :param as_iterator: If True, returns a generator yielding lists of Documents. :param for_update: If True, locks the fetched rows for update. :param distinct: If True, return distinct rows. """ @@ -200,7 +200,7 @@ def get_docs( controller = get_controller(doctype) lock_rows = for_update and frappe.db.db_type != "sqlite" - if as_generator: + if as_iterator: return _get_docs_generator( doctype, controller, diff --git a/frappe/tests/test_document.py b/frappe/tests/test_document.py index 7cdac094bb..478d84cf68 100644 --- a/frappe/tests/test_document.py +++ b/frappe/tests/test_document.py @@ -904,7 +904,7 @@ class TestGetDocs(IntegrationTestCase): self.assertEqual(docs_asc[0].name, docs_desc[-1].name) def test_generator_yields_chunks(self): - chunks = list(frappe.get_docs(self.parent_dt, as_generator=True, chunk_size=2)) + chunks = list(frappe.get_docs(self.parent_dt, as_iterator=True, chunk_size=2)) # 5 records with chunk_size=2 should give 3 chunks (2, 2, 1) self.assertEqual(len(chunks), 3) self.assertEqual(len(chunks[0]), 2) @@ -912,7 +912,7 @@ class TestGetDocs(IntegrationTestCase): self.assertEqual(len(chunks[2]), 1) def test_generator_with_limit(self): - chunks = list(frappe.get_docs(self.parent_dt, as_generator=True, chunk_size=2, limit=3)) + chunks = list(frappe.get_docs(self.parent_dt, as_iterator=True, chunk_size=2, limit=3)) total = sum(len(c) for c in chunks) self.assertEqual(total, 3) @@ -921,7 +921,7 @@ class TestGetDocs(IntegrationTestCase): gen_docs = [ doc for chunk in frappe.get_docs( - self.parent_dt, as_generator=True, chunk_size=2, order_by="creation asc" + self.parent_dt, as_iterator=True, chunk_size=2, order_by="creation asc" ) for doc in chunk ] From a303fbc3eafeb46514696642c1a08762b8f432db Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Wed, 8 Apr 2026 21:17:44 +0530 Subject: [PATCH 08/13] refactor: Consistent API for list/generator Returning chunks is not expected API. Why? Because we should always be able to do: ```python for doc in frappe.get_docs(...): ... ``` --- frappe/model/document.py | 8 ++++---- frappe/tests/test_document.py | 23 +++-------------------- 2 files changed, 7 insertions(+), 24 deletions(-) diff --git a/frappe/model/document.py b/frappe/model/document.py index d0ad8106fa..2a969fb910 100644 --- a/frappe/model/document.py +++ b/frappe/model/document.py @@ -167,7 +167,7 @@ def get_docs( as_iterator: bool = False, for_update: bool = False, distinct: bool = False, -) -> list["Document"] | Generator[list["Document"]]: +) -> list["Document"] | Generator["Document"]: """Fetch fully instantiated Document objects from the database. Returns a list of Documents by default. Pass `as_iterator=True` to get @@ -175,7 +175,7 @@ def get_docs( :param doctype: DocType of the records to fetch. :param filters: Dict or list of filters to apply. - :param chunk_size: Number of records to yield per chunk if using `as_iterator`. Default 1000. + :param chunk_size: Number of records to fetch in each chunk if using `as_iterator`. :param limit: Maximum total number of records to fetch. :param limit_start: Start results at record #. Default 0. :param order_by: Order By string, e.g. `creation desc`. @@ -243,7 +243,7 @@ def _get_docs_generator( lock_rows, for_update, distinct, -) -> Generator[list["Document"]]: +) -> Generator["Document"]: fetched_count = 0 current_offset = limit_start @@ -270,7 +270,7 @@ def _get_docs_generator( break built_docs = _build_document_objects(controller, chunk_data, for_update) - yield built_docs + yield from built_docs fetched_count += len(chunk_data) current_offset += len(chunk_data) diff --git a/frappe/tests/test_document.py b/frappe/tests/test_document.py index 478d84cf68..94e2f0e643 100644 --- a/frappe/tests/test_document.py +++ b/frappe/tests/test_document.py @@ -903,28 +903,11 @@ class TestGetDocs(IntegrationTestCase): docs_desc = frappe.get_docs(self.parent_dt, order_by="creation desc") self.assertEqual(docs_asc[0].name, docs_desc[-1].name) - def test_generator_yields_chunks(self): - chunks = list(frappe.get_docs(self.parent_dt, as_iterator=True, chunk_size=2)) - # 5 records with chunk_size=2 should give 3 chunks (2, 2, 1) - self.assertEqual(len(chunks), 3) - self.assertEqual(len(chunks[0]), 2) - self.assertEqual(len(chunks[1]), 2) - self.assertEqual(len(chunks[2]), 1) - - def test_generator_with_limit(self): - chunks = list(frappe.get_docs(self.parent_dt, as_iterator=True, chunk_size=2, limit=3)) - total = sum(len(c) for c in chunks) - self.assertEqual(total, 3) - def test_generator_parity(self): eager = frappe.get_docs(self.parent_dt, order_by="creation asc") - gen_docs = [ - doc - for chunk in frappe.get_docs( - self.parent_dt, as_iterator=True, chunk_size=2, order_by="creation asc" - ) - for doc in chunk - ] + gen_docs = list( + frappe.get_docs(self.parent_dt, as_iterator=True, chunk_size=2, order_by="creation asc") + ) self.assertEqual([d.name for d in eager], [d.name for d in gen_docs]) def test_for_update_sets_flag(self): From 8a0825fe6d7bb849b7ca8598788d9c69d63ab43c Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Wed, 8 Apr 2026 21:27:43 +0530 Subject: [PATCH 09/13] test: don't hardcode throw-away doctype names --- frappe/tests/test_document.py | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/frappe/tests/test_document.py b/frappe/tests/test_document.py index 94e2f0e643..e28ca060c0 100644 --- a/frappe/tests/test_document.py +++ b/frappe/tests/test_document.py @@ -824,12 +824,9 @@ class TestGetDocs(IntegrationTestCase): cls.child_dt = "Test Get Docs Child" cls.parent_dt = "Test Get Docs Parent" - if not frappe.db.exists("DocType", cls.child_dt): - new_doctype(name=cls.child_dt, istable=1).insert() - - if not frappe.db.exists("DocType", cls.parent_dt): + cls.child_dt = new_doctype(istable=1).insert().name + cls.parent_dt = ( new_doctype( - name=cls.parent_dt, fields=[ {"fieldtype": "Data", "fieldname": "title", "label": "Title"}, { @@ -839,21 +836,22 @@ class TestGetDocs(IntegrationTestCase): "label": "Child Table", }, ], + ) + .insert() + .name + ) + for i in range(5): + frappe.get_doc( + { + "doctype": cls.parent_dt, + "title": f"Record {i}", + "child_table": [ + {"some_fieldname": f"child_{i}_0"}, + {"some_fieldname": f"child_{i}_1"}, + ], + } ).insert() - if not frappe.db.count(cls.parent_dt): - for i in range(5): - frappe.get_doc( - { - "doctype": cls.parent_dt, - "title": f"Record {i}", - "child_table": [ - {"some_fieldname": f"child_{i}_0"}, - {"some_fieldname": f"child_{i}_1"}, - ], - } - ).insert() - @classmethod def tearDownClass(cls): frappe.db.delete(cls.child_dt) From b1a723f514b3c0ef9a9340e9933d0d650c236bcc Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Wed, 8 Apr 2026 21:36:07 +0530 Subject: [PATCH 10/13] refactor: remove redundant `lock_rows` --- frappe/model/document.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/frappe/model/document.py b/frappe/model/document.py index 2a969fb910..3f24ea063b 100644 --- a/frappe/model/document.py +++ b/frappe/model/document.py @@ -198,7 +198,7 @@ def get_docs( (df.fieldname, df.options) for df in meta.get_table_fields() if not is_virtual_doctype(df.options) ] controller = get_controller(doctype) - lock_rows = for_update and frappe.db.db_type != "sqlite" + for_update = for_update and frappe.db.db_type != "sqlite" if as_iterator: return _get_docs_generator( @@ -210,7 +210,6 @@ def get_docs( limit=limit, limit_start=limit_start, order_by=order_by, - lock_rows=lock_rows, for_update=for_update, distinct=distinct, ) @@ -222,7 +221,7 @@ def get_docs( order_by=order_by, limit=limit, offset=limit_start, - lock_rows=lock_rows, + for_update=for_update, child_tables=child_tables, distinct=distinct, ) @@ -240,7 +239,6 @@ def _get_docs_generator( limit, limit_start, order_by, - lock_rows, for_update, distinct, ) -> Generator["Document"]: @@ -261,7 +259,7 @@ def _get_docs_generator( order_by=order_by, limit=current_chunk_size, offset=current_offset, - lock_rows=lock_rows, + for_update=for_update, child_tables=child_tables, distinct=distinct, ) @@ -276,7 +274,7 @@ def _get_docs_generator( current_offset += len(chunk_data) -def _fetch_rows(doctype, *, filters, order_by, limit, offset, lock_rows, child_tables, distinct=False): +def _fetch_rows(doctype, *, filters, order_by, limit, offset, for_update, child_tables, distinct=False): kwargs = {} if limit is not None: kwargs["limit"] = limit @@ -288,7 +286,7 @@ def _fetch_rows(doctype, *, filters, order_by, limit, offset, lock_rows, child_t filters=filters or {}, fields=["*"], order_by=order_by, - for_update=lock_rows, + for_update=for_update, distinct=distinct, **kwargs, ).run(as_dict=True) @@ -308,7 +306,7 @@ def _fetch_rows(doctype, *, filters, order_by, limit, offset, lock_rows, child_t filters={"parent": ("in", parent_names), "parenttype": doctype, "parentfield": fieldname}, fields=["*"], order_by="idx asc", - for_update=lock_rows, + for_update=for_update, ).run(as_dict=True) for child in child_table_data: From 2364216fb15ea6042102cd7a14c03d00a710b379 Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Wed, 8 Apr 2026 21:41:45 +0530 Subject: [PATCH 11/13] fix: Avoid masking in get_docs get_doc, so far doesn't do perm checks by default. Masking is part of permissions. --- frappe/model/document.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/frappe/model/document.py b/frappe/model/document.py index 3f24ea063b..6620884749 100644 --- a/frappe/model/document.py +++ b/frappe/model/document.py @@ -179,7 +179,7 @@ def get_docs( :param limit: Maximum total number of records to fetch. :param limit_start: Start results at record #. Default 0. :param order_by: Order By string, e.g. `creation desc`. - :param as_iterator: If True, returns a generator yielding lists of Documents. + :param as_iterator: If True, returns a iterator yielding Documents. :param for_update: If True, locks the fetched rows for update. :param distinct: If True, return distinct rows. """ @@ -331,7 +331,6 @@ def _build_document_objects(controller, data: list, for_update: bool): doc = controller(row) if for_update: doc.flags.for_update = True - doc.mask_fields() built_docs.append(doc) return built_docs From 02510e506ae7b9b571e5433c86a1aef2464eb13f Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Wed, 8 Apr 2026 21:59:44 +0530 Subject: [PATCH 12/13] fix: get_docs - Always use iterator internally When `get_docs` output is unknown, we might end up generating queries for child table with `in (...)` containing thousands of doc names. This doesn't fare well with databases, so it's better to chunk it to 1000 by default. This is an acceptable tradeoff IMO. --- frappe/model/document.py | 44 ++++++++++++++-------------------------- 1 file changed, 15 insertions(+), 29 deletions(-) diff --git a/frappe/model/document.py b/frappe/model/document.py index 6620884749..e1c60bbd78 100644 --- a/frappe/model/document.py +++ b/frappe/model/document.py @@ -194,39 +194,32 @@ def get_docs( if limit_start and limit is None: frappe.throw(_("limit cannot be None when limit_start is used")) + if not order_by: + # Sort order is mandatory for iterator logic + order_by = "name asc" + child_tables = [ (df.fieldname, df.options) for df in meta.get_table_fields() if not is_virtual_doctype(df.options) ] controller = get_controller(doctype) for_update = for_update and frappe.db.db_type != "sqlite" - if as_iterator: - return _get_docs_generator( - doctype, - controller, - child_tables, - filters=filters, - chunk_size=chunk_size, - limit=limit, - limit_start=limit_start, - order_by=order_by, - for_update=for_update, - distinct=distinct, - ) - - # Eagerly fetch all docs - all_data = _fetch_rows( + iterator = _get_docs_generator( doctype, + controller, + child_tables, filters=filters, - order_by=order_by, + chunk_size=chunk_size, limit=limit, - offset=limit_start, + limit_start=limit_start, + order_by=order_by, for_update=for_update, - child_tables=child_tables, distinct=distinct, ) - return _build_document_objects(controller, all_data, for_update) + if as_iterator: + return iterator + return list(iterator) def _get_docs_generator( @@ -267,8 +260,7 @@ def _get_docs_generator( if not chunk_data: break - built_docs = _build_document_objects(controller, chunk_data, for_update) - yield from built_docs + yield from _build_document_objects(controller, chunk_data, for_update) fetched_count += len(chunk_data) current_offset += len(chunk_data) @@ -323,17 +315,11 @@ def _fetch_rows(doctype, *, filters, order_by, limit, offset, for_update, child_ def _build_document_objects(controller, data: list, for_update: bool): - if not data: - return [] - - built_docs = [] for row in data: doc = controller(row) if for_update: doc.flags.for_update = True - built_docs.append(doc) - - return built_docs + yield doc def get_doc_permission_check(doc: "Document", check_permission: str | bool | None = None) -> "Document": From 9c77848b814d52b8efbac74ca5c320b1779483a5 Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Thu, 9 Apr 2026 09:59:53 +0530 Subject: [PATCH 13/13] refactor: Simpler iterator implementation using itertools --- frappe/model/document.py | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/frappe/model/document.py b/frappe/model/document.py index e1c60bbd78..50d51e0d83 100644 --- a/frappe/model/document.py +++ b/frappe/model/document.py @@ -182,6 +182,10 @@ def get_docs( :param as_iterator: If True, returns a iterator yielding Documents. :param for_update: If True, locks the fetched rows for update. :param distinct: If True, return distinct rows. + + + Note: Chunk size controls memory usage vs # of queries tradeoff. Using chunk size larger than + 10,000 is not advisable. """ if is_virtual_doctype(doctype): frappe.throw(_("Virtual DocType {0} cannot be fetched in bulk.").format(doctype)) @@ -210,13 +214,14 @@ def get_docs( child_tables, filters=filters, chunk_size=chunk_size, - limit=limit, limit_start=limit_start, order_by=order_by, for_update=for_update, distinct=distinct, ) + iterator = itertools.islice(iterator, limit) + if as_iterator: return iterator return list(iterator) @@ -229,41 +234,28 @@ def _get_docs_generator( *, filters, chunk_size, - limit, limit_start, order_by, for_update, distinct, ) -> Generator["Document"]: - fetched_count = 0 - current_offset = limit_start + offset = limit_start while True: - current_chunk_size = chunk_size - if limit is not None: - remaining = limit - fetched_count - if remaining <= 0: - break - current_chunk_size = min(chunk_size, remaining) - chunk_data = _fetch_rows( doctype, filters=filters, order_by=order_by, - limit=current_chunk_size, - offset=current_offset, + limit=chunk_size, + offset=offset, for_update=for_update, child_tables=child_tables, distinct=distinct, ) - if not chunk_data: break - yield from _build_document_objects(controller, chunk_data, for_update) - - fetched_count += len(chunk_data) - current_offset += len(chunk_data) + offset += chunk_size def _fetch_rows(doctype, *, filters, order_by, limit, offset, for_update, child_tables, distinct=False):