fix: get_docs - Always use iterator internally
When the size of the `get_docs` output is unknown, we might end up generating queries for child tables with `in (...)` clauses containing thousands of doc names. This doesn't fare well with databases, so it's better to fetch in chunks of 1000 by default. This is an acceptable tradeoff IMO.
This commit is contained in:
parent
2364216fb1
commit
02510e506a
1 changed file with 15 additions and 29 deletions
|
|
@ -194,14 +194,17 @@ def get_docs(
|
||||||
if limit_start and limit is None:
|
if limit_start and limit is None:
|
||||||
frappe.throw(_("limit cannot be None when limit_start is used"))
|
frappe.throw(_("limit cannot be None when limit_start is used"))
|
||||||
|
|
||||||
|
if not order_by:
|
||||||
|
# Sort order is mandatory for iterator logic
|
||||||
|
order_by = "name asc"
|
||||||
|
|
||||||
child_tables = [
|
child_tables = [
|
||||||
(df.fieldname, df.options) for df in meta.get_table_fields() if not is_virtual_doctype(df.options)
|
(df.fieldname, df.options) for df in meta.get_table_fields() if not is_virtual_doctype(df.options)
|
||||||
]
|
]
|
||||||
controller = get_controller(doctype)
|
controller = get_controller(doctype)
|
||||||
for_update = for_update and frappe.db.db_type != "sqlite"
|
for_update = for_update and frappe.db.db_type != "sqlite"
|
||||||
|
|
||||||
if as_iterator:
|
iterator = _get_docs_generator(
|
||||||
return _get_docs_generator(
|
|
||||||
doctype,
|
doctype,
|
||||||
controller,
|
controller,
|
||||||
child_tables,
|
child_tables,
|
||||||
|
|
@ -214,19 +217,9 @@ def get_docs(
|
||||||
distinct=distinct,
|
distinct=distinct,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Eagerly fetch all docs
|
if as_iterator:
|
||||||
all_data = _fetch_rows(
|
return iterator
|
||||||
doctype,
|
return list(iterator)
|
||||||
filters=filters,
|
|
||||||
order_by=order_by,
|
|
||||||
limit=limit,
|
|
||||||
offset=limit_start,
|
|
||||||
for_update=for_update,
|
|
||||||
child_tables=child_tables,
|
|
||||||
distinct=distinct,
|
|
||||||
)
|
|
||||||
|
|
||||||
return _build_document_objects(controller, all_data, for_update)
|
|
||||||
|
|
||||||
|
|
||||||
def _get_docs_generator(
|
def _get_docs_generator(
|
||||||
|
|
@ -267,8 +260,7 @@ def _get_docs_generator(
|
||||||
if not chunk_data:
|
if not chunk_data:
|
||||||
break
|
break
|
||||||
|
|
||||||
built_docs = _build_document_objects(controller, chunk_data, for_update)
|
yield from _build_document_objects(controller, chunk_data, for_update)
|
||||||
yield from built_docs
|
|
||||||
|
|
||||||
fetched_count += len(chunk_data)
|
fetched_count += len(chunk_data)
|
||||||
current_offset += len(chunk_data)
|
current_offset += len(chunk_data)
|
||||||
|
|
@ -323,17 +315,11 @@ def _fetch_rows(doctype, *, filters, order_by, limit, offset, for_update, child_
|
||||||
|
|
||||||
|
|
||||||
def _build_document_objects(controller, data: list, for_update: bool):
|
def _build_document_objects(controller, data: list, for_update: bool):
|
||||||
if not data:
|
|
||||||
return []
|
|
||||||
|
|
||||||
built_docs = []
|
|
||||||
for row in data:
|
for row in data:
|
||||||
doc = controller(row)
|
doc = controller(row)
|
||||||
if for_update:
|
if for_update:
|
||||||
doc.flags.for_update = True
|
doc.flags.for_update = True
|
||||||
built_docs.append(doc)
|
yield doc
|
||||||
|
|
||||||
return built_docs
|
|
||||||
|
|
||||||
|
|
||||||
def get_doc_permission_check(doc: "Document", check_permission: str | bool | None = None) -> "Document":
|
def get_doc_permission_check(doc: "Document", check_permission: str | bool | None = None) -> "Document":
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue