fix: get_docs - Always use iterator internally
When `get_docs` output is unknown, we might end up generating queries for child table with `in (...)` containing thousands of doc names. This doesn't fare well with databases, so it's better to chunk it to 1000 by default. This is an acceptable tradeoff IMO.
This commit is contained in:
parent
2364216fb1
commit
02510e506a
1 changed files with 15 additions and 29 deletions
|
|
@ -194,39 +194,32 @@ def get_docs(
|
|||
if limit_start and limit is None:
|
||||
frappe.throw(_("limit cannot be None when limit_start is used"))
|
||||
|
||||
if not order_by:
|
||||
# Sort order is mandatory for iterator logic
|
||||
order_by = "name asc"
|
||||
|
||||
child_tables = [
|
||||
(df.fieldname, df.options) for df in meta.get_table_fields() if not is_virtual_doctype(df.options)
|
||||
]
|
||||
controller = get_controller(doctype)
|
||||
for_update = for_update and frappe.db.db_type != "sqlite"
|
||||
|
||||
if as_iterator:
|
||||
return _get_docs_generator(
|
||||
doctype,
|
||||
controller,
|
||||
child_tables,
|
||||
filters=filters,
|
||||
chunk_size=chunk_size,
|
||||
limit=limit,
|
||||
limit_start=limit_start,
|
||||
order_by=order_by,
|
||||
for_update=for_update,
|
||||
distinct=distinct,
|
||||
)
|
||||
|
||||
# Eagerly fetch all docs
|
||||
all_data = _fetch_rows(
|
||||
iterator = _get_docs_generator(
|
||||
doctype,
|
||||
controller,
|
||||
child_tables,
|
||||
filters=filters,
|
||||
order_by=order_by,
|
||||
chunk_size=chunk_size,
|
||||
limit=limit,
|
||||
offset=limit_start,
|
||||
limit_start=limit_start,
|
||||
order_by=order_by,
|
||||
for_update=for_update,
|
||||
child_tables=child_tables,
|
||||
distinct=distinct,
|
||||
)
|
||||
|
||||
return _build_document_objects(controller, all_data, for_update)
|
||||
if as_iterator:
|
||||
return iterator
|
||||
return list(iterator)
|
||||
|
||||
|
||||
def _get_docs_generator(
|
||||
|
|
@ -267,8 +260,7 @@ def _get_docs_generator(
|
|||
if not chunk_data:
|
||||
break
|
||||
|
||||
built_docs = _build_document_objects(controller, chunk_data, for_update)
|
||||
yield from built_docs
|
||||
yield from _build_document_objects(controller, chunk_data, for_update)
|
||||
|
||||
fetched_count += len(chunk_data)
|
||||
current_offset += len(chunk_data)
|
||||
|
|
@ -323,17 +315,11 @@ def _fetch_rows(doctype, *, filters, order_by, limit, offset, for_update, child_
|
|||
|
||||
|
||||
def _build_document_objects(controller, data: list, for_update: bool):
|
||||
if not data:
|
||||
return []
|
||||
|
||||
built_docs = []
|
||||
for row in data:
|
||||
doc = controller(row)
|
||||
if for_update:
|
||||
doc.flags.for_update = True
|
||||
built_docs.append(doc)
|
||||
|
||||
return built_docs
|
||||
yield doc
|
||||
|
||||
|
||||
def get_doc_permission_check(doc: "Document", check_permission: str | bool | None = None) -> "Document":
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue