From a62cc408852c9f3e3cd5e81c3a15f40ccbc64d82 Mon Sep 17 00:00:00 2001 From: leela Date: Thu, 11 Mar 2021 09:42:54 +0530 Subject: [PATCH] perf: Improve import template performance by 10 times Removed the nested for loop to improve performance and switched to generators to reduce memory usage. --- frappe/core/doctype/data_import/exporter.py | 27 ++++++++++----------- frappe/utils/__init__.py | 26 ++++++++++++++++++++ 2 files changed, 39 insertions(+), 14 deletions(-) diff --git a/frappe/core/doctype/data_import/exporter.py b/frappe/core/doctype/data_import/exporter.py index 66e32a1270..acaa294a6f 100644 --- a/frappe/core/doctype/data_import/exporter.py +++ b/frappe/core/doctype/data_import/exporter.py @@ -2,13 +2,15 @@ # Copyright (c) 2019, Frappe Technologies Pvt. Ltd. and Contributors # MIT License. See license.txt +import typing + import frappe from frappe.model import ( display_fieldtypes, no_value_fields, table_fields as table_fieldtypes, ) -from frappe.utils import flt, format_duration +from frappe.utils import flt, format_duration, groupby_metric from frappe.utils.csvutils import build_csv_response from frappe.utils.xlsxutils import build_xlsx_response @@ -116,7 +118,6 @@ class Exporter: def get_data_to_export(self): frappe.permissions.can_export(self.doctype, raise_exception=True) - data_to_export = [] table_fields = [f for f in self.exportable_fields if f != self.doctype] data = self.get_data_as_docs() @@ -128,14 +129,13 @@ class Exporter: if table_fields: # add child table data for f in table_fields: - for i, child_row in enumerate(doc[f]): + for i, child_row in enumerate(doc.get(f, [])): table_df = self.meta.get_field(f) child_doctype = table_df.options rows = self.add_data_row(child_doctype, child_row.parentfield, child_row, rows, i) - data_to_export += rows - - return data_to_export + for row in rows: + yield row def add_data_row(self, doctype, parentfield, doc, rows, row_idx): if len(rows) < row_idx + 1: @@ -204,17 +204,13 @@ class Exporter: ) child_data[key] 
= data - return self.merge_data(parent_data, child_data) - - def merge_data(self, parent_data, child_data): + # Group children data by parent name + grouped_children_data = self.group_children_data_by_parent(child_data) for doc in parent_data: - for table_field, table_rows in child_data.items(): - doc[table_field] = [row for row in table_rows if row.parent == doc.name] - - return parent_data + related_children_docs = grouped_children_data.get(doc.name, {}) + yield {**doc, **related_children_docs} def add_header(self): - header = [] for df in self.fields: is_parent = not df.is_child_table_field @@ -261,3 +257,6 @@ class Exporter: def build_xlsx_response(self): build_xlsx_response(self.get_csv_array_for_export(), self.doctype) + + def group_children_data_by_parent(self, children_data: typing.Dict[str, list]): + return groupby_metric(children_data, key='parent') diff --git a/frappe/utils/__init__.py b/frappe/utils/__init__.py index 1ff9da0ca9..728028622b 100644 --- a/frappe/utils/__init__.py +++ b/frappe/utils/__init__.py @@ -11,6 +11,7 @@ import os import re import sys import traceback +import typing from email.header import decode_header, make_header from email.utils import formataddr, parseaddr @@ -763,3 +764,28 @@ def get_bench_relative_path(file_path): sys.exit(1) return os.path.abspath(file_path) + + +def groupby_metric(iterable: typing.Dict[str, list], key: str): + """ Group records by a metric. + + Use case: Let's assume we have a country-wise list of players, with a ranking given for each player (multiple players in a country can have the same ranking as well). + We can group the players by ranking (or any other metric) using this function. 
+ >>> d = { + 'india': [{'id':1, 'name': 'iplayer-1', 'ranking': 1}, {'id': 2, 'ranking': 1, 'name': 'iplayer-2'}, {'id': 2, 'ranking': 2, 'name': 'iplayer-3'}], + 'Aus': [{'id':1, 'name': 'aplayer-1', 'ranking': 1}, {'id': 2, 'ranking': 1, 'name': 'aplayer-2'}, {'id': 2, 'ranking': 2, 'name': 'aplayer-3'}] + } + >>> groupby_metric(d, key='ranking') + {1: {'Aus': [{'id': 1, 'name': 'aplayer-1', 'ranking': 1}, + {'id': 2, 'name': 'aplayer-2', 'ranking': 1}], + 'india': [{'id': 1, 'name': 'iplayer-1', 'ranking': 1}, + {'id': 2, 'name': 'iplayer-2', 'ranking': 1}]}, + 2: {'Aus': [{'id': 2, 'name': 'aplayer-3', 'ranking': 2}], + 'india': [{'id': 2, 'name': 'iplayer-3', 'ranking': 2}]}} + """ + records = {} + for category, items in iterable.items(): + for item in items: + records.setdefault(item[key], {}).setdefault(category, []).append(item) + return records