diff --git a/frappe/cache_manager.py b/frappe/cache_manager.py index 4560680653..92d12289c6 100644 --- a/frappe/cache_manager.py +++ b/frappe/cache_manager.py @@ -24,7 +24,7 @@ user_cache_keys = ("bootinfo", "user_recent", "roles", "user_doc", "lang", "has_role:Page", "has_role:Report") doctype_cache_keys = ("meta", "form_meta", "table_columns", "last_modified", - "linked_doctypes", 'notifications', 'workflow' ,'energy_point_rule_map') + "linked_doctypes", 'notifications', 'workflow' ,'energy_point_rule_map', 'data_import_column_header_map') def clear_user_cache(user=None): diff --git a/frappe/core/doctype/data_import_beta/data_import_beta.css b/frappe/core/doctype/data_import_beta/data_import_beta.css new file mode 100644 index 0000000000..5206540a33 --- /dev/null +++ b/frappe/core/doctype/data_import_beta/data_import_beta.css @@ -0,0 +1,3 @@ +.warnings .warning { + margin-bottom: 40px; +} diff --git a/frappe/core/doctype/data_import_beta/data_import_beta.js b/frappe/core/doctype/data_import_beta/data_import_beta.js index 72404c74f4..1c648621d8 100644 --- a/frappe/core/doctype/data_import_beta/data_import_beta.js +++ b/frappe/core/doctype/data_import_beta/data_import_beta.js @@ -57,7 +57,7 @@ frappe.ui.form.on('Data Import Beta', { frm.set_query('reference_doctype', () => { return { filters: { - allow_import: 1 + name: ['in', frappe.boot.user.can_import] } }; }); @@ -236,7 +236,7 @@ frappe.ui.form.on('Data Import Beta', { frm .call({ method: 'get_preview_from_template', - args: { data_import: frm.doc.name }, + args: { data_import: frm.doc.name, import_file: frm.doc.import_file }, error_handlers: { TimestampMismatchError() { // ignore this error @@ -331,8 +331,8 @@ frappe.ui.form.on('Data Import Beta', { }) .join(''); return ` -
-
${__('Row {0}', [row_number])}
+
+
${__('Row {0}', [row_number])}
    ${message}
`; @@ -346,8 +346,8 @@ frappe.ui.form.on('Data Import Beta', { header = __('Column {0}', [warning.col]); } return ` -
-
${header}
+
+
${header}
${warning.message}
`; @@ -355,7 +355,7 @@ frappe.ui.form.on('Data Import Beta', { .join(''); frm.get_field('import_warnings').$wrapper.html(`
-
${html}
+
${html}
`); }, diff --git a/frappe/core/doctype/data_import_beta/data_import_beta.json b/frappe/core/doctype/data_import_beta/data_import_beta.json index 777af0a071..8876d2246a 100644 --- a/frappe/core/doctype/data_import_beta/data_import_beta.json +++ b/frappe/core/doctype/data_import_beta/data_import_beta.json @@ -16,11 +16,11 @@ "submit_after_import", "mute_emails", "template_options", - "section_import_preview", - "import_preview", "import_warnings_section", "template_warnings", "import_warnings", + "section_import_preview", + "import_preview", "import_log_section", "import_log", "show_failed_logs", @@ -34,7 +34,9 @@ "label": "Document Type", "options": "DocType", "reqd": 1, - "set_only_once": 1 + "set_only_once": 1, + "show_days": 1, + "show_seconds": 1 }, { "fieldname": "import_type", @@ -43,28 +45,38 @@ "label": "Import Type", "options": "\nInsert New Records\nUpdate Existing Records", "reqd": 1, - "set_only_once": 1 + "set_only_once": 1, + "show_days": 1, + "show_seconds": 1 }, { "depends_on": "eval:!doc.__islocal", "fieldname": "import_file", "fieldtype": "Attach", "in_list_view": 1, - "label": "Import File" + "label": "Import File", + "show_days": 1, + "show_seconds": 1 }, { "fieldname": "import_preview", "fieldtype": "HTML", - "label": "Import Preview" + "label": "Import Preview", + "show_days": 1, + "show_seconds": 1 }, { "fieldname": "section_import_preview", "fieldtype": "Section Break", - "label": "Preview" + "label": "Preview", + "show_days": 1, + "show_seconds": 1 }, { "fieldname": "column_break_5", - "fieldtype": "Column Break" + "fieldtype": "Column Break", + "show_days": 1, + "show_seconds": 1 }, { "fieldname": "template_options", @@ -72,23 +84,31 @@ "hidden": 1, "label": "Template Options", "options": "JSON", - "read_only": 1 + "read_only": 1, + "show_days": 1, + "show_seconds": 1 }, { "fieldname": "import_log", "fieldtype": "Code", "label": "Import Log", - "options": "JSON" + "options": "JSON", + "show_days": 1, + "show_seconds": 1 }, { "fieldname": "import_log_section", "fieldtype": "Section Break", - "label": "Import Log" + "label": "Import Log", + "show_days": 1, + "show_seconds": 1 }, { "fieldname": "import_log_preview", "fieldtype": "HTML", - "label": "Import Log Preview" + "label": "Import Log Preview", + "show_days": 1, + "show_seconds": 1 }, { "default": "Pending", @@ -97,56 +117,72 @@ "hidden": 1, "label": "Status", "options": "Pending\nSuccess\nPartial Success\nError", - "read_only": 1 + "read_only": 1, + "show_days": 1, + "show_seconds": 1 }, { "fieldname": "template_warnings", "fieldtype": "Code", "hidden": 1, "label": "Template Warnings", - "options": "JSON" + "options": "JSON", + "show_days": 1, + "show_seconds": 1 }, { "default": "0", "fieldname": "submit_after_import", "fieldtype": "Check", "label": "Submit After Import", - "set_only_once": 1 + "set_only_once": 1, + "show_days": 1, + "show_seconds": 1 }, { "fieldname": "import_warnings_section", "fieldtype": "Section Break", - "label": "Warnings" + "label": "Warnings", + "show_days": 1, + "show_seconds": 1 }, { "fieldname": "import_warnings", "fieldtype": "HTML", - "label": "Import Warnings" + "label": "Import Warnings", + "show_days": 1, + "show_seconds": 1 }, { "depends_on": "reference_doctype", "fieldname": "download_template", "fieldtype": "Button", - "label": "Download Template" + "label": "Download Template", + "show_days": 1, + "show_seconds": 1 }, { "default": "1", "fieldname": "mute_emails", "fieldtype": "Check", "label": "Don't Send Emails", - "set_only_once": 1 + "set_only_once": 1, + "show_days": 1, + "show_seconds": 1 }, { "default": "0", "fieldname": "show_failed_logs", "fieldtype": "Check", - "label": "Show Failed Logs" + "label": "Show Failed Logs", + "show_days": 1, + "show_seconds": 1 } ], "hide_toolbar": 1, "links": [], - "modified": "2020-02-17 15:35:04.386098", - "modified_by": "faris@erpnext.com", + "modified": "2020-05-28 22:11:38.266208", + "modified_by": "Administrator", "module": "Core", "name": "Data Import Beta", "owner": "Administrator", diff --git a/frappe/core/doctype/data_import_beta/data_import_beta.py b/frappe/core/doctype/data_import_beta/data_import_beta.py index 8f12bd20ed..23e0681011 100644 --- a/frappe/core/doctype/data_import_beta/data_import_beta.py +++ b/frappe/core/doctype/data_import_beta/data_import_beta.py @@ -5,8 +5,9 @@ from __future__ import unicode_literals import frappe from frappe.model.document import Document -from frappe.core.doctype.data_import.importer_new import Importer -from frappe.core.doctype.data_import.exporter_new import Exporter + +from frappe.core.doctype.data_import_beta.importer import Importer +from frappe.core.doctype.data_import_beta.exporter import Exporter from frappe.core.page.background_jobs.background_jobs import get_info from frappe.utils.background_jobs import enqueue from frappe import _ @@ -25,7 +26,10 @@ class DataImportBeta(Document): # validate template self.get_importer() - def get_preview_from_template(self): + def get_preview_from_template(self, import_file=None): + if import_file: + self.import_file = import_file + if not self.import_file: return @@ -62,8 +66,8 @@ class DataImportBeta(Document): @frappe.whitelist() -def get_preview_from_template(data_import): - return frappe.get_doc("Data Import Beta", data_import).get_preview_from_template() +def get_preview_from_template(data_import, import_file): + return frappe.get_doc("Data Import Beta", data_import).get_preview_from_template(import_file) @frappe.whitelist() @@ -81,8 +85,8 @@ def start_import(data_import): frappe.db.rollback() data_import.db_set("status", "Error") frappe.log_error(title=data_import.name) - frappe.db.commit() - frappe.publish_realtime("data_import_refresh", {"data_import": data_import.name}) + + frappe.publish_realtime("data_import_refresh", {"data_import": data_import.name}) @frappe.whitelist() diff --git a/frappe/core/doctype/data_import_beta/importer_new.py b/frappe/core/doctype/data_import_beta/importer_new.py index cbb2ee482b..02721fb93f 100644 --- a/frappe/core/doctype/data_import_beta/importer_new.py +++ b/frappe/core/doctype/data_import_beta/importer_new.py @@ -1,12 +1,12 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2019, Frappe Technologies Pvt. Ltd. and Contributors +# Copyright (c) 2020, Frappe Technologies Pvt. Ltd. and Contributors # MIT License. See license.txt -import io +from __future__ import unicode_literals import os -import json -import timeit +import io import frappe +import timeit +import json from datetime import datetime from frappe import _ from frappe.utils import cint, flt, update_progress_bar, cstr, DATETIME_FORMAT @@ -15,65 +15,406 @@ from frappe.utils.xlsxutils import ( read_xlsx_file_from_attached_file, read_xls_file_from_attached_file, ) -from frappe.model import no_value_fields, table_fields +from frappe.model import no_value_fields, table_fields as table_fieldtypes -INVALID_VALUES = ["", None] +INVALID_VALUES = ("", None) MAX_ROWS_IN_PREVIEW = 10 INSERT = "Insert New Records" UPDATE = "Update Existing Records" -# pylint: disable=R0201 + class Importer: - def __init__( - self, doctype, data_import=None, file_path=None, content=None, console=False - ): + def __init__(self, doctype, data_import=None, import_type=None, console=False): self.doctype = doctype - self.template_options = frappe._dict({"remap_column": {}}) self.console = console - if data_import: - self.data_import = data_import - if self.data_import.template_options: - template_options = frappe.parse_json(self.data_import.template_options) - self.template_options.update(template_options) - self.import_type = self.data_import.import_type + self.data_import = data_import + if not self.data_import: + self.data_import = frappe.get_doc(doctype="Data Import Beta") + if import_type: + self.data_import.import_type = import_type + + self.template_options = frappe.parse_json(self.data_import.template_options or "{}") + self.import_type = self.data_import.import_type + + self.import_file = ImportFile( + doctype, data_import.import_file, self.template_options, self.import_type + ) + + def get_data_for_import_preview(self): + return self.import_file.get_data_for_import_preview() + + def before_import(self): + # set user lang for translations + frappe.cache().hdel("lang", frappe.session.user) + frappe.set_user_lang(frappe.session.user) + + # set flags + frappe.flags.in_import = True + frappe.flags.mute_emails = self.data_import.mute_emails + + self.data_import.db_set("template_warnings", "") + + def import_data(self): + self.before_import() + + # parse docs from rows + payloads = self.import_file.get_payloads_for_import() + + # dont import if there are non-ignorable warnings + warnings = self.import_file.get_warnings() + warnings = [w for w in warnings if w.get("type") != "info"] + + print(warnings) + + if warnings: + if self.console: + self.print_grouped_warnings(warnings) + else: + self.data_import.db_set("template_warnings", json.dumps(warnings)) + return + + # setup import log + if self.data_import.import_log: + import_log = frappe.parse_json(self.data_import.import_log) else: - self.data_import = None + import_log = [] - self.import_type = self.import_type or INSERT + # remove previous failures from import log + import_log = [l for l in import_log if l.get("success") == True] - self.header_row = None - self.data = None - # used to store date formats guessed from data rows per column - self._guessed_date_formats = {} - # used to store eta during import - self.last_eta = 0 - # used to collect warnings during template parsing - # and show them to user - self.warnings = [] - self.meta = frappe.get_meta(doctype) - self.prepare_content(file_path, content) + # get successfully imported rows + imported_rows = [] + for log in import_log: + log = frappe._dict(log) + if log.success: + imported_rows += log.row_indexes + + # start import + total_payload_count = len(payloads) + batch_size = frappe.conf.data_import_batch_size or 1000 + + for batch_index, batched_payloads in enumerate( + frappe.utils.create_batch(payloads, batch_size) + ): + for i, payload in enumerate(batched_payloads): + doc = payload.doc + row_indexes = [row.row_number for row in payload.rows] + current_index = (i + 1) + (batch_index * batch_size) + + if set(row_indexes).intersection(set(imported_rows)): + print("Skipping imported rows", row_indexes) + if total_payload_count > 5: + frappe.publish_realtime( + "data_import_progress", + { + "current": current_index, + "total": total_payload_count, + "skipping": True, + "data_import": self.data_import.name, + }, + ) + continue + + try: + start = timeit.default_timer() + doc = self.process_doc(doc) + processing_time = timeit.default_timer() - start + eta = self.get_eta(current_index, total_payload_count, processing_time) + + if self.console: + update_progress_bar( + "Importing {0} records".format(total_payload_count), + current_index, + total_payload_count, + ) + elif total_payload_count > 5: + frappe.publish_realtime( + "data_import_progress", + { + "current": current_index, + "total": total_payload_count, + "docname": doc.name, + "data_import": self.data_import.name, + "success": True, + "row_indexes": row_indexes, + "eta": eta, + }, + ) + + import_log.append( + frappe._dict(success=True, docname=doc.name, row_indexes=row_indexes) + ) + # commit after every successful import + frappe.db.commit() + + except Exception: + import_log.append( + frappe._dict( + success=False, + exception=frappe.get_traceback(), + messages=frappe.local.message_log, + row_indexes=row_indexes, + ) + ) + frappe.clear_messages() + # rollback if exception + frappe.db.rollback() + + # set status + failures = [l for l in import_log if l.get("success") == False] + if len(failures) == total_payload_count: + status = "Pending" + elif len(failures) > 0: + status = "Partial Success" + else: + status = "Success" + + if self.console: + self.print_import_log(import_log) + else: + self.data_import.db_set("status", status) + self.data_import.db_set("import_log", json.dumps(import_log)) + + self.after_import() + + return import_log + + def after_import(self): + frappe.flags.in_import = False + frappe.flags.mute_emails = False + + def process_doc(self, doc): + if self.import_type == INSERT: + return self.insert_record(doc) + elif self.import_type == UPDATE: + return self.update_record(doc) + + def insert_record(self, doc): + meta = frappe.get_meta(self.doctype) + new_doc = frappe.new_doc(self.doctype) + new_doc.update(doc) + + if (meta.autoname or "").lower() != "prompt": + # name can only be set directly if autoname is prompt + new_doc.set("name", None) + + new_doc.flags.updater_reference = { + "doctype": self.data_import.doctype, + "docname": self.data_import.name, + "label": _("via Data Import"), + } + + new_doc.insert() + if meta.is_submittable and self.data_import.submit_after_import: + new_doc.submit() + return new_doc + + def update_record(self, doc): + existing_doc = frappe.get_doc(self.doctype, doc["name"]) + existing_doc.flags.updater_reference = { + "doctype": self.data_import.doctype, + "docname": self.data_import.name, + "label": _("via Data Import"), + } + existing_doc.update(doc) + existing_doc.save() + return existing_doc + + def get_eta(self, current, total, processing_time): + self.last_eta = getattr(self, "last_eta", 0) + remaining = total - current + eta = processing_time * remaining + if not self.last_eta or eta < self.last_eta: + self.last_eta = eta + return self.last_eta + + +class ImportFile: + def __init__(self, doctype, file, template_options=None, import_type=None): + self.doctype = doctype + self.template_options = template_options or frappe._dict( + column_to_field_map=frappe._dict() + ) + self.column_to_field_map = self.template_options.column_to_field_map + self.import_type = import_type + + self.file_doc = self.file_path = None + if isinstance(file, frappe.model.document.Document) and file.doctype == "File": + self.file_doc = file + elif isinstance(file, frappe.string_types): + if frappe.db.exists("File", {"file_url": file}): + self.file_doc = frappe.get_doc("File", {"file_url": file}) + elif os.path.exists(file): + self.file_path = file + + if not self.file_doc and not self.file_path: + frappe.throw(_("Invalid template file for import")) + + self.raw_data = self.get_data_from_template_file() self.parse_data_from_template() - def prepare_content(self, file_path, content): + def get_data_from_template_file(self): + content = None extension = None - if self.data_import and self.data_import.import_file: - file_doc = frappe.get_doc("File", {"file_url": self.data_import.import_file}) - parts = file_doc.get_extension() + + if self.file_doc: + parts = self.file_doc.get_extension() extension = parts[1] - content = file_doc.get_content() + content = self.file_doc.get_content() extension = extension.lstrip(".") - if file_path: - content, extension = self.read_file(file_path) + elif self.file_path: + content, extension = self.read_file(self.file_path) + + if not content: + frappe.throw(_("Invalid or corrupted content for import")) if not extension: extension = "csv" if content: - self.read_content(content, extension) + return self.read_content(content, extension) - self.validate_template_content() + def parse_data_from_template(self): + header = None + data = [] + + for i, row in enumerate(self.raw_data): + if all(v in INVALID_VALUES for v in row): + # empty row + continue + + if not header: + header = Header(i, row, self.doctype, self.raw_data, self.column_to_field_map) + else: + row_obj = Row(i, row, self.doctype, header, self.import_type) + data.append(row_obj) + + self.header = header + self.columns = self.header.columns + self.data = data + + if len(data) <= 1: + frappe.throw( + _("Import template should contain a Header and atleast one row."), + title=_("Template Error"), + ) + + def get_data_for_import_preview(self): + """Adds a serial number column as the first column""" + + columns = [frappe._dict({"header_title": "Sr. No", "skip_import": True})] + columns += [col.as_dict() for col in self.columns] + data = [[row.row_number] + row.as_list() for row in self.data] + + warnings = self.get_warnings() + + out = frappe._dict() + out.data = data + out.columns = columns + out.warnings = warnings + total_number_of_rows = len(out.data) + if total_number_of_rows > MAX_ROWS_IN_PREVIEW: + out.data = out.data[:MAX_ROWS_IN_PREVIEW] + out.max_rows_exceeded = True + out.max_rows_in_preview = MAX_ROWS_IN_PREVIEW + out.total_number_of_rows = total_number_of_rows + return out + + def get_payloads_for_import(self): + payloads = [] + # make a copy + data = list(self.data) + while data: + doc, rows, data = self.parse_next_row_for_import(data) + payloads.append(frappe._dict(doc=doc, rows=rows)) + return payloads + + def parse_next_row_for_import(self, data): + """ + Parses rows that make up a doc. A doc maybe built from a single row or multiple rows. + Returns the doc, rows, and data without the rows. + """ + doctypes = self.header.doctypes + + # first row is included by default + first_row = data[0] + rows = [first_row] + + # if there are child doctypes, find the subsequent rows + if len(doctypes) > 1: + # subsequent rows either dont have any parent value set + # or have the same value as the parent row + # we include a row if either of conditions match + parent_column_indexes = self.header.get_column_indexes(self.doctype) + parent_row_values = first_row.get_values(parent_column_indexes) + + data_without_first_row = data[1:] + for row in data_without_first_row: + row_values = row.get_values(parent_column_indexes) + # if the row is blank, it's a child row doc + if all([v in INVALID_VALUES for v in row_values]): + rows.append(row) + continue + # if the row has same values as parent row, it's a child row doc + if row_values == parent_row_values: + rows.append(row) + continue + # if any of those conditions dont match, it's the next doc + break + + parsed_docs = {} + parent_doc = None + for row in rows: + for doctype, table_df in doctypes: + if doctype == self.doctype and not parent_doc: + parent_doc = row.parse_doc(doctype) + + if doctype != self.doctype and table_df: + child_doc = row.parse_doc(doctype, parent_doc, table_df) + parent_doc[table_df.fieldname] = parent_doc.get(table_df.fieldname, []) + parent_doc[table_df.fieldname].append(child_doc) + + doc = parent_doc + # check if there is atleast one row for mandatory table fields + meta = frappe.get_meta(self.doctype) + mandatory_table_fields = [ + df + for df in meta.fields + if df.fieldtype in table_fieldtypes + and df.reqd + and len(doc.get(df.fieldname, [])) == 0 + ] + if len(mandatory_table_fields) == 1: + self.warnings.append( + { + "row": first_row.row_number, + "message": _("There should be atleast one row for {0} table").format( + mandatory_table_fields[0].label + ), + } + ) + elif mandatory_table_fields: + fields_string = ", ".join([df.label for df in mandatory_table_fields]) + message = _("There should be atleast one row for the following tables: {0}").format( + fields_string + ) + self.warnings.append({"row": first_row.row_number, "message": message}) + + return doc, rows, data[len(rows) :] + + def get_warnings(self): + warnings = [] + for col in self.header.columns: + warnings += col.warnings + + for row in self.data: + warnings += row.warnings + + return warnings + + ###### def read_file(self, file_path): extn = file_path.split(".")[1] @@ -98,18 +439,10 @@ class Importer: elif extension == "xls": data = read_xls_file_from_attached_file(content) - data = self.remove_empty_rows_and_columns(data) - - if len(data) <= 1: - frappe.throw( - _("Import template should contain a Header and atleast one row."), title=error_title - ) - - self.header_row = data[0] - self.data = data[1:] + return data def validate_template_content(self): - column_count = len(self.header_row) + column_count = len(self.columns) if any([len(row) != column_count and len(row) != 0 for row in self.data]): frappe.throw( _("Number of columns does not match with data"), title=_("Invalid Template") @@ -151,45 +484,324 @@ class Importer: return data_without_empty_rows_and_columns - def get_data_for_import_preview(self): - out = frappe._dict() - out.data = list(self.rows) - out.columns = self.columns - out.warnings = self.warnings - total_number_of_rows = len(out.data) - if total_number_of_rows > MAX_ROWS_IN_PREVIEW: - out.data = out.data[:MAX_ROWS_IN_PREVIEW] - out.max_rows_exceeded = True - out.max_rows_in_preview = MAX_ROWS_IN_PREVIEW - out.total_number_of_rows = total_number_of_rows - return out - def parse_data_from_template(self): - columns = self.parse_columns_from_header_row() - columns = self.detect_date_formats(columns) - columns, data = self.add_serial_no_column(columns, self.data) +class Row: + link_values_exist_map = {} - self.columns = columns - self.rows = data + def __init__(self, index, row, doctype, header, import_type): + self.index = index + self.row_number = index + 1 + self.doctype = doctype + self.data = row + self.header = header + self.import_type = import_type + self.warnings = [] - def parse_columns_from_header_row(self): - remap_column = self.template_options.remap_column - columns = [] - seen = [] + len_row = len(self.data) + len_columns = len(self.header.columns) + if len_row != len_columns: + less_than_columns = len_row < len_columns + message = ( + "Row has less values than columns" + if less_than_columns + else "Row has more values than columns" + ) + self.warnings.append( + {"row": self.row_number, "message": message,} + ) - df_by_labels_and_fieldnames = self.build_fields_dict_for_column_matching() + def parse_doc(self, doctype, parent_doc=None, table_df=None): + col_indexes = self.header.get_column_indexes(doctype, table_df) + values = self.get_values(col_indexes) + columns = self.header.get_columns(col_indexes) + doc = self._parse_doc(doctype, columns, values, parent_doc, table_df) + return doc - for i, header_title in enumerate(self.header_row): - header_row_index = str(i) - column_number = str(i + 1) - skip_import = False - fieldname = remap_column.get(header_row_index) + def _parse_doc(self, doctype, columns, values, parent_doc=None, table_df=None): + doc = frappe._dict() + if self.import_type == INSERT: + # new_doc returns a dict with default values set + doc = frappe.new_doc( + doctype, + parent_doc=parent_doc, + parentfield=table_df.fieldname if table_df else None, + as_dict=True, + ) - if fieldname and fieldname != "Don't Import": - df = df_by_labels_and_fieldnames.get(fieldname) + # remove standard fields and __islocal + for key in frappe.model.default_fields + ("__islocal",): + doc.pop(key, None) + + for col, value in zip(columns, values): + df = col.df + if value in INVALID_VALUES: + value = None + + if value is not None: + value = self.validate_value(value, col) + + if value is not None: + doc[df.fieldname] = self.parse_value(value, col) + + is_table = frappe.get_meta(doctype).istable + is_update = self.import_type == UPDATE + if is_table and is_update and doc.get("name") in INVALID_VALUES: + # for table rows being inserted in update + # create a new doc with defaults set + new_doc = frappe.new_doc(doctype, as_dict=True) + new_doc.update(doc) + doc = new_doc + + self.check_mandatory_fields(doctype, doc) + return doc + + def validate_value(self, value, col): + df = col.df + if df.fieldtype == "Select": + select_options = df.get_select_options() + if select_options and value not in select_options: + options_string = ", ".join([frappe.bold(d) for d in select_options]) + msg = _("Value must be one of {0}").format(options_string) + self.warnings.append( + { + "row": self.row_number, + "field": df.as_dict(convert_dates_to_str=True), + "message": msg, + } + ) + return + + elif df.fieldtype == "Link": + exists = self.link_exists(value, df) + if not exists: + msg = _("Value {0} missing for {1}").format( + frappe.bold(value), frappe.bold(df.options) + ) + self.warnings.append( + { + "row": self.row_number, + "field": df.as_dict(convert_dates_to_str=True), + "message": msg, + } + ) + return + elif df.fieldtype in ["Date", "Datetime"]: + value = self.get_date(value, col) + if isinstance(value, frappe.string_types): + # value was not parsed as datetime object + self.warnings.append( + { + "row": self.row_number, + "col": col.column_number, + "field": df.as_dict(convert_dates_to_str=True), + "message": _("Value {0} must in {1} format").format( + frappe.bold(value), frappe.bold(get_user_format(col.date_format)) + ), + } + ) + return + + return value + + def link_exists(self, value, df): + key = df.options + "::" + value + if Row.link_values_exist_map.get(key) is None: + Row.link_values_exist_map[key] = frappe.db.exists(df.options, value) + return Row.link_values_exist_map.get(key) + + def parse_value(self, value, col): + df = col.df + if isinstance(value, datetime) and df.fieldtype in ["Date", "Datetime"]: + return value + + value = cstr(value) + + # convert boolean values to 0 or 1 + valid_check_values = ["t", "f", "true", "false", "yes", "no", "y", "n"] + if df.fieldtype == "Check" and value.lower().strip() in valid_check_values: + value = value.lower().strip() + value = 1 if value in ["t", "true", "y", "yes"] else 0 + + if df.fieldtype in ["Int", "Check"]: + value = cint(value) + elif df.fieldtype in ["Float", "Percent", "Currency"]: + value = flt(value) + elif df.fieldtype in ["Date", "Datetime"]: + value = self.get_date(value, col) + + return value + + def get_date(self, value, column): + date_format = column.date_format + if date_format: + try: + return datetime.strptime(value, date_format) + except ValueError: + # ignore date values that dont match the format + # import will break for these values later + pass + return value + + def check_mandatory_fields(self, doctype, doc): + """If import type is Insert: + Check for mandatory fields (except table fields) in doc + if import type is Update: + Check for name field or autoname field in doc + """ + meta = frappe.get_meta(doctype) + if self.import_type == UPDATE: + if meta.istable: + # when updating records with table rows, + # there are two scenarios: + # 1. if row 'name' is provided in the template + # the table row will be updated + # 2. if row 'name' is not provided + # then a new row will be added + # so we dont need to check for mandatory + return + + id_field = self.get_id_field(doctype) + if doc.get(id_field.fieldname) in INVALID_VALUES: + self.warnings.append( + { + "row": self.row_number, + "message": _("{0} is a mandatory field").format(id_field.label), + } + ) + return + + fields = [ + df + for df in meta.fields + if df.fieldtype not in table_fieldtypes + and df.reqd + and doc.get(df.fieldname) in INVALID_VALUES + ] + + if not fields: + return + + if len(fields) == 1: + self.warnings.append( + { + "row": self.row_number, + "message": _("{0} is a mandatory field").format(fields[0].label), + } + ) + else: + fields_string = ", ".join([df.label for df in fields]) + self.warnings.append( + { + "row": self.row_number, + "message": _("{0} are mandatory fields").format(fields_string), + } + ) + + def get_id_field(self, doctype): + autoname_field = self.get_autoname_field(doctype) + if autoname_field: + return autoname_field + return frappe._dict({"label": "ID", "fieldname": "name", "fieldtype": "Data"}) + + def get_autoname_field(self, doctype): + meta = frappe.get_meta(doctype) + if meta.autoname and meta.autoname.startswith("field:"): + fieldname = meta.autoname[len("field:") :] + return meta.get_field(fieldname) + + def get_values(self, indexes): + return [self.data[i] for i in indexes] + + def get(self, index): + return self.data[index] + + def as_list(self): + return self.data + + +class Header(Row): + def __init__(self, index, row, doctype, raw_data, column_to_field_map): + self.index = index + self.row_number = index + 1 + self.data = row + self.doctype = doctype + + self.seen = [] + self.columns = [] + + for j, header in enumerate(row): + column_values = [get_item_at_index(r, j) for r in raw_data] + column = Column( + j, header, self.doctype, column_values, column_to_field_map.get(header), self.seen + ) + self.seen.append(header) + self.columns.append(column) + + doctypes = [] + for col in self.columns: + if not col.df: + continue + if col.df.parent == self.doctype: + doctypes.append((col.df.parent, None)) + else: + doctypes.append((col.df.parent, col.df.child_table_df)) + + self.doctypes = sorted( + list(set(doctypes)), key=lambda x: -1 if x[0] == self.doctype else 1 + ) + + def get_column_indexes(self, doctype, tablefield=None): + return [ + col.index + for col in self.columns + if not col.skip_import and col.df and col.df.parent == doctype + ] + + def get_columns(self, indexes): + return [self.columns[i] for i in indexes] + + def get_docfields(self, indexes): + return [col.df for col in self.get_columns(indexes)] + + +class Column: + seen = [] + fields_column_map = {} + + def __init__(self, index, header, doctype, column_values, map_to_field=None, seen=[]): + self.index = index + self.column_number = index + 1 + self.doctype = doctype + self.header_title = header + self.column_values = column_values + self.map_to_field = map_to_field + self.seen = seen + + self.date_format = None + self.df = None + self.skip_import = None + self.warnings = [] + + self.meta = frappe.get_meta(doctype) + self.parse() + self.parse_date_format() + + def parse(self): + # df_by_labels_and_fieldnames = Column.build_fields_dict_for_column_matching( + # self.doctype + # ) + + header_title = self.header_title + header_row_index = str(self.index) + column_number = str(self.column_number) + skip_import = False + + if self.map_to_field and self.map_to_field != "Don't Import": + df = get_df_for_column_header(self.doctype, self.map_to_field) + # df = df_by_labels_and_fieldnames.get(self.map_to_field) + if df: self.warnings.append( { - "col": column_number, "message": _("Mapping column {0} to field {1}").format( frappe.bold(header_title or "Untitled Column"), frappe.bold(df.label) ), @@ -197,138 +809,129 @@ class Importer: } ) else: - df = df_by_labels_and_fieldnames.get(header_title) - - if not df: - skip_import = True - else: - skip_import = False - - if header_title in seen: self.warnings.append( { - "col": column_number, - "message": _("Skipping Duplicate Column {0}").format(frappe.bold(header_title)), - "type": "info", - } - ) - df = None - skip_import = True - elif fieldname == "Don't Import": - skip_import = True - self.warnings.append( - { - "col": column_number, - "message": _("Skipping column {0}").format(frappe.bold(header_title)), - "type": "info", - } - ) - elif header_title and not df: - self.warnings.append( - { - "col": column_number, - "message": _("Cannot match column {0} with any field").format( - frappe.bold(header_title) + "message": _("Could not map column {0} to field {1}").format( + column_number, self.map_to_field ), "type": "info", } ) - elif not header_title and not df: - self.warnings.append( - {"col": column_number, "message": _("Skipping Untitled Column"), "type": "info"} - ) + else: + df = get_df_for_column_header(self.doctype, header_title) + # df = df_by_labels_and_fieldnames.get(header_title) - columns.append( - frappe._dict( - df=df, - skip_import=skip_import, - header_title=header_title, - column_number=column_number, - index=i, - ) - ) - seen.append(header_title) + if not df: + skip_import = True + else: + skip_import = False - return columns - - def build_fields_dict_for_column_matching(self): - """ - Build a dict with various keys to match with column headers and value as docfield - The keys can be label or fieldname - { - 'Customer': df1, - 'customer': df1, - 'Due Date': df2, - 'due_date': df2, - 'Item Code (Sales Invoice Item)': df3, - 'Sales Invoice Item:item_code': df3, - } - """ - out = {} - - table_doctypes = [df.options for df in self.meta.get_table_fields()] - doctypes = table_doctypes + [self.doctype] - for doctype in doctypes: - # name field - name_key = "ID" if self.doctype == doctype else "ID ({})".format(doctype) - name_df = frappe._dict( + if header_title in self.seen: + self.warnings.append( { - "fieldtype": "Data", - "fieldname": "name", - "label": "ID", - "reqd": self.import_type == UPDATE, - "parent": doctype, + "col": column_number, + "message": _("Skipping Duplicate Column {0}").format(frappe.bold(header_title)), + "type": "info", } ) - out[name_key] = name_df - out["name"] = name_df + df = None + skip_import = True + elif self.map_to_field == "Don't Import": + skip_import = True + self.warnings.append( + { + "col": column_number, + "message": _("Skipping column {0}").format(frappe.bold(header_title)), + "type": "info", + } + ) + elif header_title and not df: + self.warnings.append( + { + "col": column_number, + "message": _("Cannot match column {0} with any field").format( + frappe.bold(header_title) + ), + "type": "info", + } + ) + elif not header_title and not df: + self.warnings.append( + {"col": column_number, "message": _("Skipping Untitled Column"), "type": "info"} + ) - # other fields - meta = frappe.get_meta(doctype) - fields = self.get_standard_fields(doctype) + meta.fields - for df in fields: - fieldtype = df.fieldtype or "Data" - parent = df.parent or self.doctype - if fieldtype not in no_value_fields: - if self.doctype == doctype: - # for parent doctypes keys will be - # Label - # label - # Label (label) - if not out.get(df.label): - # if Label is already set, don't set it again - # in case of duplicate column headers - out[df.label] = df - out[df.fieldname] = df - label_with_fieldname = "{0} ({1})".format(df.label, df.fieldname) - out[label_with_fieldname] = df - else: - # for child doctypes keys will be - # Label (Child DocType) - # Child DocType:label - # Label (label) (Child DocType) - label = "{0} ({1})".format(df.label, parent) - fieldname = "{0}:{1}".format(doctype, df.fieldname) - label_with_fieldname = "{0} ({1}) ({2})".format(df.label, df.fieldname, parent) - if not out.get(label): - # if Label is already set, don't set it again - # in case of duplicate column headers - out[label] = df - out[fieldname] = df - out[label_with_fieldname] = df + self.df = df + self.skip_import = skip_import - # if autoname is based on field - # add an entry for "ID (Autoname Field)" - autoname_field = self.get_autoname_field(self.doctype) - if autoname_field: - out["ID ({})".format(autoname_field.label)] = autoname_field - # ID field should also map to the autoname field - out["ID"] = autoname_field - out["name"] = autoname_field + def parse_date_format(self): + if self.df and self.df.fieldtype in ("Date", "Time", "Datetime"): + self.date_format = self.guess_date_format_for_column() - return out + def guess_date_format_for_column(self): + """ Guesses date format for a column by parsing the first 100 values in the column, + getting the date format and then returning the one which has the maximum frequency + """ + PARSE_ROW_COUNT = 100 - def get_standard_fields(self, doctype): + date_formats = [ + frappe.utils.guess_date_format(d) for d in self.column_values if isinstance(d, str) + ] + date_formats = [d for d in date_formats if d] + if not date_formats: + return + + unique_date_formats = set(date_formats) + print(unique_date_formats) + max_occurred_date_format = max(unique_date_formats, key=date_formats.count) + + # fmt: off + message = _("The column {0} has {1} different date formats. Automatically setting {2} as the default format as it is the most common. Please change other values in this column to this format.") + # fmt: on + user_date_format = get_user_format(max_occurred_date_format) + self.warnings.append( + { + "col": self.column_number, + "message": message.format( + frappe.bold(self.header_title), + len(unique_date_formats), + frappe.bold(user_date_format), + ), + "type": "info", + } + ) + + return max_occurred_date_format + + def as_dict(self): + d = frappe._dict() + d.index = self.index + d.column_number = self.column_number + d.doctype = self.doctype + d.header_title = self.header_title + d.column_values = self.column_values + d.map_to_field = self.map_to_field + d.date_format = self.date_format + d.df = self.df + d.skip_import = self.skip_import + d.warnings = self.warnings + return d + + +def build_fields_dict_for_column_matching(parent_doctype): + """ + Build a dict with various keys to match with column headers and value as docfield + The keys can be label or fieldname + { + 'Customer': df1, + 'customer': df1, + 'Due Date': df2, + 'due_date': df2, + 'Item Code (Sales Invoice Item)': df3, + 'Sales Invoice Item:item_code': df3, + } + """ + + def get_standard_fields(doctype): meta = frappe.get_meta(doctype) if meta.istable: standard_fields = [ @@ -350,714 +953,124 @@ class Importer: out.append(df) return out - def detect_date_formats(self, columns): - for col in columns: - if col.df and col.df.fieldtype in ["Date", "Time", "Datetime"]: - col.date_format = self.guess_date_format_for_column(col, columns) - return columns + parent_meta = frappe.get_meta(parent_doctype) + out = {} - def add_serial_no_column(self, columns, data): - columns_with_serial_no = [ - frappe._dict({"header_title": "Sr. No", "skip_import": True}) - ] + columns + # doctypes and fieldname if it is a child doctype + doctypes = [[parent_doctype, None]] + [ + [df.options, df] for df in parent_meta.get_table_fields() + ] - # update index for each column - for i, col in enumerate(columns_with_serial_no): - col.index = i + for doctype, table_df in doctypes: + # name field + name_by_label = ( + "ID" if doctype == parent_doctype else "ID ({0})".format(table_df.label) + ) + name_by_fieldname = ( + "name" if doctype == parent_doctype else "{0}.name".format(table_df.fieldname) + ) + name_df = frappe._dict( + { + "fieldtype": "Data", + "fieldname": "name", + "label": "ID", + "reqd": 1, # self.import_type == UPDATE, + "parent": doctype, + } + ) - data_with_serial_no = [] - for i, row in enumerate(data): - data_with_serial_no.append([self.row_index_map[i] + 1] + row) + if doctype != parent_doctype: + name_df.is_child_table_field = True + name_df.child_table_df = table_df - return columns_with_serial_no, data_with_serial_no + out[name_by_label] = name_df + out[name_by_fieldname] = name_df - def parse_value(self, value, df): - if isinstance(value, datetime) and df.fieldtype in ["Date", "Datetime"]: - return value - - value = cstr(value) - - # convert boolean values to 0 or 1 - valid_check_values = ["t", "f", "true", "false", "yes", "no", "y", "n"] - if df.fieldtype == "Check" and value.lower().strip() in valid_check_values: - value = value.lower().strip() - value = 1 if value in ["t", "true", "y", "yes"] else 0 - - if df.fieldtype in ["Int", "Check"]: - value = cint(value) - elif df.fieldtype in ["Float", "Percent", "Currency"]: - value = flt(value) - elif df.fieldtype in ["Date", "Datetime"]: - value = self.parse_date_format(value, df) - - return value - - def parse_date_format(self, value, df): - date_format = self.get_date_format_for_df(df) or DATETIME_FORMAT - try: - return datetime.strptime(value, date_format) - except ValueError: - # ignore date values that dont match the format - # import will break for these values later - pass - return value - - def get_date_format_for_df(self, df): - return self._guessed_date_formats.get(df.parent + df.fieldname) - - def guess_date_format_for_column(self, column, columns): - """ Guesses date format for a column by parsing the first 10 values in the column, - getting the date format and then returning the one which has the maximum frequency - """ - PARSE_ROW_COUNT = 10 - - df = column.df - key = df.parent + df.fieldname - - if not self._guessed_date_formats.get(key): - matches = [col for col in columns if col.df == df] - if not matches: - self._guessed_date_formats[key] = None - return - - column = matches[0] - column_index = column.index - - date_values = [ - row[column_index] for row in self.data[:PARSE_ROW_COUNT] if row[column_index] - ] - date_formats = [ - guess_date_format(d) if isinstance(d, str) else None for d in date_values - ] - if not date_formats: - return - max_occurred_date_format = max(set(date_formats), key=date_formats.count) - self._guessed_date_formats[key] = max_occurred_date_format - - return self._guessed_date_formats[key] - - def import_data(self): - # set user lang for translations - frappe.cache().hdel("lang", frappe.session.user) - frappe.set_user_lang(frappe.session.user) - - if not self.console: - self.data_import.db_set("template_warnings", "") - - # set flags - frappe.flags.in_import = True - frappe.flags.mute_emails = self.data_import.mute_emails - - # prepare a map for missing link field values - self.prepare_missing_link_field_values() - - # parse docs from rows - payloads = self.get_payloads_for_import() - - # dont import if there are non-ignorable warnings - warnings = [w for w in self.warnings if w.get("type") != "info"] - if warnings: - if self.console: - self.print_grouped_warnings(warnings) - else: - self.data_import.db_set("template_warnings", json.dumps(warnings)) - frappe.publish_realtime( - "data_import_refresh", {"data_import": self.data_import.name} - ) - return - - # setup import log - if self.data_import.import_log: - import_log = frappe.parse_json(self.data_import.import_log) - else: - import_log = [] - - # remove previous failures from import log - import_log = [l for l in import_log if l.get("success") == True] - - # get successfully imported rows - imported_rows = [] - for log in import_log: - log = frappe._dict(log) - if log.success: - imported_rows += log.row_indexes - - # start import - total_payload_count = len(payloads) - batch_size = frappe.conf.data_import_batch_size or 1000 - - for batch_index, batched_payloads in enumerate( - frappe.utils.create_batch(payloads, batch_size) - ): - for i, payload in enumerate(batched_payloads): - doc = payload.doc - row_indexes = [row[0] for row in payload.rows] - current_index = (i + 1) + (batch_index * batch_size) - - if set(row_indexes).intersection(set(imported_rows)): - print("Skipping imported rows", row_indexes) - if total_payload_count > 5: - frappe.publish_realtime( - "data_import_progress", - { - "current": current_index, - "total": total_payload_count, - "skipping": True, - "data_import": self.data_import.name, - }, - ) - continue - - try: - start = timeit.default_timer() - doc = self.process_doc(doc) - processing_time = timeit.default_timer() - start - eta = self.get_eta(current_index, total_payload_count, processing_time) - - if total_payload_count > 5: - frappe.publish_realtime( - "data_import_progress", - { - "current": current_index, - "total": total_payload_count, - "docname": doc.name, - "data_import": self.data_import.name, - "success": True, - "row_indexes": row_indexes, - "eta": eta, - }, - ) - if self.console: - update_progress_bar( - "Importing {0} records".format(total_payload_count), - current_index, - total_payload_count, - ) - import_log.append( - frappe._dict(success=True, docname=doc.name, row_indexes=row_indexes) + # other fields + fields = get_standard_fields(doctype) + frappe.get_meta(doctype).fields + for df in fields: + fieldtype = df.fieldtype or "Data" + parent = df.parent or parent_doctype + if fieldtype not in no_value_fields: + if parent_doctype == doctype: + # for parent doctypes keys will be + # Label + # label + # Label (label) + if not out.get(df.label): + # if Label is already set, don't set it again + # in case of duplicate column headers + out[df.label] = df + out[df.fieldname] = df + label_with_fieldname = "{0} ({1})".format(df.label, df.fieldname) + out[label_with_fieldname] = df + else: + # in case there are multiple table fields with the same doctype + # for child doctypes keys will be + # Label (Table Field Label) + # table_field.fieldname + table_fields = parent_meta.get( + "fields", {"fieldtype": ["in", table_fieldtypes], "options": parent} ) - # commit after every successful import - frappe.db.commit() + for table_field in table_fields: + by_label = "{0} ({1})".format(df.label, table_field.label) + by_fieldname = "{0}.{1}".format(table_field.fieldname, df.fieldname) - except Exception: - import_log.append( - frappe._dict( - success=False, - exception=frappe.get_traceback(), - messages=frappe.local.message_log, - row_indexes=row_indexes, - ) - ) - frappe.clear_messages() - # rollback if exception - frappe.db.rollback() + # create a new df object to avoid mutation problems + if isinstance(df, dict): + new_df = frappe._dict(df.copy()) + else: + new_df = df.as_dict() - # set status - failures = [l for l in import_log if l.get("success") == False] - if len(failures) == total_payload_count: - status = "Pending" - elif len(failures) > 0: - status = "Partial Success" - else: - status = "Success" + new_df.is_child_table_field = True + new_df.child_table_df = table_field + out[by_label] = new_df + out[by_fieldname] = new_df - if self.console: - self.print_import_log(import_log) - else: - self.data_import.db_set("status", status) - self.data_import.db_set("import_log", json.dumps(import_log)) + # if autoname is based on field + # add an entry for "ID (Autoname Field)" + autoname_field = get_autoname_field(parent_doctype) + if autoname_field: + out["ID ({})".format(autoname_field.label)] = autoname_field + # ID field should also map to the autoname field + out["ID"] = autoname_field + out["name"] = autoname_field - frappe.flags.in_import = False - frappe.flags.mute_emails = False - frappe.publish_realtime("data_import_refresh", {"data_import": self.data_import.name}) + return out - return import_log - def get_payloads_for_import(self): - payloads = [] - # make a copy - data = list(self.rows) - while data: - doc, rows, data = self.parse_next_row_for_import(data) - payloads.append(frappe._dict(doc=doc, rows=rows)) - return payloads +def get_df_for_column_header(doctype, header): + def build_fields_dict_for_doctype(): + return build_fields_dict_for_column_matching(doctype) - def parse_next_row_for_import(self, data): - """ - Parses rows that make up a doc. A doc maybe built from a single row or multiple rows. - Returns the doc, rows, and data without the rows. - """ - doctypes = set([col.df.parent for col in self.columns if col.df and col.df.parent]) + df_by_labels_and_fieldname = frappe.cache().hget( + "data_import_column_header_map", doctype, generator=build_fields_dict_for_doctype + ) + return df_by_labels_and_fieldname.get(header) - # first row is included by default - first_row = data[0] - rows = [first_row] - # if there are child doctypes, find the subsequent rows - if len(doctypes) > 1: - # subsequent rows either dont have any parent value set - # or have the same value as the parent row - # we include a row if either of conditions match - parent_column_indexes = [ - col.index - for col in self.columns - if not col.skip_import and col.df and col.df.parent == self.doctype - ] - parent_row_values = [first_row[i] for i in parent_column_indexes] +# utilities - data_without_first_row = data[1:] - for row in data_without_first_row: - row_values = [row[i] for i in parent_column_indexes] - # if the row is blank, it's a child row doc - if all([v in INVALID_VALUES for v in row_values]): - rows.append(row) - continue - # if the row has same values as parent row, it's a child row doc - if row_values == parent_row_values: - rows.append(row) - continue - # if any of those conditions dont match, it's the next doc - break - def get_column_indexes(doctype): - return [ - col.index - for col in self.columns - if not col.skip_import and col.df and col.df.parent == doctype - ] +def get_autoname_field(doctype): + meta = frappe.get_meta(doctype) + if meta.autoname and meta.autoname.startswith("field:"): + fieldname = meta.autoname[len("field:") :] + return meta.get_field(fieldname) - def validate_value(value, df): - if df.fieldtype == "Select": - select_options = df.get_select_options() - if select_options and value not in select_options: - options_string = ", ".join([frappe.bold(d) for d in select_options]) - msg = _("Value must be one of {0}").format(options_string) - self.warnings.append( - { - "row": row_number, - "field": df.as_dict(convert_dates_to_str=True), - "message": msg, - } - ) - return - elif df.fieldtype == "Link": - d = self.get_missing_link_field_values(df.options) - if value in d.missing_values and not d.one_mandatory: - msg = _("Value {0} missing for {1}").format( - frappe.bold(value), frappe.bold(df.options) - ) - self.warnings.append( - { - "row": row_number, - "field": df.as_dict(convert_dates_to_str=True), - "message": msg, - } - ) - return value +def get_item_at_index(_list, i, default=None): + try: + a = _list[i] + except IndexError: + a = default + return a - return value - def parse_doc(doctype, docfields, values, row_number): - doc = frappe._dict() - if self.import_type == INSERT: - # new_doc returns a dict with default values set - doc = frappe.new_doc(doctype, as_dict=True) - - # remove standard fields and __islocal - for key in frappe.model.default_fields + ("__islocal",): - doc.pop(key, None) - - for df, value in zip(docfields, values): - if value in INVALID_VALUES: - value = None - - if value is not None: - value = validate_value(value, df) - - if value is not None: - doc[df.fieldname] = self.parse_value(value, df) - - is_table = frappe.get_meta(doctype).istable - is_update = self.import_type == UPDATE - if is_table and is_update and doc.get("name") in INVALID_VALUES: - # for table rows being inserted in update - # create a new doc with defaults set - new_doc = frappe.new_doc(doctype, as_dict=True) - new_doc.update(doc) - doc = new_doc - - check_mandatory_fields(doctype, doc, row_number) - return doc - - def check_mandatory_fields(doctype, doc, row_number): - """If import type is Insert: - Check for mandatory fields (except table fields) in doc - if import type is Update: - Check for name field or autoname field in doc - """ - meta = frappe.get_meta(doctype) - if self.import_type == UPDATE: - if meta.istable: - # when updating records with table rows, - # there are two scenarios: - # 1. if row 'name' is provided in the template - # the table row will be updated - # 2. if row 'name' is not provided - # then a new row will be added - # so we dont need to check for mandatory - return - - id_field = self.get_id_field(doctype) - if doc.get(id_field.fieldname) in INVALID_VALUES: - self.warnings.append( - { - "row": row_number, - "message": _("{0} is a mandatory field").format(id_field.label), - } - ) - return - - fields = [ - df - for df in meta.fields - if df.fieldtype not in table_fields - and df.reqd - and doc.get(df.fieldname) in INVALID_VALUES - ] - - if not fields: - return - - if len(fields) == 1: - self.warnings.append( - { - "row": row_number, - "message": _("{0} is a mandatory field").format(fields[0].label), - } - ) - else: - fields_string = ", ".join([df.label for df in fields]) - self.warnings.append( - {"row": row_number, "message": _("{0} are mandatory fields").format(fields_string)} - ) - - parsed_docs = {} - for row in rows: - for doctype in doctypes: - if doctype == self.doctype and parsed_docs.get(doctype): - # if parent doc is already parsed from the first row - # then skip - continue - - row_number = row[0] - column_indexes = get_column_indexes(doctype) - values = [row[i] for i in column_indexes] - - if all(v in INVALID_VALUES for v in values): - # skip values if all of them are empty - continue - - columns = [self.columns[i] for i in column_indexes] - docfields = [col.df for col in columns] - doc = parse_doc(doctype, docfields, values, row_number) - parsed_docs[doctype] = parsed_docs.get(doctype, []) - parsed_docs[doctype].append(doc) - - # build the doc with children - doc = {} - for doctype, docs in parsed_docs.items(): - if doctype == self.doctype: - doc.update(docs[0]) - else: - table_dfs = self.meta.get( - "fields", {"options": doctype, "fieldtype": ["in", table_fields]} - ) - if table_dfs: - table_field = table_dfs[0] - doc[table_field.fieldname] = docs - - # check if there is atleast one row for mandatory table fields - mandatory_table_fields = [ - df - for df in self.meta.fields - if df.fieldtype in table_fields and df.reqd and len(doc.get(df.fieldname, [])) == 0 - ] - if len(mandatory_table_fields) == 1: - self.warnings.append( - { - "row": first_row[0], - "message": _("There should be atleast one row for {0} table").format( - mandatory_table_fields[0].label - ), - } - ) - elif mandatory_table_fields: - fields_string = ", ".join([df.label for df in mandatory_table_fields]) - message = _("There should be atleast one row for the following tables: {0}").format( - fields_string - ) - self.warnings.append({"row": first_row[0], "message": message}) - - return doc, rows, data[len(rows) :] - - def process_doc(self, doc): - if self.import_type == INSERT: - return self.insert_record(doc) - elif self.import_type == UPDATE: - return self.update_record(doc) - - def insert_record(self, doc): - self.create_missing_linked_records(doc) - - new_doc = frappe.new_doc(self.doctype) - new_doc.update(doc) - # name shouldn't be set when inserting a new record - new_doc.set("name", None) - new_doc.insert() - if self.meta.is_submittable and self.data_import.submit_after_import: - new_doc.submit() - return new_doc - - def create_missing_linked_records(self, doc): - """ - Finds fields that are of type Link, and creates the corresponding - document automatically if it has only one mandatory field - """ - link_values = [] - - def get_link_fields(doc, doctype): - for fieldname, value in doc.items(): - meta = frappe.get_meta(doctype) - df = meta.get_field(fieldname) - if not df: - continue - if df.fieldtype == "Link" and value not in INVALID_VALUES: - link_values.append([df.options, value]) - elif df.fieldtype in table_fields: - for row in value: - get_link_fields(row, df.options) - - get_link_fields(doc, self.doctype) - - for link_doctype, link_value in link_values: - d = self.missing_link_values.get(link_doctype) - if d and d.one_mandatory and link_value in d.missing_values: - # find the autoname field - autoname_field = self.get_autoname_field(link_doctype) - name_field = autoname_field.fieldname if autoname_field else "name" - new_doc = frappe.new_doc(link_doctype) - new_doc.set(name_field, link_value) - new_doc.insert() - d.missing_values.remove(link_value) - - def update_record(self, doc): - id_fieldname = self.get_id_fieldname(self.doctype) - id_value = doc[id_fieldname] - existing_doc = frappe.get_doc(self.doctype, id_value) - existing_doc.flags.updater_reference = { - "doctype": self.data_import.doctype, - "docname": self.data_import.name, - "label": _("via Data Import"), - } - existing_doc.update(doc) - existing_doc.save() - return existing_doc - - def export_errored_rows(self): - from frappe.utils.csvutils import build_csv_response - - if not self.data_import: - return - - import_log = frappe.parse_json(self.data_import.import_log or "[]") - failures = [l for l in import_log if l.get("success") == False] - row_indexes = [] - for f in failures: - row_indexes.extend(f.get("row_indexes", [])) - - # de duplicate - row_indexes = list(set(row_indexes)) - row_indexes.sort() - - header_row = [col.header_title for col in self.columns[1:]] - rows = [header_row] - rows += [row[1:] for row in self.rows if row[0] in row_indexes] - - build_csv_response(rows, self.doctype) - - def get_missing_link_field_values(self, doctype): - return self.missing_link_values.get(doctype, {}) - - def prepare_missing_link_field_values(self): - columns = self.columns - rows = self.rows - link_column_indexes = [ - col.index for col in columns if col.df and col.df.fieldtype == "Link" - ] - - self.missing_link_values = {} - for index in link_column_indexes: - col = columns[index] - column_values = [row[index] for row in rows] - values = set([v for v in column_values if v not in INVALID_VALUES]) - doctype = col.df.options - - missing_values = [value for value in values if not frappe.db.exists(doctype, value)] - if self.missing_link_values.get(doctype): - self.missing_link_values[doctype].missing_values += missing_values - else: - self.missing_link_values[doctype] = frappe._dict( - missing_values=missing_values, - one_mandatory=self.has_one_mandatory_field(doctype), - df=col.df, - ) - - def get_eta(self, current, total, processing_time): - remaining = total - current - eta = processing_time * remaining - if not self.last_eta or eta < self.last_eta: - self.last_eta = eta - return self.last_eta - - def has_one_mandatory_field(self, doctype): - meta = frappe.get_meta(doctype) - # get mandatory fields with default not set - mandatory_fields = [df for df in meta.fields if df.reqd and not df.default] - mandatory_fields_count = len(mandatory_fields) - if meta.autoname and meta.autoname.lower() == "prompt": - mandatory_fields_count += 1 - return mandatory_fields_count == 1 - - def get_id_fieldname(self, doctype): - return self.get_id_field(doctype).fieldname - - def get_id_field(self, doctype): - autoname_field = self.get_autoname_field(doctype) - if autoname_field: - return autoname_field - return frappe._dict({"label": "ID", "fieldname": "name", "fieldtype": "Data"}) - - def get_autoname_field(self, doctype): - meta = frappe.get_meta(doctype) - if meta.autoname and meta.autoname.startswith("field:"): - fieldname = meta.autoname[len("field:") :] - return meta.get_field(fieldname) - - def print_grouped_warnings(self, warnings): - warnings_by_row = {} - other_warnings = [] - for w in warnings: - if w.get("row"): - warnings_by_row.setdefault(w.get("row"), []).append(w) - else: - other_warnings.append(w) - - for row_number, warnings in warnings_by_row.items(): - print("Row {0}".format(row_number)) - for w in warnings: - print(w.get("message")) - - for w in other_warnings: - print(w.get("message")) - - def print_import_log(self, import_log): - failed_records = [l for l in import_log if not l.success] - successful_records = [l for l in import_log if l.success] - - if successful_records: - print( - "Successfully imported {0} records out of {1}".format( - len(successful_records), len(import_log) - ) - ) - - if failed_records: - print("Failed to import {0} records".format(len(failed_records))) - file_name = "{0}_import_on_{1}.txt".format(self.doctype, frappe.utils.now()) - print("Check {0} for errors".format(os.path.join("sites", file_name))) - text = "" - for w in failed_records: - text += "Row Indexes: {0}\n".format(str(w.get("row_indexes", []))) - text += "Messages:\n{0}\n".format("\n".join(w.get("messages", []))) - text += "Traceback:\n{0}\n\n".format(w.get("exception")) - - with open(file_name, "w") as f: - f.write(text) - - -DATE_FORMATS = [ - r"%d-%m-%Y", - r"%m-%d-%Y", - r"%Y-%m-%d", - r"%d-%m-%y", - r"%m-%d-%y", - r"%y-%m-%d", - r"%d/%m/%Y", - r"%m/%d/%Y", - r"%Y/%m/%d", - r"%d/%m/%y", - r"%m/%d/%y", - r"%y/%m/%d", - r"%d.%m.%Y", - r"%m.%d.%Y", - r"%Y.%m.%d", - r"%d.%m.%y", - r"%m.%d.%y", - r"%y.%m.%d", -] - -TIME_FORMATS = [ - r"%H:%M:%S.%f", - r"%H:%M:%S", - r"%H:%M", - r"%I:%M:%S.%f %p", - r"%I:%M:%S %p", - r"%I:%M %p", -] - - -def guess_date_format(date_string): - date_string = date_string.strip() - - _date = None - _time = None - - if " " in date_string: - _date, _time = date_string.split(" ", 1) - else: - _date = date_string - - date_format = None - time_format = None - - for f in DATE_FORMATS: - try: - # if date is parsed without any exception - # capture the date format - datetime.strptime(_date, f) - date_format = f - break - except ValueError: - pass - - if _time: - for f in TIME_FORMATS: - try: - # if time is parsed without any exception - # capture the time format - datetime.strptime(_time, f) - time_format = f - break - except ValueError: - pass - - full_format = date_format - if time_format: - full_format += " " + time_format - return full_format - - -def import_data(doctype, file_path): - i = Importer(doctype, file_path) - i.import_data() +def get_user_format(date_format): + return ( + date_format.replace("%Y", "yyyy") + .replace("%y", "yy") + .replace("%m", "mm") + .replace("%d", "dd") + ) diff --git a/frappe/public/js/frappe/data_import/data_exporter.js b/frappe/public/js/frappe/data_import/data_exporter.js index 8276be6670..21f0b78a25 100644 --- a/frappe/public/js/frappe/data_import/data_exporter.js +++ b/frappe/public/js/frappe/data_import/data_exporter.js @@ -202,16 +202,16 @@ frappe.data_import.DataExporter = class DataExporter { } select_mandatory() { - let mandatory_table_doctypes = frappe.meta + let mandatory_table_fields = frappe.meta .get_table_fields(this.doctype) .filter(df => df.reqd) - .map(df => df.options); - mandatory_table_doctypes.push(this.doctype); + .map(df => df.fieldname); + mandatory_table_fields.push(this.doctype); let multicheck_fields = this.dialog.fields .filter(df => df.fieldtype === 'MultiCheck') .map(df => df.fieldname) - .filter(doctype => mandatory_table_doctypes.includes(doctype)); + .filter(doctype => mandatory_table_fields.includes(doctype)); let checkboxes = [].concat( ...multicheck_fields.map(fieldname => { @@ -333,16 +333,24 @@ frappe.data_import.DataExporter = class DataExporter { } }; -function get_columns_for_picker(doctype) { +export function get_columns_for_picker(doctype) { let out = {}; - const standard_fields_filter = df => - !in_list(frappe.model.no_value_type, df.fieldtype); + const exportable_fields = df => { + let keep = true; + if (frappe.model.no_value_type.includes(df.fieldtype)) { + keep = false; + } + if (['lft', 'rgt'].includes(df.fieldname)) { + keep = false; + } + return keep; + }; // parent let doctype_fields = frappe.meta .get_docfields(doctype) - .filter(standard_fields_filter); + .filter(exportable_fields); out[doctype] = [ { @@ -359,7 +367,7 @@ function get_columns_for_picker(doctype) { const cdt = df.options; const child_table_fields = frappe.meta .get_docfields(cdt) - .filter(standard_fields_filter); + .filter(exportable_fields); out[df.fieldname] = [ { diff --git a/frappe/public/js/frappe/data_import/import_preview.js b/frappe/public/js/frappe/data_import/import_preview.js index 27d81b75b7..7cf8431456 100644 --- a/frappe/public/js/frappe/data_import/import_preview.js +++ b/frappe/public/js/frappe/data_import/import_preview.js @@ -1,5 +1,5 @@ import DataTable from 'frappe-datatable'; -import ColumnPickerFields from './column_picker_fields'; +import { get_columns_for_picker } from './data_exporter'; frappe.provide('frappe.data_import'); @@ -236,9 +236,7 @@ frappe.data_import.ImportPreview = class ImportPreview { } show_column_mapper() { - let column_picker_fields = new ColumnPickerFields({ - doctype: this.doctype - }); + let column_picker_fields = get_columns_for_picker(this.doctype); let changed = []; let fields = this.preview_data.columns.map((col, i) => { let df = col.df; diff --git a/frappe/public/less/form.less b/frappe/public/less/form.less index df0334c14f..cd391c1f10 100644 --- a/frappe/public/less/form.less +++ b/frappe/public/less/form.less @@ -249,6 +249,7 @@ } .progress-message { + font-feature-settings: "tnum" 1; margin-top: 0px; } } @@ -1011,7 +1012,7 @@ body[data-route^="Form/Communication"] textarea[data-fieldname="subject"] { .map-columns .form-section { padding: 0 7px 7px; - border-bottom: none; + border-top: none; .clearfix { display: none; @@ -1021,3 +1022,7 @@ body[data-route^="Form/Communication"] textarea[data-fieldname="subject"] { .map-columns .form-section:first-child { padding-top: 7px; } + +.table-preview { + margin-top: 12px; +} diff --git a/frappe/utils/data.py b/frappe/utils/data.py index 7e991f472e..0d946c01a8 100644 --- a/frappe/utils/data.py +++ b/frappe/utils/data.py @@ -1185,3 +1185,75 @@ def is_subset(list_a, list_b): def generate_hash(*args, **kwargs): return frappe.generate_hash(*args, **kwargs) + + + +def guess_date_format(date_string): + DATE_FORMATS = [ + r"%d-%m-%Y", + r"%m-%d-%Y", + r"%Y-%m-%d", + r"%d-%m-%y", + r"%m-%d-%y", + r"%y-%m-%d", + r"%d/%m/%Y", + r"%m/%d/%Y", + r"%Y/%m/%d", + r"%d/%m/%y", + r"%m/%d/%y", + r"%y/%m/%d", + r"%d.%m.%Y", + r"%m.%d.%Y", + r"%Y.%m.%d", + r"%d.%m.%y", + r"%m.%d.%y", + r"%y.%m.%d", + ] + + TIME_FORMATS = [ + r"%H:%M:%S.%f", + r"%H:%M:%S", + r"%H:%M", + r"%I:%M:%S.%f %p", + r"%I:%M:%S %p", + r"%I:%M %p", + ] + + date_string = date_string.strip() + + _date = None + _time = None + + if " " in date_string: + _date, _time = date_string.split(" ", 1) + else: + _date = date_string + + date_format = None + time_format = None + + for f in DATE_FORMATS: + try: + # if date is parsed without any exception + # capture the date format + datetime.datetime.strptime(_date, f) + date_format = f + break + except ValueError: + pass + + if _time: + for f in TIME_FORMATS: + try: + # if time is parsed without any exception + # capture the time format + datetime.datetime.strptime(_time, f) + time_format = f + break + except ValueError: + pass + + full_format = date_format + if time_format: + full_format += " " + time_format + return full_format