`;
@@ -346,8 +346,8 @@ frappe.ui.form.on('Data Import Beta', {
header = __('Column {0}', [warning.col]);
}
return `
-
-
${header}
+
+
${header}
${warning.message}
`;
@@ -355,7 +355,7 @@ frappe.ui.form.on('Data Import Beta', {
.join('');
frm.get_field('import_warnings').$wrapper.html(`
`);
},
diff --git a/frappe/core/doctype/data_import_beta/data_import_beta.json b/frappe/core/doctype/data_import_beta/data_import_beta.json
index 777af0a071..8876d2246a 100644
--- a/frappe/core/doctype/data_import_beta/data_import_beta.json
+++ b/frappe/core/doctype/data_import_beta/data_import_beta.json
@@ -16,11 +16,11 @@
"submit_after_import",
"mute_emails",
"template_options",
- "section_import_preview",
- "import_preview",
"import_warnings_section",
"template_warnings",
"import_warnings",
+ "section_import_preview",
+ "import_preview",
"import_log_section",
"import_log",
"show_failed_logs",
@@ -34,7 +34,9 @@
"label": "Document Type",
"options": "DocType",
"reqd": 1,
- "set_only_once": 1
+ "set_only_once": 1,
+ "show_days": 1,
+ "show_seconds": 1
},
{
"fieldname": "import_type",
@@ -43,28 +45,38 @@
"label": "Import Type",
"options": "\nInsert New Records\nUpdate Existing Records",
"reqd": 1,
- "set_only_once": 1
+ "set_only_once": 1,
+ "show_days": 1,
+ "show_seconds": 1
},
{
"depends_on": "eval:!doc.__islocal",
"fieldname": "import_file",
"fieldtype": "Attach",
"in_list_view": 1,
- "label": "Import File"
+ "label": "Import File",
+ "show_days": 1,
+ "show_seconds": 1
},
{
"fieldname": "import_preview",
"fieldtype": "HTML",
- "label": "Import Preview"
+ "label": "Import Preview",
+ "show_days": 1,
+ "show_seconds": 1
},
{
"fieldname": "section_import_preview",
"fieldtype": "Section Break",
- "label": "Preview"
+ "label": "Preview",
+ "show_days": 1,
+ "show_seconds": 1
},
{
"fieldname": "column_break_5",
- "fieldtype": "Column Break"
+ "fieldtype": "Column Break",
+ "show_days": 1,
+ "show_seconds": 1
},
{
"fieldname": "template_options",
@@ -72,23 +84,31 @@
"hidden": 1,
"label": "Template Options",
"options": "JSON",
- "read_only": 1
+ "read_only": 1,
+ "show_days": 1,
+ "show_seconds": 1
},
{
"fieldname": "import_log",
"fieldtype": "Code",
"label": "Import Log",
- "options": "JSON"
+ "options": "JSON",
+ "show_days": 1,
+ "show_seconds": 1
},
{
"fieldname": "import_log_section",
"fieldtype": "Section Break",
- "label": "Import Log"
+ "label": "Import Log",
+ "show_days": 1,
+ "show_seconds": 1
},
{
"fieldname": "import_log_preview",
"fieldtype": "HTML",
- "label": "Import Log Preview"
+ "label": "Import Log Preview",
+ "show_days": 1,
+ "show_seconds": 1
},
{
"default": "Pending",
@@ -97,56 +117,72 @@
"hidden": 1,
"label": "Status",
"options": "Pending\nSuccess\nPartial Success\nError",
- "read_only": 1
+ "read_only": 1,
+ "show_days": 1,
+ "show_seconds": 1
},
{
"fieldname": "template_warnings",
"fieldtype": "Code",
"hidden": 1,
"label": "Template Warnings",
- "options": "JSON"
+ "options": "JSON",
+ "show_days": 1,
+ "show_seconds": 1
},
{
"default": "0",
"fieldname": "submit_after_import",
"fieldtype": "Check",
"label": "Submit After Import",
- "set_only_once": 1
+ "set_only_once": 1,
+ "show_days": 1,
+ "show_seconds": 1
},
{
"fieldname": "import_warnings_section",
"fieldtype": "Section Break",
- "label": "Warnings"
+ "label": "Warnings",
+ "show_days": 1,
+ "show_seconds": 1
},
{
"fieldname": "import_warnings",
"fieldtype": "HTML",
- "label": "Import Warnings"
+ "label": "Import Warnings",
+ "show_days": 1,
+ "show_seconds": 1
},
{
"depends_on": "reference_doctype",
"fieldname": "download_template",
"fieldtype": "Button",
- "label": "Download Template"
+ "label": "Download Template",
+ "show_days": 1,
+ "show_seconds": 1
},
{
"default": "1",
"fieldname": "mute_emails",
"fieldtype": "Check",
"label": "Don't Send Emails",
- "set_only_once": 1
+ "set_only_once": 1,
+ "show_days": 1,
+ "show_seconds": 1
},
{
"default": "0",
"fieldname": "show_failed_logs",
"fieldtype": "Check",
- "label": "Show Failed Logs"
+ "label": "Show Failed Logs",
+ "show_days": 1,
+ "show_seconds": 1
}
],
"hide_toolbar": 1,
"links": [],
- "modified": "2020-02-17 15:35:04.386098",
- "modified_by": "faris@erpnext.com",
+ "modified": "2020-05-28 22:11:38.266208",
+ "modified_by": "Administrator",
"module": "Core",
"name": "Data Import Beta",
"owner": "Administrator",
diff --git a/frappe/core/doctype/data_import_beta/data_import_beta.py b/frappe/core/doctype/data_import_beta/data_import_beta.py
index 8f12bd20ed..23e0681011 100644
--- a/frappe/core/doctype/data_import_beta/data_import_beta.py
+++ b/frappe/core/doctype/data_import_beta/data_import_beta.py
@@ -5,8 +5,9 @@
from __future__ import unicode_literals
import frappe
from frappe.model.document import Document
-from frappe.core.doctype.data_import.importer_new import Importer
-from frappe.core.doctype.data_import.exporter_new import Exporter
+
+from frappe.core.doctype.data_import_beta.importer import Importer
+from frappe.core.doctype.data_import_beta.exporter import Exporter
from frappe.core.page.background_jobs.background_jobs import get_info
from frappe.utils.background_jobs import enqueue
from frappe import _
@@ -25,7 +26,10 @@ class DataImportBeta(Document):
# validate template
self.get_importer()
- def get_preview_from_template(self):
+ def get_preview_from_template(self, import_file=None):
+ if import_file:
+ self.import_file = import_file
+
if not self.import_file:
return
@@ -62,8 +66,8 @@ class DataImportBeta(Document):
@frappe.whitelist()
-def get_preview_from_template(data_import):
- return frappe.get_doc("Data Import Beta", data_import).get_preview_from_template()
+def get_preview_from_template(data_import, import_file):
+ return frappe.get_doc("Data Import Beta", data_import).get_preview_from_template(import_file)
@frappe.whitelist()
@@ -81,8 +85,8 @@ def start_import(data_import):
frappe.db.rollback()
data_import.db_set("status", "Error")
frappe.log_error(title=data_import.name)
- frappe.db.commit()
- frappe.publish_realtime("data_import_refresh", {"data_import": data_import.name})
+
+ frappe.publish_realtime("data_import_refresh", {"data_import": data_import.name})
@frappe.whitelist()
diff --git a/frappe/core/doctype/data_import_beta/importer_new.py b/frappe/core/doctype/data_import_beta/importer_new.py
index cbb2ee482b..02721fb93f 100644
--- a/frappe/core/doctype/data_import_beta/importer_new.py
+++ b/frappe/core/doctype/data_import_beta/importer_new.py
@@ -1,12 +1,12 @@
-# -*- coding: utf-8 -*-
-# Copyright (c) 2019, Frappe Technologies Pvt. Ltd. and Contributors
+# Copyright (c) 2020, Frappe Technologies Pvt. Ltd. and Contributors
# MIT License. See license.txt
-import io
+from __future__ import unicode_literals
import os
-import json
-import timeit
+import io
import frappe
+import timeit
+import json
from datetime import datetime
from frappe import _
from frappe.utils import cint, flt, update_progress_bar, cstr, DATETIME_FORMAT
@@ -15,65 +15,406 @@ from frappe.utils.xlsxutils import (
read_xlsx_file_from_attached_file,
read_xls_file_from_attached_file,
)
-from frappe.model import no_value_fields, table_fields
+from frappe.model import no_value_fields, table_fields as table_fieldtypes
-INVALID_VALUES = ["", None]
+INVALID_VALUES = ("", None)
MAX_ROWS_IN_PREVIEW = 10
INSERT = "Insert New Records"
UPDATE = "Update Existing Records"
-# pylint: disable=R0201
+
class Importer:
- def __init__(
- self, doctype, data_import=None, file_path=None, content=None, console=False
- ):
+ def __init__(self, doctype, data_import=None, import_type=None, console=False):
self.doctype = doctype
- self.template_options = frappe._dict({"remap_column": {}})
self.console = console
- if data_import:
- self.data_import = data_import
- if self.data_import.template_options:
- template_options = frappe.parse_json(self.data_import.template_options)
- self.template_options.update(template_options)
- self.import_type = self.data_import.import_type
+ self.data_import = data_import
+ if not self.data_import:
+ self.data_import = frappe.get_doc(doctype="Data Import Beta")
+ if import_type:
+ self.data_import.import_type = import_type
+
+ self.template_options = frappe.parse_json(self.data_import.template_options or "{}")
+ self.import_type = self.data_import.import_type
+
+ self.import_file = ImportFile(
+ doctype, data_import.import_file, self.template_options, self.import_type
+ )
+
+ def get_data_for_import_preview(self):
+ return self.import_file.get_data_for_import_preview()
+
+ def before_import(self):
+ # set user lang for translations
+ frappe.cache().hdel("lang", frappe.session.user)
+ frappe.set_user_lang(frappe.session.user)
+
+ # set flags
+ frappe.flags.in_import = True
+ frappe.flags.mute_emails = self.data_import.mute_emails
+
+ self.data_import.db_set("template_warnings", "")
+
+ def import_data(self):
+ self.before_import()
+
+ # parse docs from rows
+ payloads = self.import_file.get_payloads_for_import()
+
+ # dont import if there are non-ignorable warnings
+ warnings = self.import_file.get_warnings()
+ warnings = [w for w in warnings if w.get("type") != "info"]
+
+ print(warnings)
+
+ if warnings:
+ if self.console:
+ self.print_grouped_warnings(warnings)
+ else:
+ self.data_import.db_set("template_warnings", json.dumps(warnings))
+ return
+
+ # setup import log
+ if self.data_import.import_log:
+ import_log = frappe.parse_json(self.data_import.import_log)
else:
- self.data_import = None
+ import_log = []
- self.import_type = self.import_type or INSERT
+ # remove previous failures from import log
+ import_log = [l for l in import_log if l.get("success") == True]
- self.header_row = None
- self.data = None
- # used to store date formats guessed from data rows per column
- self._guessed_date_formats = {}
- # used to store eta during import
- self.last_eta = 0
- # used to collect warnings during template parsing
- # and show them to user
- self.warnings = []
- self.meta = frappe.get_meta(doctype)
- self.prepare_content(file_path, content)
+ # get successfully imported rows
+ imported_rows = []
+ for log in import_log:
+ log = frappe._dict(log)
+ if log.success:
+ imported_rows += log.row_indexes
+
+ # start import
+ total_payload_count = len(payloads)
+ batch_size = frappe.conf.data_import_batch_size or 1000
+
+ for batch_index, batched_payloads in enumerate(
+ frappe.utils.create_batch(payloads, batch_size)
+ ):
+ for i, payload in enumerate(batched_payloads):
+ doc = payload.doc
+ row_indexes = [row.row_number for row in payload.rows]
+ current_index = (i + 1) + (batch_index * batch_size)
+
+ if set(row_indexes).intersection(set(imported_rows)):
+ print("Skipping imported rows", row_indexes)
+ if total_payload_count > 5:
+ frappe.publish_realtime(
+ "data_import_progress",
+ {
+ "current": current_index,
+ "total": total_payload_count,
+ "skipping": True,
+ "data_import": self.data_import.name,
+ },
+ )
+ continue
+
+ try:
+ start = timeit.default_timer()
+ doc = self.process_doc(doc)
+ processing_time = timeit.default_timer() - start
+ eta = self.get_eta(current_index, total_payload_count, processing_time)
+
+ if self.console:
+ update_progress_bar(
+ "Importing {0} records".format(total_payload_count),
+ current_index,
+ total_payload_count,
+ )
+ elif total_payload_count > 5:
+ frappe.publish_realtime(
+ "data_import_progress",
+ {
+ "current": current_index,
+ "total": total_payload_count,
+ "docname": doc.name,
+ "data_import": self.data_import.name,
+ "success": True,
+ "row_indexes": row_indexes,
+ "eta": eta,
+ },
+ )
+
+ import_log.append(
+ frappe._dict(success=True, docname=doc.name, row_indexes=row_indexes)
+ )
+ # commit after every successful import
+ frappe.db.commit()
+
+ except Exception:
+ import_log.append(
+ frappe._dict(
+ success=False,
+ exception=frappe.get_traceback(),
+ messages=frappe.local.message_log,
+ row_indexes=row_indexes,
+ )
+ )
+ frappe.clear_messages()
+ # rollback if exception
+ frappe.db.rollback()
+
+ # set status
+ failures = [l for l in import_log if l.get("success") == False]
+ if len(failures) == total_payload_count:
+ status = "Pending"
+ elif len(failures) > 0:
+ status = "Partial Success"
+ else:
+ status = "Success"
+
+ if self.console:
+ self.print_import_log(import_log)
+ else:
+ self.data_import.db_set("status", status)
+ self.data_import.db_set("import_log", json.dumps(import_log))
+
+ self.after_import()
+
+ return import_log
+
+ def after_import(self):
+ frappe.flags.in_import = False
+ frappe.flags.mute_emails = False
+
+ def process_doc(self, doc):
+ if self.import_type == INSERT:
+ return self.insert_record(doc)
+ elif self.import_type == UPDATE:
+ return self.update_record(doc)
+
+ def insert_record(self, doc):
+ meta = frappe.get_meta(self.doctype)
+ new_doc = frappe.new_doc(self.doctype)
+ new_doc.update(doc)
+
+ if (meta.autoname or "").lower() != "prompt":
+ # name can only be set directly if autoname is prompt
+ new_doc.set("name", None)
+
+ new_doc.flags.updater_reference = {
+ "doctype": self.data_import.doctype,
+ "docname": self.data_import.name,
+ "label": _("via Data Import"),
+ }
+
+ new_doc.insert()
+ if meta.is_submittable and self.data_import.submit_after_import:
+ new_doc.submit()
+ return new_doc
+
+ def update_record(self, doc):
+ existing_doc = frappe.get_doc(self.doctype, doc["name"])
+ existing_doc.flags.updater_reference = {
+ "doctype": self.data_import.doctype,
+ "docname": self.data_import.name,
+ "label": _("via Data Import"),
+ }
+ existing_doc.update(doc)
+ existing_doc.save()
+ return existing_doc
+
+ def get_eta(self, current, total, processing_time):
+ self.last_eta = getattr(self, "last_eta", 0)
+ remaining = total - current
+ eta = processing_time * remaining
+ if not self.last_eta or eta < self.last_eta:
+ self.last_eta = eta
+ return self.last_eta
+
+
+class ImportFile:
+ def __init__(self, doctype, file, template_options=None, import_type=None):
+ self.doctype = doctype
+ self.template_options = template_options or frappe._dict(
+ column_to_field_map=frappe._dict()
+ )
+ self.column_to_field_map = self.template_options.column_to_field_map
+ self.import_type = import_type
+
+ self.file_doc = self.file_path = None
+ if isinstance(file, frappe.model.document.Document) and file.doctype == "File":
+ self.file_doc = file
+ elif isinstance(file, frappe.string_types):
+ if frappe.db.exists("File", {"file_url": file}):
+ self.file_doc = frappe.get_doc("File", {"file_url": file})
+ elif os.path.exists(file):
+ self.file_path = file
+
+ if not self.file_doc and not self.file_path:
+ frappe.throw(_("Invalid template file for import"))
+
+ self.raw_data = self.get_data_from_template_file()
self.parse_data_from_template()
- def prepare_content(self, file_path, content):
+ def get_data_from_template_file(self):
+ content = None
extension = None
- if self.data_import and self.data_import.import_file:
- file_doc = frappe.get_doc("File", {"file_url": self.data_import.import_file})
- parts = file_doc.get_extension()
+
+ if self.file_doc:
+ parts = self.file_doc.get_extension()
extension = parts[1]
- content = file_doc.get_content()
+ content = self.file_doc.get_content()
extension = extension.lstrip(".")
- if file_path:
- content, extension = self.read_file(file_path)
+ elif self.file_path:
+ content, extension = self.read_file(self.file_path)
+
+ if not content:
+ frappe.throw(_("Invalid or corrupted content for import"))
if not extension:
extension = "csv"
if content:
- self.read_content(content, extension)
+ return self.read_content(content, extension)
- self.validate_template_content()
+ def parse_data_from_template(self):
+ header = None
+ data = []
+
+ for i, row in enumerate(self.raw_data):
+ if all(v in INVALID_VALUES for v in row):
+ # empty row
+ continue
+
+ if not header:
+ header = Header(i, row, self.doctype, self.raw_data, self.column_to_field_map)
+ else:
+ row_obj = Row(i, row, self.doctype, header, self.import_type)
+ data.append(row_obj)
+
+ self.header = header
+ self.columns = self.header.columns
+ self.data = data
+
+ if len(data) <= 1:
+ frappe.throw(
+ _("Import template should contain a Header and atleast one row."),
+ title=_("Template Error"),
+ )
+
+ def get_data_for_import_preview(self):
+ """Adds a serial number column as the first column"""
+
+ columns = [frappe._dict({"header_title": "Sr. No", "skip_import": True})]
+ columns += [col.as_dict() for col in self.columns]
+ data = [[row.row_number] + row.as_list() for row in self.data]
+
+ warnings = self.get_warnings()
+
+ out = frappe._dict()
+ out.data = data
+ out.columns = columns
+ out.warnings = warnings
+ total_number_of_rows = len(out.data)
+ if total_number_of_rows > MAX_ROWS_IN_PREVIEW:
+ out.data = out.data[:MAX_ROWS_IN_PREVIEW]
+ out.max_rows_exceeded = True
+ out.max_rows_in_preview = MAX_ROWS_IN_PREVIEW
+ out.total_number_of_rows = total_number_of_rows
+ return out
+
+ def get_payloads_for_import(self):
+ payloads = []
+ # make a copy
+ data = list(self.data)
+ while data:
+ doc, rows, data = self.parse_next_row_for_import(data)
+ payloads.append(frappe._dict(doc=doc, rows=rows))
+ return payloads
+
+ def parse_next_row_for_import(self, data):
+ """
+ Parses rows that make up a doc. A doc maybe built from a single row or multiple rows.
+ Returns the doc, rows, and data without the rows.
+ """
+ doctypes = self.header.doctypes
+
+ # first row is included by default
+ first_row = data[0]
+ rows = [first_row]
+
+ # if there are child doctypes, find the subsequent rows
+ if len(doctypes) > 1:
+ # subsequent rows either dont have any parent value set
+ # or have the same value as the parent row
+ # we include a row if either of conditions match
+ parent_column_indexes = self.header.get_column_indexes(self.doctype)
+ parent_row_values = first_row.get_values(parent_column_indexes)
+
+ data_without_first_row = data[1:]
+ for row in data_without_first_row:
+ row_values = row.get_values(parent_column_indexes)
+ # if the row is blank, it's a child row doc
+ if all([v in INVALID_VALUES for v in row_values]):
+ rows.append(row)
+ continue
+ # if the row has same values as parent row, it's a child row doc
+ if row_values == parent_row_values:
+ rows.append(row)
+ continue
+ # if any of those conditions dont match, it's the next doc
+ break
+
+ parsed_docs = {}
+ parent_doc = None
+ for row in rows:
+ for doctype, table_df in doctypes:
+ if doctype == self.doctype and not parent_doc:
+ parent_doc = row.parse_doc(doctype)
+
+ if doctype != self.doctype and table_df:
+ child_doc = row.parse_doc(doctype, parent_doc, table_df)
+ parent_doc[table_df.fieldname] = parent_doc.get(table_df.fieldname, [])
+ parent_doc[table_df.fieldname].append(child_doc)
+
+ doc = parent_doc
+ # check if there is atleast one row for mandatory table fields
+ meta = frappe.get_meta(self.doctype)
+ mandatory_table_fields = [
+ df
+ for df in meta.fields
+ if df.fieldtype in table_fieldtypes
+ and df.reqd
+ and len(doc.get(df.fieldname, [])) == 0
+ ]
+ if len(mandatory_table_fields) == 1:
+ self.warnings.append(
+ {
+ "row": first_row.row_number,
+ "message": _("There should be atleast one row for {0} table").format(
+ mandatory_table_fields[0].label
+ ),
+ }
+ )
+ elif mandatory_table_fields:
+ fields_string = ", ".join([df.label for df in mandatory_table_fields])
+ message = _("There should be atleast one row for the following tables: {0}").format(
+ fields_string
+ )
+ self.warnings.append({"row": first_row.row_number, "message": message})
+
+ return doc, rows, data[len(rows) :]
+
+ def get_warnings(self):
+ warnings = []
+ for col in self.header.columns:
+ warnings += col.warnings
+
+ for row in self.data:
+ warnings += row.warnings
+
+ return warnings
+
+ ######
def read_file(self, file_path):
extn = file_path.split(".")[1]
@@ -98,18 +439,10 @@ class Importer:
elif extension == "xls":
data = read_xls_file_from_attached_file(content)
- data = self.remove_empty_rows_and_columns(data)
-
- if len(data) <= 1:
- frappe.throw(
- _("Import template should contain a Header and atleast one row."), title=error_title
- )
-
- self.header_row = data[0]
- self.data = data[1:]
+ return data
def validate_template_content(self):
- column_count = len(self.header_row)
+ column_count = len(self.columns)
if any([len(row) != column_count and len(row) != 0 for row in self.data]):
frappe.throw(
_("Number of columns does not match with data"), title=_("Invalid Template")
@@ -151,45 +484,324 @@ class Importer:
return data_without_empty_rows_and_columns
- def get_data_for_import_preview(self):
- out = frappe._dict()
- out.data = list(self.rows)
- out.columns = self.columns
- out.warnings = self.warnings
- total_number_of_rows = len(out.data)
- if total_number_of_rows > MAX_ROWS_IN_PREVIEW:
- out.data = out.data[:MAX_ROWS_IN_PREVIEW]
- out.max_rows_exceeded = True
- out.max_rows_in_preview = MAX_ROWS_IN_PREVIEW
- out.total_number_of_rows = total_number_of_rows
- return out
- def parse_data_from_template(self):
- columns = self.parse_columns_from_header_row()
- columns = self.detect_date_formats(columns)
- columns, data = self.add_serial_no_column(columns, self.data)
+class Row:
+ link_values_exist_map = {}
- self.columns = columns
- self.rows = data
+ def __init__(self, index, row, doctype, header, import_type):
+ self.index = index
+ self.row_number = index + 1
+ self.doctype = doctype
+ self.data = row
+ self.header = header
+ self.import_type = import_type
+ self.warnings = []
- def parse_columns_from_header_row(self):
- remap_column = self.template_options.remap_column
- columns = []
- seen = []
+ len_row = len(self.data)
+ len_columns = len(self.header.columns)
+ if len_row != len_columns:
+ less_than_columns = len_row < len_columns
+ message = (
+ "Row has less values than columns"
+ if less_than_columns
+ else "Row has more values than columns"
+ )
+ self.warnings.append(
+ {"row": self.row_number, "message": message,}
+ )
- df_by_labels_and_fieldnames = self.build_fields_dict_for_column_matching()
+ def parse_doc(self, doctype, parent_doc=None, table_df=None):
+ col_indexes = self.header.get_column_indexes(doctype, table_df)
+ values = self.get_values(col_indexes)
+ columns = self.header.get_columns(col_indexes)
+ doc = self._parse_doc(doctype, columns, values, parent_doc, table_df)
+ return doc
- for i, header_title in enumerate(self.header_row):
- header_row_index = str(i)
- column_number = str(i + 1)
- skip_import = False
- fieldname = remap_column.get(header_row_index)
+ def _parse_doc(self, doctype, columns, values, parent_doc=None, table_df=None):
+ doc = frappe._dict()
+ if self.import_type == INSERT:
+ # new_doc returns a dict with default values set
+ doc = frappe.new_doc(
+ doctype,
+ parent_doc=parent_doc,
+ parentfield=table_df.fieldname if table_df else None,
+ as_dict=True,
+ )
- if fieldname and fieldname != "Don't Import":
- df = df_by_labels_and_fieldnames.get(fieldname)
+ # remove standard fields and __islocal
+ for key in frappe.model.default_fields + ("__islocal",):
+ doc.pop(key, None)
+
+ for col, value in zip(columns, values):
+ df = col.df
+ if value in INVALID_VALUES:
+ value = None
+
+ if value is not None:
+ value = self.validate_value(value, col)
+
+ if value is not None:
+ doc[df.fieldname] = self.parse_value(value, col)
+
+ is_table = frappe.get_meta(doctype).istable
+ is_update = self.import_type == UPDATE
+ if is_table and is_update and doc.get("name") in INVALID_VALUES:
+ # for table rows being inserted in update
+ # create a new doc with defaults set
+ new_doc = frappe.new_doc(doctype, as_dict=True)
+ new_doc.update(doc)
+ doc = new_doc
+
+ self.check_mandatory_fields(doctype, doc)
+ return doc
+
+ def validate_value(self, value, col):
+ df = col.df
+ if df.fieldtype == "Select":
+ select_options = df.get_select_options()
+ if select_options and value not in select_options:
+ options_string = ", ".join([frappe.bold(d) for d in select_options])
+ msg = _("Value must be one of {0}").format(options_string)
+ self.warnings.append(
+ {
+ "row": self.row_number,
+ "field": df.as_dict(convert_dates_to_str=True),
+ "message": msg,
+ }
+ )
+ return
+
+ elif df.fieldtype == "Link":
+ exists = self.link_exists(value, df)
+ if not exists:
+ msg = _("Value {0} missing for {1}").format(
+ frappe.bold(value), frappe.bold(df.options)
+ )
+ self.warnings.append(
+ {
+ "row": self.row_number,
+ "field": df.as_dict(convert_dates_to_str=True),
+ "message": msg,
+ }
+ )
+ return
+ elif df.fieldtype in ["Date", "Datetime"]:
+ value = self.get_date(value, col)
+ if isinstance(value, frappe.string_types):
+ # value was not parsed as datetime object
+ self.warnings.append(
+ {
+ "row": self.row_number,
+ "col": col.column_number,
+ "field": df.as_dict(convert_dates_to_str=True),
+ "message": _("Value {0} must in {1} format").format(
+ frappe.bold(value), frappe.bold(get_user_format(col.date_format))
+ ),
+ }
+ )
+ return
+
+ return value
+
+ def link_exists(self, value, df):
+ key = df.options + "::" + value
+ if Row.link_values_exist_map.get(key) is None:
+ Row.link_values_exist_map[key] = frappe.db.exists(df.options, value)
+ return Row.link_values_exist_map.get(key)
+
+ def parse_value(self, value, col):
+ df = col.df
+ if isinstance(value, datetime) and df.fieldtype in ["Date", "Datetime"]:
+ return value
+
+ value = cstr(value)
+
+ # convert boolean values to 0 or 1
+ valid_check_values = ["t", "f", "true", "false", "yes", "no", "y", "n"]
+ if df.fieldtype == "Check" and value.lower().strip() in valid_check_values:
+ value = value.lower().strip()
+ value = 1 if value in ["t", "true", "y", "yes"] else 0
+
+ if df.fieldtype in ["Int", "Check"]:
+ value = cint(value)
+ elif df.fieldtype in ["Float", "Percent", "Currency"]:
+ value = flt(value)
+ elif df.fieldtype in ["Date", "Datetime"]:
+ value = self.get_date(value, col)
+
+ return value
+
+ def get_date(self, value, column):
+ date_format = column.date_format
+ if date_format:
+ try:
+ return datetime.strptime(value, date_format)
+ except ValueError:
+ # ignore date values that dont match the format
+ # import will break for these values later
+ pass
+ return value
+
+ def check_mandatory_fields(self, doctype, doc):
+ """If import type is Insert:
+ Check for mandatory fields (except table fields) in doc
+ if import type is Update:
+ Check for name field or autoname field in doc
+ """
+ meta = frappe.get_meta(doctype)
+ if self.import_type == UPDATE:
+ if meta.istable:
+ # when updating records with table rows,
+ # there are two scenarios:
+ # 1. if row 'name' is provided in the template
+ # the table row will be updated
+ # 2. if row 'name' is not provided
+ # then a new row will be added
+ # so we dont need to check for mandatory
+ return
+
+ id_field = self.get_id_field(doctype)
+ if doc.get(id_field.fieldname) in INVALID_VALUES:
+ self.warnings.append(
+ {
+ "row": self.row_number,
+ "message": _("{0} is a mandatory field").format(id_field.label),
+ }
+ )
+ return
+
+ fields = [
+ df
+ for df in meta.fields
+ if df.fieldtype not in table_fieldtypes
+ and df.reqd
+ and doc.get(df.fieldname) in INVALID_VALUES
+ ]
+
+ if not fields:
+ return
+
+ if len(fields) == 1:
+ self.warnings.append(
+ {
+ "row": self.row_number,
+ "message": _("{0} is a mandatory field").format(fields[0].label),
+ }
+ )
+ else:
+ fields_string = ", ".join([df.label for df in fields])
+ self.warnings.append(
+ {
+ "row": self.row_number,
+ "message": _("{0} are mandatory fields").format(fields_string),
+ }
+ )
+
+ def get_id_field(self, doctype):
+ autoname_field = self.get_autoname_field(doctype)
+ if autoname_field:
+ return autoname_field
+ return frappe._dict({"label": "ID", "fieldname": "name", "fieldtype": "Data"})
+
+ def get_autoname_field(self, doctype):
+ meta = frappe.get_meta(doctype)
+ if meta.autoname and meta.autoname.startswith("field:"):
+ fieldname = meta.autoname[len("field:") :]
+ return meta.get_field(fieldname)
+
+ def get_values(self, indexes):
+ return [self.data[i] for i in indexes]
+
+ def get(self, index):
+ return self.data[index]
+
+ def as_list(self):
+ return self.data
+
+
+class Header(Row):
+ def __init__(self, index, row, doctype, raw_data, column_to_field_map):
+ self.index = index
+ self.row_number = index + 1
+ self.data = row
+ self.doctype = doctype
+
+ self.seen = []
+ self.columns = []
+
+ for j, header in enumerate(row):
+ column_values = [get_item_at_index(r, j) for r in raw_data]
+ column = Column(
+ j, header, self.doctype, column_values, column_to_field_map.get(header), self.seen
+ )
+ self.seen.append(header)
+ self.columns.append(column)
+
+ doctypes = []
+ for col in self.columns:
+ if not col.df:
+ continue
+ if col.df.parent == self.doctype:
+ doctypes.append((col.df.parent, None))
+ else:
+ doctypes.append((col.df.parent, col.df.child_table_df))
+
+ self.doctypes = sorted(
+ list(set(doctypes)), key=lambda x: -1 if x[0] == self.doctype else 1
+ )
+
+ def get_column_indexes(self, doctype, tablefield=None):
+ return [
+ col.index
+ for col in self.columns
+ if not col.skip_import and col.df and col.df.parent == doctype
+ ]
+
+ def get_columns(self, indexes):
+ return [self.columns[i] for i in indexes]
+
+ def get_docfields(self, indexes):
+ return [col.df for col in self.get_columns(indexes)]
+
+
+class Column:
+ seen = []
+ fields_column_map = {}
+
+ def __init__(self, index, header, doctype, column_values, map_to_field=None, seen=[]):
+ self.index = index
+ self.column_number = index + 1
+ self.doctype = doctype
+ self.header_title = header
+ self.column_values = column_values
+ self.map_to_field = map_to_field
+ self.seen = seen
+
+ self.date_format = None
+ self.df = None
+ self.skip_import = None
+ self.warnings = []
+
+ self.meta = frappe.get_meta(doctype)
+ self.parse()
+ self.parse_date_format()
+
+ def parse(self):
+ # df_by_labels_and_fieldnames = Column.build_fields_dict_for_column_matching(
+ # self.doctype
+ # )
+
+ header_title = self.header_title
+ header_row_index = str(self.index)
+ column_number = str(self.column_number)
+ skip_import = False
+
+ if self.map_to_field and self.map_to_field != "Don't Import":
+ df = get_df_for_column_header(self.doctype, self.map_to_field)
+ # df = df_by_labels_and_fieldnames.get(self.map_to_field)
+ if df:
self.warnings.append(
{
- "col": column_number,
"message": _("Mapping column {0} to field {1}").format(
frappe.bold(header_title or "
Untitled Column"), frappe.bold(df.label)
),
@@ -197,138 +809,129 @@ class Importer:
}
)
else:
- df = df_by_labels_and_fieldnames.get(header_title)
-
- if not df:
- skip_import = True
- else:
- skip_import = False
-
- if header_title in seen:
self.warnings.append(
{
- "col": column_number,
- "message": _("Skipping Duplicate Column {0}").format(frappe.bold(header_title)),
- "type": "info",
- }
- )
- df = None
- skip_import = True
- elif fieldname == "Don't Import":
- skip_import = True
- self.warnings.append(
- {
- "col": column_number,
- "message": _("Skipping column {0}").format(frappe.bold(header_title)),
- "type": "info",
- }
- )
- elif header_title and not df:
- self.warnings.append(
- {
- "col": column_number,
- "message": _("Cannot match column {0} with any field").format(
- frappe.bold(header_title)
+ "message": _("Could not map column {0} to field {1}").format(
+ column_number, self.map_to_field
),
"type": "info",
}
)
- elif not header_title and not df:
- self.warnings.append(
- {"col": column_number, "message": _("Skipping Untitled Column"), "type": "info"}
- )
+ else:
+ df = get_df_for_column_header(self.doctype, header_title)
+ # df = df_by_labels_and_fieldnames.get(header_title)
- columns.append(
- frappe._dict(
- df=df,
- skip_import=skip_import,
- header_title=header_title,
- column_number=column_number,
- index=i,
- )
- )
- seen.append(header_title)
+ if not df:
+ skip_import = True
+ else:
+ skip_import = False
- return columns
-
- def build_fields_dict_for_column_matching(self):
- """
- Build a dict with various keys to match with column headers and value as docfield
- The keys can be label or fieldname
- {
- 'Customer': df1,
- 'customer': df1,
- 'Due Date': df2,
- 'due_date': df2,
- 'Item Code (Sales Invoice Item)': df3,
- 'Sales Invoice Item:item_code': df3,
- }
- """
- out = {}
-
- table_doctypes = [df.options for df in self.meta.get_table_fields()]
- doctypes = table_doctypes + [self.doctype]
- for doctype in doctypes:
- # name field
- name_key = "ID" if self.doctype == doctype else "ID ({})".format(doctype)
- name_df = frappe._dict(
+ if header_title in self.seen:
+ self.warnings.append(
{
- "fieldtype": "Data",
- "fieldname": "name",
- "label": "ID",
- "reqd": self.import_type == UPDATE,
- "parent": doctype,
+ "col": column_number,
+ "message": _("Skipping Duplicate Column {0}").format(frappe.bold(header_title)),
+ "type": "info",
}
)
- out[name_key] = name_df
- out["name"] = name_df
+ df = None
+ skip_import = True
+ elif self.map_to_field == "Don't Import":
+ skip_import = True
+ self.warnings.append(
+ {
+ "col": column_number,
+ "message": _("Skipping column {0}").format(frappe.bold(header_title)),
+ "type": "info",
+ }
+ )
+ elif header_title and not df:
+ self.warnings.append(
+ {
+ "col": column_number,
+ "message": _("Cannot match column {0} with any field").format(
+ frappe.bold(header_title)
+ ),
+ "type": "info",
+ }
+ )
+ elif not header_title and not df:
+ self.warnings.append(
+ {"col": column_number, "message": _("Skipping Untitled Column"), "type": "info"}
+ )
- # other fields
- meta = frappe.get_meta(doctype)
- fields = self.get_standard_fields(doctype) + meta.fields
- for df in fields:
- fieldtype = df.fieldtype or "Data"
- parent = df.parent or self.doctype
- if fieldtype not in no_value_fields:
- if self.doctype == doctype:
- # for parent doctypes keys will be
- # Label
- # label
- # Label (label)
- if not out.get(df.label):
- # if Label is already set, don't set it again
- # in case of duplicate column headers
- out[df.label] = df
- out[df.fieldname] = df
- label_with_fieldname = "{0} ({1})".format(df.label, df.fieldname)
- out[label_with_fieldname] = df
- else:
- # for child doctypes keys will be
- # Label (Child DocType)
- # Child DocType:label
- # Label (label) (Child DocType)
- label = "{0} ({1})".format(df.label, parent)
- fieldname = "{0}:{1}".format(doctype, df.fieldname)
- label_with_fieldname = "{0} ({1}) ({2})".format(df.label, df.fieldname, parent)
- if not out.get(label):
- # if Label is already set, don't set it again
- # in case of duplicate column headers
- out[label] = df
- out[fieldname] = df
- out[label_with_fieldname] = df
+ self.df = df
+ self.skip_import = skip_import
- # if autoname is based on field
- # add an entry for "ID (Autoname Field)"
- autoname_field = self.get_autoname_field(self.doctype)
- if autoname_field:
- out["ID ({})".format(autoname_field.label)] = autoname_field
- # ID field should also map to the autoname field
- out["ID"] = autoname_field
- out["name"] = autoname_field
+ def parse_date_format(self):
+ if self.df and self.df.fieldtype in ("Date", "Time", "Datetime"):
+ self.date_format = self.guess_date_format_for_column()
- return out
+ def guess_date_format_for_column(self):
+ """ Guesses date format for a column by parsing the first 100 values in the column,
+ getting the date format and then returning the one which has the maximum frequency
+ """
+ PARSE_ROW_COUNT = 100
- def get_standard_fields(self, doctype):
+ date_formats = [
+ frappe.utils.guess_date_format(d) for d in self.column_values if isinstance(d, str)
+ ]
+ date_formats = [d for d in date_formats if d]
+ if not date_formats:
+ return
+
+ unique_date_formats = set(date_formats)
+ print(unique_date_formats)
+ max_occurred_date_format = max(unique_date_formats, key=date_formats.count)
+
+ # fmt: off
+ message = _("The column {0} has {1} different date formats. Automatically setting {2} as the default format as it is the most common. Please change other values in this column to this format.")
+ # fmt: on
+ user_date_format = get_user_format(max_occurred_date_format)
+ self.warnings.append(
+ {
+ "col": self.column_number,
+ "message": message.format(
+ frappe.bold(self.header_title),
+ len(unique_date_formats),
+ frappe.bold(user_date_format),
+ ),
+ "type": "info",
+ }
+ )
+
+ return max_occurred_date_format
+
+ def as_dict(self):
+ d = frappe._dict()
+ d.index = self.index
+ d.column_number = self.column_number
+ d.doctype = self.doctype
+ d.header_title = self.header_title
+ d.column_values = self.column_values
+ d.map_to_field = self.map_to_field
+ d.date_format = self.date_format
+ d.df = self.df
+ d.skip_import = self.skip_import
+ d.warnings = self.warnings
+ return d
+
+
+def build_fields_dict_for_column_matching(parent_doctype):
+ """
+ Build a dict with various keys to match with column headers and value as docfield
+ The keys can be label or fieldname
+ {
+ 'Customer': df1,
+ 'customer': df1,
+ 'Due Date': df2,
+ 'due_date': df2,
+ 'Item Code (Sales Invoice Item)': df3,
+ 'Sales Invoice Item:item_code': df3,
+ }
+ """
+
+ def get_standard_fields(doctype):
meta = frappe.get_meta(doctype)
if meta.istable:
standard_fields = [
@@ -350,714 +953,124 @@ class Importer:
out.append(df)
return out
- def detect_date_formats(self, columns):
- for col in columns:
- if col.df and col.df.fieldtype in ["Date", "Time", "Datetime"]:
- col.date_format = self.guess_date_format_for_column(col, columns)
- return columns
+ parent_meta = frappe.get_meta(parent_doctype)
+ out = {}
- def add_serial_no_column(self, columns, data):
- columns_with_serial_no = [
- frappe._dict({"header_title": "Sr. No", "skip_import": True})
- ] + columns
+ # doctypes and fieldname if it is a child doctype
+ doctypes = [[parent_doctype, None]] + [
+ [df.options, df] for df in parent_meta.get_table_fields()
+ ]
- # update index for each column
- for i, col in enumerate(columns_with_serial_no):
- col.index = i
+ for doctype, table_df in doctypes:
+ # name field
+ name_by_label = (
+ "ID" if doctype == parent_doctype else "ID ({0})".format(table_df.label)
+ )
+ name_by_fieldname = (
+ "name" if doctype == parent_doctype else "{0}.name".format(table_df.fieldname)
+ )
+ name_df = frappe._dict(
+ {
+ "fieldtype": "Data",
+ "fieldname": "name",
+ "label": "ID",
+ "reqd": 1, # self.import_type == UPDATE,
+ "parent": doctype,
+ }
+ )
- data_with_serial_no = []
- for i, row in enumerate(data):
- data_with_serial_no.append([self.row_index_map[i] + 1] + row)
+ if doctype != parent_doctype:
+ name_df.is_child_table_field = True
+ name_df.child_table_df = table_df
- return columns_with_serial_no, data_with_serial_no
+ out[name_by_label] = name_df
+ out[name_by_fieldname] = name_df
- def parse_value(self, value, df):
- if isinstance(value, datetime) and df.fieldtype in ["Date", "Datetime"]:
- return value
-
- value = cstr(value)
-
- # convert boolean values to 0 or 1
- valid_check_values = ["t", "f", "true", "false", "yes", "no", "y", "n"]
- if df.fieldtype == "Check" and value.lower().strip() in valid_check_values:
- value = value.lower().strip()
- value = 1 if value in ["t", "true", "y", "yes"] else 0
-
- if df.fieldtype in ["Int", "Check"]:
- value = cint(value)
- elif df.fieldtype in ["Float", "Percent", "Currency"]:
- value = flt(value)
- elif df.fieldtype in ["Date", "Datetime"]:
- value = self.parse_date_format(value, df)
-
- return value
-
- def parse_date_format(self, value, df):
- date_format = self.get_date_format_for_df(df) or DATETIME_FORMAT
- try:
- return datetime.strptime(value, date_format)
- except ValueError:
- # ignore date values that dont match the format
- # import will break for these values later
- pass
- return value
-
- def get_date_format_for_df(self, df):
- return self._guessed_date_formats.get(df.parent + df.fieldname)
-
- def guess_date_format_for_column(self, column, columns):
- """ Guesses date format for a column by parsing the first 10 values in the column,
- getting the date format and then returning the one which has the maximum frequency
- """
- PARSE_ROW_COUNT = 10
-
- df = column.df
- key = df.parent + df.fieldname
-
- if not self._guessed_date_formats.get(key):
- matches = [col for col in columns if col.df == df]
- if not matches:
- self._guessed_date_formats[key] = None
- return
-
- column = matches[0]
- column_index = column.index
-
- date_values = [
- row[column_index] for row in self.data[:PARSE_ROW_COUNT] if row[column_index]
- ]
- date_formats = [
- guess_date_format(d) if isinstance(d, str) else None for d in date_values
- ]
- if not date_formats:
- return
- max_occurred_date_format = max(set(date_formats), key=date_formats.count)
- self._guessed_date_formats[key] = max_occurred_date_format
-
- return self._guessed_date_formats[key]
-
- def import_data(self):
- # set user lang for translations
- frappe.cache().hdel("lang", frappe.session.user)
- frappe.set_user_lang(frappe.session.user)
-
- if not self.console:
- self.data_import.db_set("template_warnings", "")
-
- # set flags
- frappe.flags.in_import = True
- frappe.flags.mute_emails = self.data_import.mute_emails
-
- # prepare a map for missing link field values
- self.prepare_missing_link_field_values()
-
- # parse docs from rows
- payloads = self.get_payloads_for_import()
-
- # dont import if there are non-ignorable warnings
- warnings = [w for w in self.warnings if w.get("type") != "info"]
- if warnings:
- if self.console:
- self.print_grouped_warnings(warnings)
- else:
- self.data_import.db_set("template_warnings", json.dumps(warnings))
- frappe.publish_realtime(
- "data_import_refresh", {"data_import": self.data_import.name}
- )
- return
-
- # setup import log
- if self.data_import.import_log:
- import_log = frappe.parse_json(self.data_import.import_log)
- else:
- import_log = []
-
- # remove previous failures from import log
- import_log = [l for l in import_log if l.get("success") == True]
-
- # get successfully imported rows
- imported_rows = []
- for log in import_log:
- log = frappe._dict(log)
- if log.success:
- imported_rows += log.row_indexes
-
- # start import
- total_payload_count = len(payloads)
- batch_size = frappe.conf.data_import_batch_size or 1000
-
- for batch_index, batched_payloads in enumerate(
- frappe.utils.create_batch(payloads, batch_size)
- ):
- for i, payload in enumerate(batched_payloads):
- doc = payload.doc
- row_indexes = [row[0] for row in payload.rows]
- current_index = (i + 1) + (batch_index * batch_size)
-
- if set(row_indexes).intersection(set(imported_rows)):
- print("Skipping imported rows", row_indexes)
- if total_payload_count > 5:
- frappe.publish_realtime(
- "data_import_progress",
- {
- "current": current_index,
- "total": total_payload_count,
- "skipping": True,
- "data_import": self.data_import.name,
- },
- )
- continue
-
- try:
- start = timeit.default_timer()
- doc = self.process_doc(doc)
- processing_time = timeit.default_timer() - start
- eta = self.get_eta(current_index, total_payload_count, processing_time)
-
- if total_payload_count > 5:
- frappe.publish_realtime(
- "data_import_progress",
- {
- "current": current_index,
- "total": total_payload_count,
- "docname": doc.name,
- "data_import": self.data_import.name,
- "success": True,
- "row_indexes": row_indexes,
- "eta": eta,
- },
- )
- if self.console:
- update_progress_bar(
- "Importing {0} records".format(total_payload_count),
- current_index,
- total_payload_count,
- )
- import_log.append(
- frappe._dict(success=True, docname=doc.name, row_indexes=row_indexes)
+ # other fields
+ fields = get_standard_fields(doctype) + frappe.get_meta(doctype).fields
+ for df in fields:
+ fieldtype = df.fieldtype or "Data"
+ parent = df.parent or parent_doctype
+ if fieldtype not in no_value_fields:
+ if parent_doctype == doctype:
+ # for parent doctypes keys will be
+ # Label
+ # label
+ # Label (label)
+ if not out.get(df.label):
+ # if Label is already set, don't set it again
+ # in case of duplicate column headers
+ out[df.label] = df
+ out[df.fieldname] = df
+ label_with_fieldname = "{0} ({1})".format(df.label, df.fieldname)
+ out[label_with_fieldname] = df
+ else:
+ # in case there are multiple table fields with the same doctype
+ # for child doctypes keys will be
+ # Label (Table Field Label)
+ # table_field.fieldname
+ table_fields = parent_meta.get(
+ "fields", {"fieldtype": ["in", table_fieldtypes], "options": parent}
)
- # commit after every successful import
- frappe.db.commit()
+ for table_field in table_fields:
+ by_label = "{0} ({1})".format(df.label, table_field.label)
+ by_fieldname = "{0}.{1}".format(table_field.fieldname, df.fieldname)
- except Exception:
- import_log.append(
- frappe._dict(
- success=False,
- exception=frappe.get_traceback(),
- messages=frappe.local.message_log,
- row_indexes=row_indexes,
- )
- )
- frappe.clear_messages()
- # rollback if exception
- frappe.db.rollback()
+ # create a new df object to avoid mutation problems
+ if isinstance(df, dict):
+ new_df = frappe._dict(df.copy())
+ else:
+ new_df = df.as_dict()
- # set status
- failures = [l for l in import_log if l.get("success") == False]
- if len(failures) == total_payload_count:
- status = "Pending"
- elif len(failures) > 0:
- status = "Partial Success"
- else:
- status = "Success"
+ new_df.is_child_table_field = True
+ new_df.child_table_df = table_field
+ out[by_label] = new_df
+ out[by_fieldname] = new_df
- if self.console:
- self.print_import_log(import_log)
- else:
- self.data_import.db_set("status", status)
- self.data_import.db_set("import_log", json.dumps(import_log))
+ # if autoname is based on field
+ # add an entry for "ID (Autoname Field)"
+ autoname_field = get_autoname_field(parent_doctype)
+ if autoname_field:
+ out["ID ({})".format(autoname_field.label)] = autoname_field
+ # ID field should also map to the autoname field
+ out["ID"] = autoname_field
+ out["name"] = autoname_field
- frappe.flags.in_import = False
- frappe.flags.mute_emails = False
- frappe.publish_realtime("data_import_refresh", {"data_import": self.data_import.name})
+ return out
- return import_log
- def get_payloads_for_import(self):
- payloads = []
- # make a copy
- data = list(self.rows)
- while data:
- doc, rows, data = self.parse_next_row_for_import(data)
- payloads.append(frappe._dict(doc=doc, rows=rows))
- return payloads
+def get_df_for_column_header(doctype, header):
+ def build_fields_dict_for_doctype():
+ return build_fields_dict_for_column_matching(doctype)
- def parse_next_row_for_import(self, data):
- """
- Parses rows that make up a doc. A doc maybe built from a single row or multiple rows.
- Returns the doc, rows, and data without the rows.
- """
- doctypes = set([col.df.parent for col in self.columns if col.df and col.df.parent])
+ df_by_labels_and_fieldname = frappe.cache().hget(
+ "data_import_column_header_map", doctype, generator=build_fields_dict_for_doctype
+ )
+ return df_by_labels_and_fieldname.get(header)
- # first row is included by default
- first_row = data[0]
- rows = [first_row]
- # if there are child doctypes, find the subsequent rows
- if len(doctypes) > 1:
- # subsequent rows either dont have any parent value set
- # or have the same value as the parent row
- # we include a row if either of conditions match
- parent_column_indexes = [
- col.index
- for col in self.columns
- if not col.skip_import and col.df and col.df.parent == self.doctype
- ]
- parent_row_values = [first_row[i] for i in parent_column_indexes]
+# utilities
- data_without_first_row = data[1:]
- for row in data_without_first_row:
- row_values = [row[i] for i in parent_column_indexes]
- # if the row is blank, it's a child row doc
- if all([v in INVALID_VALUES for v in row_values]):
- rows.append(row)
- continue
- # if the row has same values as parent row, it's a child row doc
- if row_values == parent_row_values:
- rows.append(row)
- continue
- # if any of those conditions dont match, it's the next doc
- break
- def get_column_indexes(doctype):
- return [
- col.index
- for col in self.columns
- if not col.skip_import and col.df and col.df.parent == doctype
- ]
+def get_autoname_field(doctype):
+ meta = frappe.get_meta(doctype)
+ if meta.autoname and meta.autoname.startswith("field:"):
+ fieldname = meta.autoname[len("field:") :]
+ return meta.get_field(fieldname)
- def validate_value(value, df):
- if df.fieldtype == "Select":
- select_options = df.get_select_options()
- if select_options and value not in select_options:
- options_string = ", ".join([frappe.bold(d) for d in select_options])
- msg = _("Value must be one of {0}").format(options_string)
- self.warnings.append(
- {
- "row": row_number,
- "field": df.as_dict(convert_dates_to_str=True),
- "message": msg,
- }
- )
- return
- elif df.fieldtype == "Link":
- d = self.get_missing_link_field_values(df.options)
- if value in d.missing_values and not d.one_mandatory:
- msg = _("Value {0} missing for {1}").format(
- frappe.bold(value), frappe.bold(df.options)
- )
- self.warnings.append(
- {
- "row": row_number,
- "field": df.as_dict(convert_dates_to_str=True),
- "message": msg,
- }
- )
- return value
+def get_item_at_index(_list, i, default=None):
+ try:
+ a = _list[i]
+ except IndexError:
+ a = default
+ return a
- return value
- def parse_doc(doctype, docfields, values, row_number):
- doc = frappe._dict()
- if self.import_type == INSERT:
- # new_doc returns a dict with default values set
- doc = frappe.new_doc(doctype, as_dict=True)
-
- # remove standard fields and __islocal
- for key in frappe.model.default_fields + ("__islocal",):
- doc.pop(key, None)
-
- for df, value in zip(docfields, values):
- if value in INVALID_VALUES:
- value = None
-
- if value is not None:
- value = validate_value(value, df)
-
- if value is not None:
- doc[df.fieldname] = self.parse_value(value, df)
-
- is_table = frappe.get_meta(doctype).istable
- is_update = self.import_type == UPDATE
- if is_table and is_update and doc.get("name") in INVALID_VALUES:
- # for table rows being inserted in update
- # create a new doc with defaults set
- new_doc = frappe.new_doc(doctype, as_dict=True)
- new_doc.update(doc)
- doc = new_doc
-
- check_mandatory_fields(doctype, doc, row_number)
- return doc
-
- def check_mandatory_fields(doctype, doc, row_number):
- """If import type is Insert:
- Check for mandatory fields (except table fields) in doc
- if import type is Update:
- Check for name field or autoname field in doc
- """
- meta = frappe.get_meta(doctype)
- if self.import_type == UPDATE:
- if meta.istable:
- # when updating records with table rows,
- # there are two scenarios:
- # 1. if row 'name' is provided in the template
- # the table row will be updated
- # 2. if row 'name' is not provided
- # then a new row will be added
- # so we dont need to check for mandatory
- return
-
- id_field = self.get_id_field(doctype)
- if doc.get(id_field.fieldname) in INVALID_VALUES:
- self.warnings.append(
- {
- "row": row_number,
- "message": _("{0} is a mandatory field").format(id_field.label),
- }
- )
- return
-
- fields = [
- df
- for df in meta.fields
- if df.fieldtype not in table_fields
- and df.reqd
- and doc.get(df.fieldname) in INVALID_VALUES
- ]
-
- if not fields:
- return
-
- if len(fields) == 1:
- self.warnings.append(
- {
- "row": row_number,
- "message": _("{0} is a mandatory field").format(fields[0].label),
- }
- )
- else:
- fields_string = ", ".join([df.label for df in fields])
- self.warnings.append(
- {"row": row_number, "message": _("{0} are mandatory fields").format(fields_string)}
- )
-
- parsed_docs = {}
- for row in rows:
- for doctype in doctypes:
- if doctype == self.doctype and parsed_docs.get(doctype):
- # if parent doc is already parsed from the first row
- # then skip
- continue
-
- row_number = row[0]
- column_indexes = get_column_indexes(doctype)
- values = [row[i] for i in column_indexes]
-
- if all(v in INVALID_VALUES for v in values):
- # skip values if all of them are empty
- continue
-
- columns = [self.columns[i] for i in column_indexes]
- docfields = [col.df for col in columns]
- doc = parse_doc(doctype, docfields, values, row_number)
- parsed_docs[doctype] = parsed_docs.get(doctype, [])
- parsed_docs[doctype].append(doc)
-
- # build the doc with children
- doc = {}
- for doctype, docs in parsed_docs.items():
- if doctype == self.doctype:
- doc.update(docs[0])
- else:
- table_dfs = self.meta.get(
- "fields", {"options": doctype, "fieldtype": ["in", table_fields]}
- )
- if table_dfs:
- table_field = table_dfs[0]
- doc[table_field.fieldname] = docs
-
- # check if there is atleast one row for mandatory table fields
- mandatory_table_fields = [
- df
- for df in self.meta.fields
- if df.fieldtype in table_fields and df.reqd and len(doc.get(df.fieldname, [])) == 0
- ]
- if len(mandatory_table_fields) == 1:
- self.warnings.append(
- {
- "row": first_row[0],
- "message": _("There should be atleast one row for {0} table").format(
- mandatory_table_fields[0].label
- ),
- }
- )
- elif mandatory_table_fields:
- fields_string = ", ".join([df.label for df in mandatory_table_fields])
- message = _("There should be atleast one row for the following tables: {0}").format(
- fields_string
- )
- self.warnings.append({"row": first_row[0], "message": message})
-
- return doc, rows, data[len(rows) :]
-
- def process_doc(self, doc):
- if self.import_type == INSERT:
- return self.insert_record(doc)
- elif self.import_type == UPDATE:
- return self.update_record(doc)
-
- def insert_record(self, doc):
- self.create_missing_linked_records(doc)
-
- new_doc = frappe.new_doc(self.doctype)
- new_doc.update(doc)
- # name shouldn't be set when inserting a new record
- new_doc.set("name", None)
- new_doc.insert()
- if self.meta.is_submittable and self.data_import.submit_after_import:
- new_doc.submit()
- return new_doc
-
- def create_missing_linked_records(self, doc):
- """
- Finds fields that are of type Link, and creates the corresponding
- document automatically if it has only one mandatory field
- """
- link_values = []
-
- def get_link_fields(doc, doctype):
- for fieldname, value in doc.items():
- meta = frappe.get_meta(doctype)
- df = meta.get_field(fieldname)
- if not df:
- continue
- if df.fieldtype == "Link" and value not in INVALID_VALUES:
- link_values.append([df.options, value])
- elif df.fieldtype in table_fields:
- for row in value:
- get_link_fields(row, df.options)
-
- get_link_fields(doc, self.doctype)
-
- for link_doctype, link_value in link_values:
- d = self.missing_link_values.get(link_doctype)
- if d and d.one_mandatory and link_value in d.missing_values:
- # find the autoname field
- autoname_field = self.get_autoname_field(link_doctype)
- name_field = autoname_field.fieldname if autoname_field else "name"
- new_doc = frappe.new_doc(link_doctype)
- new_doc.set(name_field, link_value)
- new_doc.insert()
- d.missing_values.remove(link_value)
-
- def update_record(self, doc):
- id_fieldname = self.get_id_fieldname(self.doctype)
- id_value = doc[id_fieldname]
- existing_doc = frappe.get_doc(self.doctype, id_value)
- existing_doc.flags.updater_reference = {
- "doctype": self.data_import.doctype,
- "docname": self.data_import.name,
- "label": _("via Data Import"),
- }
- existing_doc.update(doc)
- existing_doc.save()
- return existing_doc
-
- def export_errored_rows(self):
- from frappe.utils.csvutils import build_csv_response
-
- if not self.data_import:
- return
-
- import_log = frappe.parse_json(self.data_import.import_log or "[]")
- failures = [l for l in import_log if l.get("success") == False]
- row_indexes = []
- for f in failures:
- row_indexes.extend(f.get("row_indexes", []))
-
- # de duplicate
- row_indexes = list(set(row_indexes))
- row_indexes.sort()
-
- header_row = [col.header_title for col in self.columns[1:]]
- rows = [header_row]
- rows += [row[1:] for row in self.rows if row[0] in row_indexes]
-
- build_csv_response(rows, self.doctype)
-
- def get_missing_link_field_values(self, doctype):
- return self.missing_link_values.get(doctype, {})
-
- def prepare_missing_link_field_values(self):
- columns = self.columns
- rows = self.rows
- link_column_indexes = [
- col.index for col in columns if col.df and col.df.fieldtype == "Link"
- ]
-
- self.missing_link_values = {}
- for index in link_column_indexes:
- col = columns[index]
- column_values = [row[index] for row in rows]
- values = set([v for v in column_values if v not in INVALID_VALUES])
- doctype = col.df.options
-
- missing_values = [value for value in values if not frappe.db.exists(doctype, value)]
- if self.missing_link_values.get(doctype):
- self.missing_link_values[doctype].missing_values += missing_values
- else:
- self.missing_link_values[doctype] = frappe._dict(
- missing_values=missing_values,
- one_mandatory=self.has_one_mandatory_field(doctype),
- df=col.df,
- )
-
- def get_eta(self, current, total, processing_time):
- remaining = total - current
- eta = processing_time * remaining
- if not self.last_eta or eta < self.last_eta:
- self.last_eta = eta
- return self.last_eta
-
- def has_one_mandatory_field(self, doctype):
- meta = frappe.get_meta(doctype)
- # get mandatory fields with default not set
- mandatory_fields = [df for df in meta.fields if df.reqd and not df.default]
- mandatory_fields_count = len(mandatory_fields)
- if meta.autoname and meta.autoname.lower() == "prompt":
- mandatory_fields_count += 1
- return mandatory_fields_count == 1
-
- def get_id_fieldname(self, doctype):
- return self.get_id_field(doctype).fieldname
-
- def get_id_field(self, doctype):
- autoname_field = self.get_autoname_field(doctype)
- if autoname_field:
- return autoname_field
- return frappe._dict({"label": "ID", "fieldname": "name", "fieldtype": "Data"})
-
- def get_autoname_field(self, doctype):
- meta = frappe.get_meta(doctype)
- if meta.autoname and meta.autoname.startswith("field:"):
- fieldname = meta.autoname[len("field:") :]
- return meta.get_field(fieldname)
-
- def print_grouped_warnings(self, warnings):
- warnings_by_row = {}
- other_warnings = []
- for w in warnings:
- if w.get("row"):
- warnings_by_row.setdefault(w.get("row"), []).append(w)
- else:
- other_warnings.append(w)
-
- for row_number, warnings in warnings_by_row.items():
- print("Row {0}".format(row_number))
- for w in warnings:
- print(w.get("message"))
-
- for w in other_warnings:
- print(w.get("message"))
-
- def print_import_log(self, import_log):
- failed_records = [l for l in import_log if not l.success]
- successful_records = [l for l in import_log if l.success]
-
- if successful_records:
- print(
- "Successfully imported {0} records out of {1}".format(
- len(successful_records), len(import_log)
- )
- )
-
- if failed_records:
- print("Failed to import {0} records".format(len(failed_records)))
- file_name = "{0}_import_on_{1}.txt".format(self.doctype, frappe.utils.now())
- print("Check {0} for errors".format(os.path.join("sites", file_name)))
- text = ""
- for w in failed_records:
- text += "Row Indexes: {0}\n".format(str(w.get("row_indexes", [])))
- text += "Messages:\n{0}\n".format("\n".join(w.get("messages", [])))
- text += "Traceback:\n{0}\n\n".format(w.get("exception"))
-
- with open(file_name, "w") as f:
- f.write(text)
-
-
-DATE_FORMATS = [
- r"%d-%m-%Y",
- r"%m-%d-%Y",
- r"%Y-%m-%d",
- r"%d-%m-%y",
- r"%m-%d-%y",
- r"%y-%m-%d",
- r"%d/%m/%Y",
- r"%m/%d/%Y",
- r"%Y/%m/%d",
- r"%d/%m/%y",
- r"%m/%d/%y",
- r"%y/%m/%d",
- r"%d.%m.%Y",
- r"%m.%d.%Y",
- r"%Y.%m.%d",
- r"%d.%m.%y",
- r"%m.%d.%y",
- r"%y.%m.%d",
-]
-
-TIME_FORMATS = [
- r"%H:%M:%S.%f",
- r"%H:%M:%S",
- r"%H:%M",
- r"%I:%M:%S.%f %p",
- r"%I:%M:%S %p",
- r"%I:%M %p",
-]
-
-
-def guess_date_format(date_string):
- date_string = date_string.strip()
-
- _date = None
- _time = None
-
- if " " in date_string:
- _date, _time = date_string.split(" ", 1)
- else:
- _date = date_string
-
- date_format = None
- time_format = None
-
- for f in DATE_FORMATS:
- try:
- # if date is parsed without any exception
- # capture the date format
- datetime.strptime(_date, f)
- date_format = f
- break
- except ValueError:
- pass
-
- if _time:
- for f in TIME_FORMATS:
- try:
- # if time is parsed without any exception
- # capture the time format
- datetime.strptime(_time, f)
- time_format = f
- break
- except ValueError:
- pass
-
- full_format = date_format
- if time_format:
- full_format += " " + time_format
- return full_format
-
-
-def import_data(doctype, file_path):
- i = Importer(doctype, file_path)
- i.import_data()
+def get_user_format(date_format):
+ return (
+ date_format.replace("%Y", "yyyy")
+ .replace("%y", "yy")
+ .replace("%m", "mm")
+ .replace("%d", "dd")
+ )
diff --git a/frappe/public/js/frappe/data_import/data_exporter.js b/frappe/public/js/frappe/data_import/data_exporter.js
index 8276be6670..21f0b78a25 100644
--- a/frappe/public/js/frappe/data_import/data_exporter.js
+++ b/frappe/public/js/frappe/data_import/data_exporter.js
@@ -202,16 +202,16 @@ frappe.data_import.DataExporter = class DataExporter {
}
select_mandatory() {
- let mandatory_table_doctypes = frappe.meta
+ let mandatory_table_fields = frappe.meta
.get_table_fields(this.doctype)
.filter(df => df.reqd)
- .map(df => df.options);
- mandatory_table_doctypes.push(this.doctype);
+ .map(df => df.fieldname);
+ mandatory_table_fields.push(this.doctype);
let multicheck_fields = this.dialog.fields
.filter(df => df.fieldtype === 'MultiCheck')
.map(df => df.fieldname)
- .filter(doctype => mandatory_table_doctypes.includes(doctype));
+ .filter(doctype => mandatory_table_fields.includes(doctype));
let checkboxes = [].concat(
...multicheck_fields.map(fieldname => {
@@ -333,16 +333,24 @@ frappe.data_import.DataExporter = class DataExporter {
}
};
-function get_columns_for_picker(doctype) {
+export function get_columns_for_picker(doctype) {
let out = {};
- const standard_fields_filter = df =>
- !in_list(frappe.model.no_value_type, df.fieldtype);
+ const exportable_fields = df => {
+ let keep = true;
+ if (frappe.model.no_value_type.includes(df.fieldtype)) {
+ keep = false;
+ }
+ if (['lft', 'rgt'].includes(df.fieldname)) {
+ keep = false;
+ }
+ return keep;
+ };
// parent
let doctype_fields = frappe.meta
.get_docfields(doctype)
- .filter(standard_fields_filter);
+ .filter(exportable_fields);
out[doctype] = [
{
@@ -359,7 +367,7 @@ function get_columns_for_picker(doctype) {
const cdt = df.options;
const child_table_fields = frappe.meta
.get_docfields(cdt)
- .filter(standard_fields_filter);
+ .filter(exportable_fields);
out[df.fieldname] = [
{
diff --git a/frappe/public/js/frappe/data_import/import_preview.js b/frappe/public/js/frappe/data_import/import_preview.js
index 27d81b75b7..7cf8431456 100644
--- a/frappe/public/js/frappe/data_import/import_preview.js
+++ b/frappe/public/js/frappe/data_import/import_preview.js
@@ -1,5 +1,5 @@
import DataTable from 'frappe-datatable';
-import ColumnPickerFields from './column_picker_fields';
+import { get_columns_for_picker } from './data_exporter';
frappe.provide('frappe.data_import');
@@ -236,9 +236,7 @@ frappe.data_import.ImportPreview = class ImportPreview {
}
show_column_mapper() {
- let column_picker_fields = new ColumnPickerFields({
- doctype: this.doctype
- });
+ let column_picker_fields = get_columns_for_picker(this.doctype);
let changed = [];
let fields = this.preview_data.columns.map((col, i) => {
let df = col.df;
diff --git a/frappe/public/less/form.less b/frappe/public/less/form.less
index df0334c14f..cd391c1f10 100644
--- a/frappe/public/less/form.less
+++ b/frappe/public/less/form.less
@@ -249,6 +249,7 @@
}
.progress-message {
+ font-feature-settings: "tnum" 1;
margin-top: 0px;
}
}
@@ -1011,7 +1012,7 @@ body[data-route^="Form/Communication"] textarea[data-fieldname="subject"] {
.map-columns .form-section {
padding: 0 7px 7px;
- border-bottom: none;
+ border-top: none;
.clearfix {
display: none;
@@ -1021,3 +1022,7 @@ body[data-route^="Form/Communication"] textarea[data-fieldname="subject"] {
.map-columns .form-section:first-child {
padding-top: 7px;
}
+
+.table-preview {
+ margin-top: 12px;
+}
diff --git a/frappe/utils/data.py b/frappe/utils/data.py
index 7e991f472e..0d946c01a8 100644
--- a/frappe/utils/data.py
+++ b/frappe/utils/data.py
@@ -1185,3 +1185,75 @@ def is_subset(list_a, list_b):
def generate_hash(*args, **kwargs):
return frappe.generate_hash(*args, **kwargs)
+
+
+
+def guess_date_format(date_string):
+	# Try to detect the strptime format of a date or datetime string
+	# by attempting to parse it against a list of common formats.
+	# Returns the matched format string, or None if the date part is not recognized.
+	DATE_FORMATS = [
+		r"%d-%m-%Y",
+		r"%m-%d-%Y",
+		r"%Y-%m-%d",
+		r"%d-%m-%y",
+		r"%m-%d-%y",
+		r"%y-%m-%d",
+		r"%d/%m/%Y",
+		r"%m/%d/%Y",
+		r"%Y/%m/%d",
+		r"%d/%m/%y",
+		r"%m/%d/%y",
+		r"%y/%m/%d",
+		r"%d.%m.%Y",
+		r"%m.%d.%Y",
+		r"%Y.%m.%d",
+		r"%d.%m.%y",
+		r"%m.%d.%y",
+		r"%y.%m.%d",
+	]
+
+	TIME_FORMATS = [
+		r"%H:%M:%S.%f",
+		r"%H:%M:%S",
+		r"%H:%M",
+		r"%I:%M:%S.%f %p",
+		r"%I:%M:%S %p",
+		r"%I:%M %p",
+	]
+
+	date_string = date_string.strip()
+
+	# a datetime string has its date and time parts separated by a space
+	if " " in date_string:
+		_date, _time = date_string.split(" ", 1)
+	else:
+		_date, _time = date_string, None
+
+	date_format = None
+	time_format = None
+
+	for f in DATE_FORMATS:
+		try:
+			# if the date part parses without an exception
+			# capture the date format
+			datetime.datetime.strptime(_date, f)
+			date_format = f
+			break
+		except ValueError:
+			pass
+
+	if _time:
+		for f in TIME_FORMATS:
+			try:
+				# if the time part parses without an exception
+				# capture the time format
+				datetime.datetime.strptime(_time, f)
+				time_format = f
+				break
+			except ValueError:
+				pass
+
+	if date_format and time_format:
+		return date_format + " " + time_format
+	return date_format