From 368283769f06f8ca2f3110f4fbf32facbb1e5535 Mon Sep 17 00:00:00 2001
From: barredterra <14891507+barredterra@users.noreply.github.com>
Date: Fri, 19 Jan 2024 16:16:38 +0100
Subject: [PATCH] refactor: move `extract_messages_from_code` to a separate utils file

In order to avoid circular imports.
---
 frappe/gettext/extractors/utils.py | 80 ++++++++++++++++++++++++++++++
 frappe/translate.py                | 79 +----------------------------
 2 files changed, 81 insertions(+), 78 deletions(-)
 create mode 100644 frappe/gettext/extractors/utils.py

diff --git a/frappe/gettext/extractors/utils.py b/frappe/gettext/extractors/utils.py
new file mode 100644
index 0000000000..4becbbe64e
--- /dev/null
+++ b/frappe/gettext/extractors/utils.py
@@ -0,0 +1,80 @@
+import re
+
+import frappe
+from frappe.model.utils import InvalidIncludePath, render_include
+
+TRANSLATE_PATTERN = re.compile(
+	r"_\(\s*"  # starts with literal `_(`, ignore following whitespace/newlines
+	# BEGIN: message search
+	r"([\"']{,3})"  # start of message string identifier - allows: ', ", """, '''; 1st capture group
+	r"(?P<message>((?!\1).)*)"  # Keep matching until string closing identifier is met which is same as 1st capture group
+	r"\1"  # match exact string closing identifier
+	# END: message search
+	# BEGIN: python context search
+	r"(\s*,\s*context\s*=\s*"  # capture `context=` with ignoring whitespace
+	r"([\"'])"  # start of context string identifier; 5th capture group
+	r"(?P<py_context>((?!\5).)*)"  # capture context string till closing id is found
+	r"\5"  # match context string closure
+	r")?"  # match 0 or 1 context strings
+	# END: python context search
+	# BEGIN: JS context search
+	r"(\s*,\s*(.)*?\s*(,\s*"  # skip message format replacements: ["format", ...] | null | []
+	r"([\"'])"  # start of context string; 11th capture group
+	r"(?P<js_context>((?!\11).)*)"  # capture context string till closing id is found
+	r"\11"  # match context string closure
+	r")*"
+	r")*"  # match one or more context string
+	# END: JS context search
+	r"\s*\)"  # Closing function call ignore leading whitespace/newlines
+)
+
+
+def extract_messages_from_code(code):
+	"""
+	Extracts translatable strings from a code file
+	:param code: code from which translatable files are to be extracted
+	"""
+	from jinja2 import TemplateError
+
+	try:
+		code = frappe.as_unicode(render_include(code))
+
+	# Exception will occur when it encounters John Resig's microtemplating code
+	except (TemplateError, ImportError, InvalidIncludePath, OSError) as e:
+		if isinstance(e, InvalidIncludePath) and hasattr(frappe.local, "message_log"):
+			frappe.clear_last_message()
+
+	messages = []
+
+	for m in TRANSLATE_PATTERN.finditer(code):
+		message = m.group("message")
+		context = m.group("py_context") or m.group("js_context")
+		pos = m.start()
+
+		if is_translatable(message):
+			messages.append([pos, message, context])
+
+	return add_line_number(messages, code)
+
+
+def is_translatable(m):
+	return bool(
+		re.search("[a-zA-Z]", m)
+		and not m.startswith("fa fa-")
+		and not m.endswith("px")
+		and not m.startswith("eval:")
+	)
+
+
+def add_line_number(messages, code):
+	ret = []
+	messages = sorted(messages, key=lambda x: x[0])
+	newlines = [m.start() for m in re.compile(r"\n").finditer(code)]
+	line = 1
+	newline_i = 0
+	for pos, message, context in messages:
+		while newline_i < len(newlines) and pos > newlines[newline_i]:
+			line += 1
+			newline_i += 1
+		ret.append([line, message, context])
+	return ret
diff --git a/frappe/translate.py b/frappe/translate.py
index fc4461e102..7e270eee09 100644
--- a/frappe/translate.py
+++ b/frappe/translate.py
@@ -20,35 +20,11 @@ from csv import reader, writer
 
 import frappe
 from frappe.gettext.extractors.javascript import extract_javascript
+from frappe.gettext.extractors.utils import extract_messages_from_code, is_translatable
 from frappe.gettext.translate import get_translations_from_mo
-from frappe.model.utils import InvalidIncludePath, render_include
 from frappe.query_builder import DocType, Field
 from frappe.utils import cstr, get_bench_path, is_html, strip, strip_html_tags, unique
 
-TRANSLATE_PATTERN = re.compile(
-	r"_\(\s*"  # starts with literal `_(`, ignore following whitespace/newlines
-	# BEGIN: message search
-	r"([\"']{,3})"  # start of message string identifier - allows: ', ", """, '''; 1st capture group
-	r"(?P<message>((?!\1).)*)"  # Keep matching until string closing identifier is met which is same as 1st capture group
-	r"\1"  # match exact string closing identifier
-	# END: message search
-	# BEGIN: python context search
-	r"(\s*,\s*context\s*=\s*"  # capture `context=` with ignoring whitespace
-	r"([\"'])"  # start of context string identifier; 5th capture group
-	r"(?P<py_context>((?!\5).)*)"  # capture context string till closing id is found
-	r"\5"  # match context string closure
-	r")?"  # match 0 or 1 context strings
-	# END: python context search
-	# BEGIN: JS context search
-	r"(\s*,\s*(.)*?\s*(,\s*"  # skip message format replacements: ["format", ...] | null | []
-	r"([\"'])"  # start of context string; 11th capture group
-	r"(?P<js_context>((?!\11).)*)"  # capture context string till closing id is found
-	r"\11"  # match context string closure
-	r")*"
-	r")*"  # match one or more context string
-	# END: JS context search
-	r"\s*\)"  # Closing function call ignore leading whitespace/newlines
-)
 REPORT_TRANSLATE_PATTERN = re.compile('"([^:,^"]*):')
 CSV_STRIP_WHITESPACE_PATTERN = re.compile(r"{\s?([0-9]+)\s?}")
 
@@ -676,59 +652,6 @@ def extract_messages_from_javascript_code(code: str) -> list[tuple[int, str, str
 	return messages
 
 
-def extract_messages_from_code(code):
-	"""
-	Extracts translatable strings from a code file
-	:param code: code from which translatable files are to be extracted
-	"""
-	from jinja2 import TemplateError
-
-	try:
-		code = frappe.as_unicode(render_include(code))
-
-	# Exception will occur when it encounters John Resig's microtemplating code
-	except (TemplateError, ImportError, InvalidIncludePath, OSError) as e:
-		if isinstance(e, InvalidIncludePath):
-			frappe.clear_last_message()
-
-	messages = []
-
-	for m in TRANSLATE_PATTERN.finditer(code):
-		message = m.group("message")
-		context = m.group("py_context") or m.group("js_context")
-		pos = m.start()
-
-		if is_translatable(message):
-			messages.append([pos, message, context])
-
-	return add_line_number(messages, code)
-
-
-def is_translatable(m):
-	if (
-		re.search("[a-zA-Z]", m)
-		and not m.startswith("fa fa-")
-		and not m.endswith("px")
-		and not m.startswith("eval:")
-	):
-		return True
-	return False
-
-
-def add_line_number(messages, code):
-	ret = []
-	messages = sorted(messages, key=lambda x: x[0])
-	newlines = [m.start() for m in re.compile(r"\n").finditer(code)]
-	line = 1
-	newline_i = 0
-	for pos, message, context in messages:
-		while newline_i < len(newlines) and pos > newlines[newline_i]:
-			line += 1
-			newline_i += 1
-		ret.append([line, message, context])
-	return ret
-
-
 def read_csv_file(path):
 	"""Read CSV file and return as list of list
 