diff --git a/frappe/tests/test_translate.py b/frappe/tests/test_translate.py
index ccf079d00a..4fbb2eadfd 100644
--- a/frappe/tests/test_translate.py
+++ b/frappe/tests/test_translate.py
@@ -10,6 +10,8 @@ import frappe
 import frappe.translate
 from frappe import _
 from frappe.translate import (
+	extract_javascript,
+	extract_messages_from_javascript_code,
 	extract_messages_from_python_code,
 	get_language,
 	get_parent_language,
@@ -131,7 +133,7 @@ class TestTranslate(unittest.TestCase):
 		"""Load all CSV files to ensure they have correct format"""
 		verify_translation_files("frappe")
 
-	def test_python_ast_extractor(self):
+	def test_python_extractor(self):
 
 		code = textwrap.dedent(
 			"""
@@ -160,7 +162,61 @@ class TestTranslate(unittest.TestCase):
 		]
 
 		output = extract_messages_from_python_code(code)
-		self.assertEqual(output, expected_output, msg=output)
+		self.assertEqual(len(expected_output), len(output))
+		for expected, actual in zip(expected_output, output):
+			with self.subTest():
+				self.assertEqual(expected, actual)
+
+	def test_js_extractor(self):
+
+		code = textwrap.dedent(
+			"""
+			__("attr")
+			__("attr with", null, "context")
+			__("attr with", ["format", "replacements"], "context")
+			__("attr with", ["format", "replacements"])
+			__(
+				"Long JS string with", [
+					"format", "replacements"
+				],
+				"JS context on newline"
+			)
+			__(
+				"Long JS string with formats only {0}", [
+					"format", "replacements"
+				],
+			)
+			_(`template strings not supported yet`)
+			"""
+		)
+		expected_output = [
+			(2, "attr", None),
+			(3, "attr with", "context"),
+			(4, "attr with", "context"),
+			(5, "attr with", None),
+			(6, "Long JS string with", "JS context on newline"),
+			(12, "Long JS string with formats only {0}", None),
+		]
+
+		output = extract_messages_from_javascript_code(code)
+
+		self.assertEqual(len(expected_output), len(output))
+		for expected, actual in zip(expected_output, output):
+			with self.subTest():
+				self.assertEqual(expected, actual)
+
+	def test_js_parser_arg_capturing(self):
+		"""Get non-flattened args in correct order so 3rd arg if present is always context."""
+
+		def get_args(code):
+			*__, args = next(extract_javascript(code))
+			return args
+
+		args = get_args("""__("attr with", ["format", "replacements"], "context")""")
+		self.assertEqual(args, ("attr with", None, "context"))
+
+		args = get_args("""__("attr with", ["format", "replacements"])""")
+		self.assertEqual(args, ("attr with", None))
 
 
 def verify_translation_files(app):
diff --git a/frappe/translate.py b/frappe/translate.py
index 1ba509b632..6e73fb0c40 100644
--- a/frappe/translate.py
+++ b/frappe/translate.py
@@ -17,6 +17,7 @@ import re
 from csv import reader
 
 from babel.messages.extract import extract_python
+from babel.messages.jslexer import Token, tokenize, unquote_string
 from pypika.terms import PseudoColumn
 
 import frappe
@@ -707,6 +708,8 @@ def get_messages_from_file(path: str) -> list[tuple[str, str, str | None, int]]:
 
 	if path.lower().endswith(".py"):
 		messages = extract_messages_from_python_code(file_contents)
+	elif path.lower().endswith(".js"):
+		messages = extract_messages_from_javascript_code(file_contents)
 	else:
 		messages = extract_messages_from_code(file_contents)
 	return [
@@ -718,7 +721,7 @@ def get_messages_from_file(path: str) -> list[tuple[str, str, str | None, int]]:
 
 
 def extract_messages_from_python_code(code: str) -> list[tuple[int, str, str | None]]:
-	"""Extracts translatable strings from python code using AST"""
+	"""Extracts translatable strings from Python code using babel."""
 
 	messages = []
 
@@ -741,6 +744,182 @@ def extract_messages_from_python_code(code: str) -> list[tuple[int, str, str | N
 	return messages
 
 
+def extract_messages_from_javascript_code(code: str) -> list[tuple[int, str, str | None]]:
+	"""Extracts translatable strings from JavaScript code using babel.
+
+	:param code: JavaScript source as a string
+	:return: list of ``(lineno, source_text, context)``; ``context`` is the
+	         third argument of ``__()`` when it is a string, else None.
+	"""
+
+	messages = []
+
+	for message in extract_javascript(
+		code,
+		keywords=["__"],
+		options={},
+	):
+		lineno, _func, args = message
+
+		if not args or not args[0]:
+			continue
+
+		source_text = args[0] if isinstance(args, tuple) else args
+		context = None
+
+		if isinstance(args, tuple) and len(args) == 3 and isinstance(args[2], str):
+			context = args[2]
+
+		messages.append((lineno, source_text, context))
+
+	return messages
+
+
+def extract_javascript(code, keywords=("__",), options=None):
+	"""Extract messages from JavaScript source code.
+
+	This is a modified version of babel's JS parser. Reused under BSD license.
+	License: https://github.com/python-babel/babel/blob/master/LICENSE
+
+	Changes from upstream:
+	- Preserve arguments, babel's parser flattened all values in args,
+	  we need order because we use different syntax for translation
+	  which can contain 2nd arg which is array of many values. If
+	  argument is non-primitive type then value is NOT returned in
+	  args, None is kept in its position instead.
+	  E.g. __("0", ["1", "2"], "3") -> ("0", None, "3")
+	- remove comments support
+	- changed signature to accept string directly.
+
+	:param code: code as string
+	:param keywords: a list of keywords (i.e. function names) that should be
+	                 recognized as translation functions
+	:param options: a dictionary of additional options (optional)
+	                Supported options are:
+	                * `template_string` -- set to false to disable ES6
+	                                       template string support.
+	:return: generator of (lineno, funcname, args); args is a plain string
+	         for a call with a single argument, otherwise a tuple.
+	"""
+	if options is None:
+		options = {}
+	if isinstance(keywords, str):
+		# A bare string would be matched by substring, e.g. "_" in "__".
+		keywords = (keywords,)
+
+	funcname = message_lineno = None
+	messages = []
+	last_argument = None
+	concatenate_next = False
+	last_token = None
+	call_stack = -1
+	# Depth inside array/object literals of the call being parsed. Only
+	# tokens at depth 0 are arguments of the translation function itself.
+	tree_level = 0
+	# Set when the current argument held an array/object literal, so a
+	# trailing non-primitive argument is still reported as None.
+	has_container = False
+	opening_operators = {"[", "{"}
+	closing_operators = {"]", "}"}
+	dotted = any("." in kw for kw in keywords)
+
+	for token in tokenize(
+		code,
+		jsx=True,
+		template_string=options.get("template_string", True),
+		dotted=dotted,
+	):
+		if (  # Turn keyword`foo` expressions into keyword("foo") calls:
+			funcname
+			and (last_token and last_token.type == "name")  # have a keyword...
+			and token.type  # we've seen nothing after the keyword...
+			== "template_string"  # this is a template string
+		):
+			message_lineno = token.lineno
+			messages = [unquote_string(token.value)]
+			call_stack = 0
+			tree_level = 0
+			has_container = False
+			token = Token("operator", ")", token.lineno)
+
+		if token.type == "operator" and token.value == "(":
+			if funcname:
+				message_lineno = token.lineno
+				call_stack += 1
+
+		elif token.type in ("linecomment", "multilinecomment"):
+			pass  # ignore comments
+
+		elif funcname and call_stack == 0:
+			if token.type == "operator" and token.value == ")":
+				if last_argument is not None:
+					messages.append(last_argument)
+				elif has_container:
+					messages.append(None)
+				if len(messages) > 1:
+					messages = tuple(messages)
+				elif messages:
+					messages = messages[0]
+				else:
+					messages = None
+
+				if messages is not None:
+					yield (message_lineno, funcname, messages)
+
+				funcname = message_lineno = last_argument = None
+				concatenate_next = False
+				has_container = False
+				messages = []
+				call_stack = -1
+				tree_level = 0
+
+			elif token.type == "operator" and token.value in opening_operators:
+				tree_level += 1
+				has_container = True
+
+			elif token.type == "operator" and token.value in closing_operators:
+				tree_level = max(tree_level - 1, 0)
+
+			elif tree_level > 0:
+				pass  # inside an array/object literal, not an argument
+
+			elif token.type in ("string", "template_string"):
+				new_value = unquote_string(token.value)
+				if concatenate_next:
+					last_argument = (last_argument or "") + new_value
+					concatenate_next = False
+				else:
+					last_argument = new_value
+
+			elif token.type == "operator":
+				if token.value == ",":
+					if last_argument is not None:
+						messages.append(last_argument)
+						last_argument = None
+					else:
+						messages.append(None)
+					concatenate_next = False
+					has_container = False
+				elif token.value == "+":
+					concatenate_next = True
+
+		elif call_stack > 0 and token.type == "operator" and token.value == ")":
+			call_stack -= 1
+
+		elif funcname and call_stack == -1:
+			funcname = None
+
+		elif (
+			call_stack == -1
+			and token.type == "name"
+			and token.value in keywords
+			and (last_token is None or last_token.type != "name" or last_token.value != "function")
+		):
+			funcname = token.value
+
+		last_token = token
+
+
 def extract_messages_from_code(code):
 	"""
 	Extracts translatable strings from a code file