from io import BufferedReader

# Frappe's JavaScript translation function. Kept as a tuple so that keyword
# matching is an exact membership test; with a bare string, `"_" in "__"` is
# true and calls such as `_("x")` would be picked up as translations.
_TRANSLATION_KEYWORDS = ("__",)


def extract(fileobj: BufferedReader, keywords: str, comment_tags: tuple, options: dict):
    """Babel extraction method for JavaScript sources that translate with ``__()``.

    :param fileobj: file-like object whose ``read()`` returns the source,
            either as bytes or as text
    :param keywords: ignored; ``__`` is always used as the translation function
    :param comment_tags: ignored; translator comments are not extracted
    :param options: dictionary of extractor options (may be ``None``).
            ``encoding`` (default ``utf-8``) is used to decode bytes, and
            the dictionary is passed on to :func:`extract_javascript`.
    :return: generator of ``(lineno, funcname, messages, comments)`` tuples.
            ``funcname`` is ``"gettext"`` with ``messages`` being the message
            string, or ``"pgettext"`` with ``messages`` being
            ``(context, message)``. ``comments`` is always an empty list.
    """
    options = options or {}

    raw = fileobj.read()
    if isinstance(raw, bytes):
        code = raw.decode(options.get("encoding") or "utf-8")
    else:
        code = raw

    # Nothing to tokenize in an empty (or whitespace-only) file.
    if not code.strip():
        return

    for lineno, _funcname, messages in extract_javascript(code, _TRANSLATION_KEYWORDS, options):
        # Skip calls without a literal first argument, e.g. `__(myvar)`.
        if not messages or not messages[0]:
            continue

        # The name reported by `extract_javascript` is `__`, our translation
        # function. Convert it back to the usual gettext function names.
        if isinstance(messages, tuple):
            # `__(message, args, context)`: the third argument is the context.
            if len(messages) == 3 and messages[2]:
                yield lineno, "pgettext", (messages[2], messages[0]), []
                continue
            messages = messages[0]

        yield lineno, "gettext", messages, []


def extract_javascript(code, keywords=("__",), options=None):
    """Extract messages from JavaScript source code.

    This is a modified version of babel's JS parser. Reused under BSD license.
    License: https://github.com/python-babel/babel/blob/master/LICENSE

    Changes from upstream:
    - Preserve arguments, babel's parser flattened all values in args,
      we need order because we use different syntax for translation
      which can contain 2nd arg which is array of many values. If
      argument is non-primitive type then value is NOT returned in
      args.
      E.g. __("0", ["1", "2"], "3") -> ("0", None, "3")
    - remove comments support
    - changed signature to accept string directly.

    :param code: code as string
    :param keywords: a list of keywords (i.e. function names) that should be
            recognized as translation functions. A single string is
            treated as one keyword.
    :param options: a dictionary of additional options (optional)
            Supported options are:
            * `template_string` -- set to false to disable ES6
              template string support.
    :return: generator of ``(lineno, funcname, messages)`` tuples, where
            ``messages`` is a string for a single-argument call and a tuple
            (with ``None`` for non-string arguments) otherwise.
    """
    from babel.messages.jslexer import Token, tokenize, unquote_string

    if options is None:
        options = {}

    # A bare string would turn `token.value in keywords` into a substring
    # test (so `_` would match `__`); normalise it to a one-element tuple.
    if isinstance(keywords, str):
        keywords = (keywords,)

    funcname = message_lineno = None
    messages = []
    last_argument = None
    concatenate_next = False
    last_token = None
    # -1: outside a translation call, 0: directly inside its parentheses,
    # >0: inside nested parentheses within the call.
    call_stack = -1

    # Tree level = depth inside function call tree
    # Example: __("0", ["1", "2"], "3")
    # Depth         __()
    #             /   |   \
    #   0       "0" [...] "3"  <- only 0th level strings matter
    #               /  \
    #   1         "1"  "2"
    tree_level = 0
    opening_operators = {"[", "{"}
    closing_operators = {"]", "}"}
    all_container_operators = opening_operators.union(closing_operators)
    dotted = any("." in kw for kw in keywords)

    for token in tokenize(
        code,
        jsx=True,
        template_string=options.get("template_string", True),
        dotted=dotted,
    ):
        if (  # Turn keyword`foo` expressions into keyword("foo") calls:
            funcname  # have a keyword...
            and (last_token and last_token.type == "name")  # we've seen nothing after the keyword...
            and token.type == "template_string"  # this is a template string
        ):
            message_lineno = token.lineno
            messages = [unquote_string(token.value)]
            call_stack = 0
            tree_level = 0
            token = Token("operator", ")", token.lineno)

        if token.type == "operator" and token.value == "(":
            if funcname:
                message_lineno = token.lineno
                call_stack += 1

        elif call_stack >= 0 and token.type == "operator" and token.value in all_container_operators:
            if token.value in opening_operators:
                tree_level += 1
            else:
                # Never drop below the top level on a stray closing bracket.
                tree_level = max(tree_level - 1, 0)

        elif token.type in ("linecomment", "multilinecomment"):
            pass  # ignore comments

        elif funcname and call_stack == 0:
            if token.type == "operator" and token.value == ")":
                # End of the translation call: emit what was collected.
                if last_argument is not None:
                    messages.append(last_argument)
                if len(messages) > 1:
                    messages = tuple(messages)
                elif messages:
                    messages = messages[0]
                else:
                    messages = None

                if messages is not None:
                    yield (message_lineno, funcname, messages)

                funcname = message_lineno = last_argument = None
                concatenate_next = False
                messages = []
                call_stack = -1
                tree_level = 0

            elif token.type in ("string", "template_string"):
                new_value = unquote_string(token.value)
                if tree_level > 0:
                    pass  # strings nested in arrays/objects are not arguments
                elif concatenate_next:
                    last_argument = (last_argument or "") + new_value
                    concatenate_next = False
                else:
                    last_argument = new_value

            elif token.type == "operator":
                if token.value == ",":
                    if last_argument is not None:
                        messages.append(last_argument)
                        last_argument = None
                    elif tree_level == 0:
                        # Placeholder keeps argument positions stable.
                        messages.append(None)
                    concatenate_next = False
                elif token.value == "+":
                    concatenate_next = True

        elif call_stack > 0 and token.type == "operator" and token.value == ")":
            # Closing a nested call. The container depth is deliberately left
            # untouched: the nested call may itself sit inside an array or
            # object, e.g. __("{0} items", [cstr(n)], "ctx"), and resetting
            # the depth here would lose the placeholder for that argument.
            call_stack -= 1

        elif funcname and call_stack == -1:
            # Keyword was not followed by a call; forget it.
            funcname = None

        elif (
            call_stack == -1
            and token.type == "name"
            and token.value in keywords
            and (last_token is None or last_token.type != "name" or last_token.value != "function")
        ):
            funcname = token.value

        last_token = token