feat: JS extractor and custom JS parser
This commit is contained in:
parent
1425842ef0
commit
cd53466b6a
2 changed files with 203 additions and 3 deletions
|
|
@ -10,6 +10,8 @@ import frappe
|
|||
import frappe.translate
|
||||
from frappe import _
|
||||
from frappe.translate import (
|
||||
extract_javascript,
|
||||
extract_messages_from_javascript_code,
|
||||
extract_messages_from_python_code,
|
||||
get_language,
|
||||
get_parent_language,
|
||||
|
|
@ -131,7 +133,7 @@ class TestTranslate(unittest.TestCase):
|
|||
"""Load all CSV files to ensure they have correct format"""
|
||||
verify_translation_files("frappe")
|
||||
|
||||
def test_python_ast_extractor(self):
|
||||
def test_python_extractor(self):
|
||||
|
||||
code = textwrap.dedent(
|
||||
"""
|
||||
|
|
@ -160,7 +162,61 @@ class TestTranslate(unittest.TestCase):
|
|||
]
|
||||
|
||||
output = extract_messages_from_python_code(code)
|
||||
self.assertEqual(output, expected_output, msg=output)
|
||||
self.assertEqual(len(expected_output), len(output))
|
||||
for expected, actual in zip(expected_output, output):
|
||||
with self.subTest():
|
||||
self.assertEqual(expected, actual)
|
||||
|
||||
def test_js_extractor(self):
|
||||
|
||||
code = textwrap.dedent(
|
||||
"""
|
||||
__("attr")
|
||||
__("attr with", null, "context")
|
||||
__("attr with", ["format", "replacements"], "context")
|
||||
__("attr with", ["format", "replacements"])
|
||||
__(
|
||||
"Long JS string with", [
|
||||
"format", "replacements"
|
||||
],
|
||||
"JS context on newline"
|
||||
)
|
||||
__(
|
||||
"Long JS string with formats only {0}", [
|
||||
"format", "replacements"
|
||||
],
|
||||
)
|
||||
_(`template strings not supported yet`)
|
||||
"""
|
||||
)
|
||||
expected_output = [
|
||||
(2, "attr", None),
|
||||
(3, "attr with", "context"),
|
||||
(4, "attr with", "context"),
|
||||
(5, "attr with", None),
|
||||
(6, "Long JS string with", "JS context on newline"),
|
||||
(12, "Long JS string with formats only {0}", None),
|
||||
]
|
||||
|
||||
output = extract_messages_from_javascript_code(code)
|
||||
|
||||
self.assertEqual(len(expected_output), len(output))
|
||||
for expected, actual in zip(expected_output, output):
|
||||
with self.subTest():
|
||||
self.assertEqual(expected, actual)
|
||||
|
||||
def test_js_parser_arg_capturing(self):
|
||||
"""Get non-flattened args in correct order so 3rd arg if present is always context."""
|
||||
|
||||
def get_args(code):
|
||||
*__, args = next(extract_javascript(code))
|
||||
return args
|
||||
|
||||
args = get_args("""__("attr with", ["format", "replacements"], "context")""")
|
||||
self.assertEqual(args, ("attr with", None, "context"))
|
||||
|
||||
args = get_args("""__("attr with", ["format", "replacements"])""")
|
||||
self.assertEqual(args, ("attr with", None))
|
||||
|
||||
|
||||
def verify_translation_files(app):
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ import re
|
|||
from csv import reader
|
||||
|
||||
from babel.messages.extract import extract_python
|
||||
from babel.messages.jslexer import Token, tokenize, unquote_string
|
||||
from pypika.terms import PseudoColumn
|
||||
|
||||
import frappe
|
||||
|
|
@ -707,6 +708,8 @@ def get_messages_from_file(path: str) -> list[tuple[str, str, str | None, int]]:
|
|||
|
||||
if path.lower().endswith(".py"):
|
||||
messages = extract_messages_from_python_code(file_contents)
|
||||
elif path.lower().endswith(".js"):
|
||||
messages = extract_messages_from_javascript_code(file_contents)
|
||||
else:
|
||||
messages = extract_messages_from_code(file_contents)
|
||||
return [
|
||||
|
|
@ -718,7 +721,7 @@ def get_messages_from_file(path: str) -> list[tuple[str, str, str | None, int]]:
|
|||
|
||||
|
||||
def extract_messages_from_python_code(code: str) -> list[tuple[int, str, str | None]]:
|
||||
"""Extracts translatable strings from python code using AST"""
|
||||
"""Extracts translatable strings from Python code using babel."""
|
||||
|
||||
messages = []
|
||||
|
||||
|
|
@ -741,6 +744,147 @@ def extract_messages_from_python_code(code: str) -> list[tuple[int, str, str | N
|
|||
return messages
|
||||
|
||||
|
||||
def extract_messages_from_javascript_code(code: str) -> list[tuple[int, str, str | None]]:
|
||||
"""Extracts translatable strings from JavaScript code using babel."""
|
||||
|
||||
messages = []
|
||||
|
||||
for message in extract_javascript(
|
||||
code,
|
||||
keywords=["__"],
|
||||
options={},
|
||||
):
|
||||
lineno, _func, args = message
|
||||
|
||||
if not args or not args[0]:
|
||||
continue
|
||||
|
||||
source_text = args[0] if isinstance(args, tuple) else args
|
||||
context = None
|
||||
|
||||
if isinstance(args, tuple) and len(args) == 3 and isinstance(args[2], str):
|
||||
context = args[2]
|
||||
|
||||
messages.append((lineno, source_text, context))
|
||||
|
||||
return messages
|
||||
|
||||
|
||||
def extract_javascript(code, keywords=("__"), options=None):
|
||||
"""Extract messages from JavaScript source code.
|
||||
|
||||
This is a modified version of babel's JS parser. Reused under BSD license.
|
||||
License: https://github.com/python-babel/babel/blob/master/LICENSE
|
||||
|
||||
Changes from upstream:
|
||||
- Preserve arguments, babel's parser flattened all values in args,
|
||||
we need order because we use different syntax for translation
|
||||
which can contain 2nd arg which is array of many values. If
|
||||
argument is non-primitive type then value is NOT returned in
|
||||
args.
|
||||
E.g. __("0", ["1", "2"], "3") -> ("0", None, "3")
|
||||
- remove comments support
|
||||
- changed signature to accept string directly.
|
||||
|
||||
:param code: code as string
|
||||
:param keywords: a list of keywords (i.e. function names) that should be
|
||||
recognized as translation functions
|
||||
:param options: a dictionary of additional options (optional)
|
||||
Supported options are:
|
||||
* `template_string` -- set to false to disable ES6
|
||||
template string support.
|
||||
"""
|
||||
if options is None:
|
||||
options = {}
|
||||
|
||||
funcname = message_lineno = None
|
||||
messages = []
|
||||
last_argument = None
|
||||
concatenate_next = False
|
||||
last_token = None
|
||||
call_stack = -1
|
||||
dotted = any("." in kw for kw in keywords)
|
||||
|
||||
for token in tokenize(
|
||||
code,
|
||||
jsx=True,
|
||||
template_string=options.get("template_string", True),
|
||||
dotted=dotted,
|
||||
):
|
||||
if ( # Turn keyword`foo` expressions into keyword("foo") calls:
|
||||
funcname
|
||||
and (last_token and last_token.type == "name") # have a keyword...
|
||||
and token.type # we've seen nothing after the keyword...
|
||||
== "template_string" # this is a template string
|
||||
):
|
||||
message_lineno = token.lineno
|
||||
messages = [unquote_string(token.value)]
|
||||
call_stack = 0
|
||||
token = Token("operator", ")", token.lineno)
|
||||
|
||||
if token.type == "operator" and token.value == "(":
|
||||
if funcname:
|
||||
message_lineno = token.lineno
|
||||
call_stack += 1
|
||||
|
||||
elif call_stack == -1 and token.type == "linecomment" or token.type == "multilinecomment":
|
||||
pass
|
||||
|
||||
elif funcname and call_stack == 0:
|
||||
if token.type == "operator" and token.value == ")":
|
||||
if last_argument is not None:
|
||||
messages.append(last_argument)
|
||||
if len(messages) > 1:
|
||||
messages = tuple(messages)
|
||||
elif messages:
|
||||
messages = messages[0]
|
||||
else:
|
||||
messages = None
|
||||
|
||||
if messages is not None:
|
||||
yield (message_lineno, funcname, messages)
|
||||
|
||||
funcname = message_lineno = last_argument = None
|
||||
concatenate_next = False
|
||||
messages = []
|
||||
call_stack = -1
|
||||
|
||||
elif token.type in ("string", "template_string"):
|
||||
new_value = unquote_string(token.value)
|
||||
if concatenate_next:
|
||||
last_argument = (last_argument or "") + new_value
|
||||
concatenate_next = False
|
||||
else:
|
||||
last_argument = new_value
|
||||
|
||||
elif token.type == "operator":
|
||||
if token.value == ",":
|
||||
if last_argument is not None:
|
||||
messages.append(last_argument)
|
||||
last_argument = None
|
||||
else:
|
||||
messages.append(None)
|
||||
concatenate_next = False
|
||||
elif token.value == "+":
|
||||
concatenate_next = True
|
||||
|
||||
elif call_stack > 0 and token.type == "operator" and token.value == ")":
|
||||
call_stack -= 1
|
||||
|
||||
elif funcname and call_stack == -1:
|
||||
funcname = None
|
||||
|
||||
elif (
|
||||
call_stack == -1
|
||||
and token.type == "name"
|
||||
and token.value in keywords
|
||||
and (last_token is None or last_token.type != "name" or last_token.value != "function")
|
||||
):
|
||||
funcname = token.value
|
||||
|
||||
last_token = token
|
||||
|
||||
|
||||
def extract_messages_from_code(code):
|
||||
"""
|
||||
Extracts translatable strings from a code file
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue