fix: move extract_javascript to extractors folder and use it

instead of Babel's JS extractor
This commit is contained in:
barredterra 2023-12-28 17:00:46 +01:00
parent e70c3092da
commit a53e819022
3 changed files with 151 additions and 154 deletions

View file

@ -1,26 +1,163 @@
from babel.messages.extract import extract_javascript
from io import BufferedReader
def extract(fileobj: BufferedReader, keywords: str, comment_tags: tuple, options: dict):
    """Yield translatable messages found in a JavaScript file.

    The file is scanned for calls to ``__``, the translation function used
    in JavaScript sources, and each call is mapped to a gettext-style
    function name:

    - ``__("message")`` or ``__("message", [args])`` -> ``gettext``
    - ``__("message", [args], "context")`` -> ``pgettext`` with the message
      tuple reordered to ``(context, message)``

    :param fileobj: binary file object; its content is decoded as UTF-8
    :param keywords: not read; ``__`` is always used as the only keyword
        (presumably kept for the Babel extraction-method signature)
    :param comment_tags: not read; translator comments are not extracted
    :param options: passed through to ``extract_javascript``
    :return: generator of ``(lineno, funcname, messages, comments)`` tuples,
        where ``comments`` is always an empty list
    """
    code = fileobj.read().decode("utf-8")

    # Pass the keyword as a one-element tuple: `extract_javascript` tests
    # `token.value in keywords`, which on a plain string is a substring test
    # and would also accept the identifier `_`.
    for lineno, funcname, messages in extract_javascript(code, ("__",), options):
        # Skip calls whose first argument is not a string, e.g. `__(myvar)`.
        if not messages or not messages[0]:
            continue

        # `funcname` here will be `__` which is our translation function. We
        # have to convert it back to usual function names
        funcname = "gettext"

        if isinstance(messages, tuple):
            if len(messages) == 3 and messages[2]:
                # The third argument is the translation context.
                funcname = "pgettext"
                messages = (messages[2], messages[0])
            else:
                messages = messages[0]

        yield lineno, funcname, messages, []
def extract_javascript(code, keywords=("__",), options=None):
    """Extract messages from JavaScript source code.

    This is a modified version of babel's JS parser. Reused under BSD license.
    License: https://github.com/python-babel/babel/blob/master/LICENSE

    Changes from upstream:
    - Preserve arguments, babel's parser flattened all values in args,
      we need order because we use different syntax for translation
      which can contain 2nd arg which is array of many values. If
      argument is non-primitive type then value is NOT returned in
      args.
      E.g. __("0", ["1", "2"], "3") -> ("0", None, "3")
    - remove comments support
    - changed signature to accept string directly.

    :param code: code as string
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions; a single string is
                     treated as one keyword
    :param options: a dictionary of additional options (optional)
                    Supported options are:
                    * `template_string` -- set to false to disable ES6
                                           template string support.
    :return: generator of ``(lineno, funcname, messages)`` tuples.
             ``messages`` is a string when the call has one argument and a
             tuple when it has several; calls without any collected argument
             are not yielded.
    """
    from babel.messages.jslexer import Token, tokenize, unquote_string

    if options is None:
        options = {}

    # A bare string would turn `token.value in keywords` into a substring
    # test (so `_` would match "__") and make the `dotted` check below
    # iterate over single characters.
    if isinstance(keywords, str):
        keywords = (keywords,)

    funcname = message_lineno = None
    messages = []
    last_argument = None
    concatenate_next = False
    last_token = None
    # -1: outside a translation call, 0: directly inside its argument list,
    # >0: inside a nested parenthesised expression within the arguments.
    call_stack = -1

    # Tree level = depth inside function call tree
    #  Example: __("0", ["1", "2"], "3")
    # Depth         __()
    #             /   |   \
    #   0       "0" [...] "3"  <- only 0th level strings matter
    #               /  \
    #   1         "1"  "2"
    tree_level = 0
    opening_operators = {"[", "{"}
    closing_operators = {"]", "}"}
    all_container_operators = opening_operators.union(closing_operators)
    dotted = any("." in kw for kw in keywords)

    for token in tokenize(
        code,
        jsx=True,
        template_string=options.get("template_string", True),
        dotted=dotted,
    ):
        if (  # Turn keyword`foo` expressions into keyword("foo") calls:
            funcname
            and (last_token and last_token.type == "name")  # have a keyword...
            and token.type == "template_string"  # ...directly followed by a template string
        ):
            message_lineno = token.lineno
            messages = [unquote_string(token.value)]
            call_stack = 0
            tree_level = 0
            # Pretend the call just closed so the branch below emits it.
            token = Token("operator", ")", token.lineno)

        if token.type == "operator" and token.value == "(":
            if funcname:
                message_lineno = token.lineno
                call_stack += 1

        elif call_stack >= 0 and token.type == "operator" and token.value in all_container_operators:
            if token.value in opening_operators:
                tree_level += 1
            if token.value in closing_operators:
                tree_level -= 1

        elif (call_stack == -1 and token.type == "linecomment") or token.type == "multilinecomment":
            pass  # ignore comments

        elif funcname and call_stack == 0:
            if token.type == "operator" and token.value == ")":
                # End of the translation call: flush the pending argument
                # and emit whatever was collected.
                if last_argument is not None:
                    messages.append(last_argument)
                if len(messages) > 1:
                    messages = tuple(messages)
                elif messages:
                    messages = messages[0]
                else:
                    messages = None

                if messages is not None:
                    yield (message_lineno, funcname, messages)

                funcname = message_lineno = last_argument = None
                concatenate_next = False
                messages = []
                call_stack = -1
                tree_level = 0

            elif token.type in ("string", "template_string"):
                new_value = unquote_string(token.value)
                if tree_level > 0:
                    # String nested inside an array/object argument: ignored.
                    pass
                elif concatenate_next:
                    last_argument = (last_argument or "") + new_value
                    concatenate_next = False
                else:
                    last_argument = new_value

            elif token.type == "operator":
                if token.value == ",":
                    if last_argument is not None:
                        messages.append(last_argument)
                        last_argument = None
                    elif tree_level == 0:
                        # Non-string top-level argument: keep its position.
                        messages.append(None)
                    concatenate_next = False
                elif token.value == "+":
                    concatenate_next = True

        elif call_stack > 0 and token.type == "operator" and token.value == ")":
            # Leaving a nested parenthesised expression. `tree_level` is left
            # untouched: brackets opened inside the nested expression have
            # already been closed, and resetting it here would forget an
            # enclosing `[`/`{`, e.g. in __("a", [foo("x"), "y"], "ctx").
            call_stack -= 1

        elif funcname and call_stack == -1:
            # Keyword was not followed by a call: forget it.
            funcname = None

        elif (
            call_stack == -1
            and token.type == "name"
            and token.value in keywords
            and (last_token is None or last_token.type != "name" or last_token.value != "function")
        ):
            funcname = token.value

        last_token = token

View file

@ -8,12 +8,12 @@ from unittest.mock import patch
import frappe
import frappe.translate
from frappe import _
from frappe.gettext.extractors.javascript import extract_javascript
from frappe.tests.utils import FrappeTestCase
from frappe.translate import (
MERGED_TRANSLATION_KEY,
USER_TRANSLATION_KEY,
clear_cache,
extract_javascript,
extract_messages_from_javascript_code,
extract_messages_from_python_code,
get_language,

View file

@ -19,6 +19,7 @@ from contextlib import contextmanager, suppress
from csv import reader, writer
import frappe
from frappe.gettext.extractors.javascript import extract_javascript
from frappe.gettext.translate import get_translations_from_mo
from frappe.model.utils import InvalidIncludePath, render_include
from frappe.query_builder import DocType, Field
@ -675,147 +676,6 @@ def extract_messages_from_javascript_code(code: str) -> list[tuple[int, str, str
return messages
def extract_javascript(code, keywords=("__",), options=None):
    """Extract messages from JavaScript source code.

    This is a modified version of babel's JS parser. Reused under BSD license.
    License: https://github.com/python-babel/babel/blob/master/LICENSE

    Changes from upstream:
    - Preserve arguments, babel's parser flattened all values in args,
      we need order because we use different syntax for translation
      which can contain 2nd arg which is array of many values. If
      argument is non-primitive type then value is NOT returned in
      args.
      E.g. __("0", ["1", "2"], "3") -> ("0", None, "3")
    - remove comments support
    - changed signature to accept string directly.

    :param code: code as string
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions; a single string is
                     treated as one keyword
    :param options: a dictionary of additional options (optional)
                    Supported options are:
                    * `template_string` -- set to false to disable ES6
                                           template string support.
    :return: generator of ``(lineno, funcname, messages)`` tuples.
             ``messages`` is a string when the call has one argument and a
             tuple when it has several; calls without any collected argument
             are not yielded.
    """
    from babel.messages.jslexer import Token, tokenize, unquote_string

    if options is None:
        options = {}

    # A bare string would turn `token.value in keywords` into a substring
    # test (so `_` would match "__") and make the `dotted` check below
    # iterate over single characters.
    if isinstance(keywords, str):
        keywords = (keywords,)

    funcname = message_lineno = None
    messages = []
    last_argument = None
    concatenate_next = False
    last_token = None
    # -1: outside a translation call, 0: directly inside its argument list,
    # >0: inside a nested parenthesised expression within the arguments.
    call_stack = -1

    # Tree level = depth inside function call tree
    #  Example: __("0", ["1", "2"], "3")
    # Depth         __()
    #             /   |   \
    #   0       "0" [...] "3"  <- only 0th level strings matter
    #               /  \
    #   1         "1"  "2"
    tree_level = 0
    opening_operators = {"[", "{"}
    closing_operators = {"]", "}"}
    all_container_operators = opening_operators.union(closing_operators)
    dotted = any("." in kw for kw in keywords)

    for token in tokenize(
        code,
        jsx=True,
        template_string=options.get("template_string", True),
        dotted=dotted,
    ):
        if (  # Turn keyword`foo` expressions into keyword("foo") calls:
            funcname
            and (last_token and last_token.type == "name")  # have a keyword...
            and token.type == "template_string"  # ...directly followed by a template string
        ):
            message_lineno = token.lineno
            messages = [unquote_string(token.value)]
            call_stack = 0
            tree_level = 0
            # Pretend the call just closed so the branch below emits it.
            token = Token("operator", ")", token.lineno)

        if token.type == "operator" and token.value == "(":
            if funcname:
                message_lineno = token.lineno
                call_stack += 1

        elif call_stack >= 0 and token.type == "operator" and token.value in all_container_operators:
            if token.value in opening_operators:
                tree_level += 1
            if token.value in closing_operators:
                tree_level -= 1

        elif (call_stack == -1 and token.type == "linecomment") or token.type == "multilinecomment":
            pass  # ignore comments

        elif funcname and call_stack == 0:
            if token.type == "operator" and token.value == ")":
                # End of the translation call: flush the pending argument
                # and emit whatever was collected.
                if last_argument is not None:
                    messages.append(last_argument)
                if len(messages) > 1:
                    messages = tuple(messages)
                elif messages:
                    messages = messages[0]
                else:
                    messages = None

                if messages is not None:
                    yield (message_lineno, funcname, messages)

                funcname = message_lineno = last_argument = None
                concatenate_next = False
                messages = []
                call_stack = -1
                tree_level = 0

            elif token.type in ("string", "template_string"):
                new_value = unquote_string(token.value)
                if tree_level > 0:
                    # String nested inside an array/object argument: ignored.
                    pass
                elif concatenate_next:
                    last_argument = (last_argument or "") + new_value
                    concatenate_next = False
                else:
                    last_argument = new_value

            elif token.type == "operator":
                if token.value == ",":
                    if last_argument is not None:
                        messages.append(last_argument)
                        last_argument = None
                    elif tree_level == 0:
                        # Non-string top-level argument: keep its position.
                        messages.append(None)
                    concatenate_next = False
                elif token.value == "+":
                    concatenate_next = True

        elif call_stack > 0 and token.type == "operator" and token.value == ")":
            # Leaving a nested parenthesised expression. `tree_level` is left
            # untouched: brackets opened inside the nested expression have
            # already been closed, and resetting it here would forget an
            # enclosing `[`/`{`, e.g. in __("a", [foo("x"), "y"], "ctx").
            call_stack -= 1

        elif funcname and call_stack == -1:
            # Keyword was not followed by a call: forget it.
            funcname = None

        elif (
            call_stack == -1
            and token.type == "name"
            and token.value in keywords
            and (last_token is None or last_token.type != "name" or last_token.value != "function")
        ):
            funcname = token.value

        last_token = token
def extract_messages_from_code(code):
"""
Extracts translatable strings from a code file