fix: move extract_javascript to extractors folder and use it
instead of babel's JS extractor
This commit is contained in:
parent
e70c3092da
commit
a53e819022
3 changed files with 151 additions and 154 deletions
|
|
@ -1,26 +1,163 @@
|
|||
from babel.messages.extract import extract_javascript
|
||||
from io import BufferedReader
|
||||
|
||||
|
||||
def extract(fileobj: BufferedReader, keywords: str, comment_tags: tuple, options: dict):
    """Extract translatable messages from a JavaScript file that uses ``__``.

    The file is read fully, decoded as UTF-8 and handed to
    ``extract_javascript``. Every ``__`` call found is reported as either
    ``gettext`` (message only) or ``pgettext`` (message with context).

    :param fileobj: binary file object containing the JavaScript source
    :param keywords: ignored; ``__`` is always used as the translation function
    :param comment_tags: ignored; translator comments are not extracted
    :param options: options dictionary forwarded to ``extract_javascript``
    :return: generator of ``(lineno, funcname, messages, comments)`` tuples,
             where ``comments`` is always an empty list
    """
    code = fileobj.read().decode("utf-8")

    # Pass the keyword as a one-element tuple. A bare string "__" would turn
    # the `token.value in keywords` check into a substring test, which also
    # matches the unrelated name `_`.
    for lineno, funcname, messages in extract_javascript(code, ("__",), options):
        # Ignore calls without a literal first argument, like `__(myvar)`
        if not messages or not messages[0]:
            continue

        # `funcname` here will be `__` which is our translation function. We
        # have to convert it back to usual function names
        funcname = "gettext"

        if isinstance(messages, tuple):
            if len(messages) == 3 and messages[2]:
                # __(message, args, context) -> pgettext(context, message)
                funcname = "pgettext"
                messages = (messages[2], messages[0])
            else:
                # Only the first argument is the translatable message
                messages = messages[0]

        yield lineno, funcname, messages, []
|
||||
|
||||
def extract_javascript(code, keywords=("__",), options=None):
    """Extract messages from JavaScript source code.

    This is a modified version of babel's JS parser. Reused under BSD license.
    License: https://github.com/python-babel/babel/blob/master/LICENSE

    Changes from upstream:
    - Preserve arguments, babel's parser flattened all values in args,
      we need order because we use different syntax for translation
      which can contain 2nd arg which is array of many values. If
      argument is non-primitive type then value is NOT returned in
      args.
      E.g. __("0", ["1", "2"], "3") -> ("0", None, "3")
    - remove comments support
    - changed signature to accept string directly.

    :param code: code as string
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions. A single keyword
                     passed as a plain string is treated as a one-element list.
    :param options: a dictionary of additional options (optional)
                    Supported options are:
                    * `template_string` -- set to false to disable ES6
                                           template string support.
    :return: generator of ``(lineno, funcname, messages)`` tuples. ``messages``
             is a single value when the call collected one argument and a
             tuple when it collected several; calls that collected nothing
             are not yielded.
    """
    from babel.messages.jslexer import Token, tokenize, unquote_string

    if options is None:
        options = {}

    if isinstance(keywords, str):
        # A bare string would make `token.value in keywords` a substring test
        # (e.g. "_" in "__" is True), so wrap it into a one-element tuple.
        keywords = (keywords,)

    funcname = message_lineno = None
    messages = []
    last_argument = None
    concatenate_next = False
    last_token = None
    call_stack = -1  # -1 = not inside a keyword call, 0 = directly inside it

    # Tree level = depth inside function call tree
    #  Example: __("0", ["1", "2"], "3")
    # Depth         __()
    #             /   |   \
    #   0       "0" [...] "3"  <- only 0th level strings matter
    #                /  \
    #   1          "1"  "2"
    tree_level = 0
    opening_operators = {"[", "{"}
    closing_operators = {"]", "}"}
    all_container_operators = opening_operators.union(closing_operators)
    dotted = any("." in kw for kw in keywords)

    for token in tokenize(
        code,
        jsx=True,
        template_string=options.get("template_string", True),
        dotted=dotted,
    ):
        if (  # Turn keyword`foo` expressions into keyword("foo") calls:
            funcname  # have a keyword...
            and (last_token and last_token.type == "name")  # we've seen nothing after the keyword...
            and token.type == "template_string"  # this is a template string
        ):
            message_lineno = token.lineno
            messages = [unquote_string(token.value)]
            call_stack = 0
            tree_level = 0
            # Pretend the call was just closed so the branch below yields it.
            token = Token("operator", ")", token.lineno)

        if token.type == "operator" and token.value == "(":
            if funcname:
                message_lineno = token.lineno
                call_stack += 1

        elif call_stack >= 0 and token.type == "operator" and token.value in all_container_operators:
            # Track [ ] / { } nesting so strings inside containers are skipped.
            if token.value in opening_operators:
                tree_level += 1
            if token.value in closing_operators:
                tree_level -= 1

        elif (call_stack == -1 and token.type == "linecomment") or token.type == "multilinecomment":
            # Parenthesised to make the `and`/`or` precedence explicit.
            pass  # ignore comments

        elif funcname and call_stack == 0:
            if token.type == "operator" and token.value == ")":
                # End of the keyword call: flush the pending argument and emit.
                if last_argument is not None:
                    messages.append(last_argument)
                if len(messages) > 1:
                    messages = tuple(messages)
                elif messages:
                    messages = messages[0]
                else:
                    messages = None

                if messages is not None:
                    yield (message_lineno, funcname, messages)

                # Reset all per-call state.
                funcname = message_lineno = last_argument = None
                concatenate_next = False
                messages = []
                call_stack = -1
                tree_level = 0

            elif token.type in ("string", "template_string"):
                new_value = unquote_string(token.value)
                if tree_level > 0:
                    pass  # string nested inside an array/object literal
                elif concatenate_next:
                    last_argument = (last_argument or "") + new_value
                    concatenate_next = False
                else:
                    last_argument = new_value

            elif token.type == "operator":
                if token.value == ",":
                    if last_argument is not None:
                        messages.append(last_argument)
                        last_argument = None
                    else:
                        # Keep a placeholder for non-string top-level arguments
                        # so that argument positions are preserved.
                        if tree_level == 0:
                            messages.append(None)
                    concatenate_next = False
                elif token.value == "+":
                    concatenate_next = True

        elif call_stack > 0 and token.type == "operator" and token.value == ")":
            call_stack -= 1
            # NOTE(review): this also discards the bracket depth of the
            # enclosing call, e.g. for __("a", [f("b"), "c"]) - confirm intended.
            tree_level = 0

        elif funcname and call_stack == -1:
            # Keyword was not followed by a call; forget it.
            funcname = None

        elif (
            call_stack == -1
            and token.type == "name"
            and token.value in keywords
            and (last_token is None or last_token.type != "name" or last_token.value != "function")
        ):
            funcname = token.value

        last_token = token
|
||||
|
|
|
|||
|
|
@ -8,12 +8,12 @@ from unittest.mock import patch
|
|||
import frappe
|
||||
import frappe.translate
|
||||
from frappe import _
|
||||
from frappe.gettext.extractors.javascript import extract_javascript
|
||||
from frappe.tests.utils import FrappeTestCase
|
||||
from frappe.translate import (
|
||||
MERGED_TRANSLATION_KEY,
|
||||
USER_TRANSLATION_KEY,
|
||||
clear_cache,
|
||||
extract_javascript,
|
||||
extract_messages_from_javascript_code,
|
||||
extract_messages_from_python_code,
|
||||
get_language,
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ from contextlib import contextmanager, suppress
|
|||
from csv import reader, writer
|
||||
|
||||
import frappe
|
||||
from frappe.gettext.extractors.javascript import extract_javascript
|
||||
from frappe.gettext.translate import get_translations_from_mo
|
||||
from frappe.model.utils import InvalidIncludePath, render_include
|
||||
from frappe.query_builder import DocType, Field
|
||||
|
|
@ -675,147 +676,6 @@ def extract_messages_from_javascript_code(code: str) -> list[tuple[int, str, str
|
|||
return messages
|
||||
|
||||
|
||||
def extract_javascript(code, keywords=("__",), options=None):
    """Extract messages from JavaScript source code.

    This is a modified version of babel's JS parser. Reused under BSD license.
    License: https://github.com/python-babel/babel/blob/master/LICENSE

    Changes from upstream:
    - Preserve arguments, babel's parser flattened all values in args,
      we need order because we use different syntax for translation
      which can contain 2nd arg which is array of many values. If
      argument is non-primitive type then value is NOT returned in
      args.
      E.g. __("0", ["1", "2"], "3") -> ("0", None, "3")
    - remove comments support
    - changed signature to accept string directly.

    :param code: code as string
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions. A single keyword
                     passed as a plain string is treated as a one-element list.
    :param options: a dictionary of additional options (optional)
                    Supported options are:
                    * `template_string` -- set to false to disable ES6
                                           template string support.
    :return: generator of ``(lineno, funcname, messages)`` tuples. ``messages``
             is a single value when the call collected one argument and a
             tuple when it collected several; calls that collected nothing
             are not yielded.
    """
    from babel.messages.jslexer import Token, tokenize, unquote_string

    if options is None:
        options = {}

    if isinstance(keywords, str):
        # A bare string would make `token.value in keywords` a substring test
        # (e.g. "_" in "__" is True), so wrap it into a one-element tuple.
        keywords = (keywords,)

    funcname = message_lineno = None
    messages = []
    last_argument = None
    concatenate_next = False
    last_token = None
    call_stack = -1  # -1 = not inside a keyword call, 0 = directly inside it

    # Tree level = depth inside function call tree
    #  Example: __("0", ["1", "2"], "3")
    # Depth         __()
    #             /   |   \
    #   0       "0" [...] "3"  <- only 0th level strings matter
    #                /  \
    #   1          "1"  "2"
    tree_level = 0
    opening_operators = {"[", "{"}
    closing_operators = {"]", "}"}
    all_container_operators = opening_operators.union(closing_operators)
    dotted = any("." in kw for kw in keywords)

    for token in tokenize(
        code,
        jsx=True,
        template_string=options.get("template_string", True),
        dotted=dotted,
    ):
        if (  # Turn keyword`foo` expressions into keyword("foo") calls:
            funcname  # have a keyword...
            and (last_token and last_token.type == "name")  # we've seen nothing after the keyword...
            and token.type == "template_string"  # this is a template string
        ):
            message_lineno = token.lineno
            messages = [unquote_string(token.value)]
            call_stack = 0
            tree_level = 0
            # Pretend the call was just closed so the branch below yields it.
            token = Token("operator", ")", token.lineno)

        if token.type == "operator" and token.value == "(":
            if funcname:
                message_lineno = token.lineno
                call_stack += 1

        elif call_stack >= 0 and token.type == "operator" and token.value in all_container_operators:
            # Track [ ] / { } nesting so strings inside containers are skipped.
            if token.value in opening_operators:
                tree_level += 1
            if token.value in closing_operators:
                tree_level -= 1

        elif (call_stack == -1 and token.type == "linecomment") or token.type == "multilinecomment":
            # Parenthesised to make the `and`/`or` precedence explicit.
            pass  # ignore comments

        elif funcname and call_stack == 0:
            if token.type == "operator" and token.value == ")":
                # End of the keyword call: flush the pending argument and emit.
                if last_argument is not None:
                    messages.append(last_argument)
                if len(messages) > 1:
                    messages = tuple(messages)
                elif messages:
                    messages = messages[0]
                else:
                    messages = None

                if messages is not None:
                    yield (message_lineno, funcname, messages)

                # Reset all per-call state.
                funcname = message_lineno = last_argument = None
                concatenate_next = False
                messages = []
                call_stack = -1
                tree_level = 0

            elif token.type in ("string", "template_string"):
                new_value = unquote_string(token.value)
                if tree_level > 0:
                    pass  # string nested inside an array/object literal
                elif concatenate_next:
                    last_argument = (last_argument or "") + new_value
                    concatenate_next = False
                else:
                    last_argument = new_value

            elif token.type == "operator":
                if token.value == ",":
                    if last_argument is not None:
                        messages.append(last_argument)
                        last_argument = None
                    else:
                        # Keep a placeholder for non-string top-level arguments
                        # so that argument positions are preserved.
                        if tree_level == 0:
                            messages.append(None)
                    concatenate_next = False
                elif token.value == "+":
                    concatenate_next = True

        elif call_stack > 0 and token.type == "operator" and token.value == ")":
            call_stack -= 1
            # NOTE(review): this also discards the bracket depth of the
            # enclosing call, e.g. for __("a", [f("b"), "c"]) - confirm intended.
            tree_level = 0

        elif funcname and call_stack == -1:
            # Keyword was not followed by a call; forget it.
            funcname = None

        elif (
            call_stack == -1
            and token.type == "name"
            and token.value in keywords
            and (last_token is None or last_token.type != "name" or last_token.value != "function")
        ):
            funcname = token.value

        last_token = token
|
||||
|
||||
|
||||
def extract_messages_from_code(code):
|
||||
"""
|
||||
Extracts translatable strings from a code file
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue