fix: move extract_javascript to extractors folder and use it

instead of babels js extractor
2023-12-28 17:00:46 +01:00 · 2023-12-28 17:00:46 +01:00 · a53e819022
commit a53e819022
parent e70c3092da
3 changed files with 151 additions and 154 deletions
--- a/frappe/gettext/extractors/javascript.py
+++ b/frappe/gettext/extractors/javascript.py
@ -1,26 +1,163 @@
-from babel.messages.extract import extract_javascript
+from io import BufferedReader


-def extract(fileobj, keywords, comment_tags, options):
-	# We use `__` as our translation function
-	keywords = "__"
+def extract(fileobj: BufferedReader, keywords: str, comment_tags: tuple, options: dict):
+	code = fileobj.read().decode("utf-8")
+
+	for lineno, funcname, messages in extract_javascript(code, "__", options):
+		if not messages or not messages[0]:
+			continue

-	for lineno, funcname, messages, comments in extract_javascript(
-		fileobj, keywords, comment_tags, options
-	):
 		# `funcname` here will be `__` which is our translation function. We
 		# have to convert it back to usual function names
 		funcname = "gettext"

 		if isinstance(messages, tuple):
-			if len(messages) == 3:
+			if len(messages) == 3 and messages[2]:
 				funcname = "pgettext"
 				messages = (messages[2], messages[0])
 			else:
 				messages = messages[0]

-		# ignore empty messages like `__(myvar)``
-		if not messages:
-			continue
+		yield lineno, funcname, messages, []

-		yield lineno, funcname, messages, comments
+
+def extract_javascript(code, keywords=("__",), options=None):
+	"""Extract messages from JavaScript source code.
+
+	This is a modified version of babel's JS parser. Reused under BSD license.
+	License: https://github.com/python-babel/babel/blob/master/LICENSE
+
+	Changes from upstream:
+	- Preserve arguments, babel's parser flattened all values in args,
+	  we need order because we use different syntax for translation
+	  which can contain 2nd arg which is array of many values. If
+	  argument is non-primitive type then value is NOT returned in
+	  args.
+	  E.g. __("0", ["1", "2"], "3") -> ("0", None, "3")
+	- remove comments support
+	- changed signature to accept string directly.
+
+	:param code: code as string
+	:param keywords: a list of keywords (i.e. function names) that should be
+	                 recognized as translation functions
+	:param options: a dictionary of additional options (optional)
+	                Supported options are:
+	                * `template_string` -- set to false to disable ES6
+	                                       template string support.
+	"""
+	from babel.messages.jslexer import Token, tokenize, unquote_string
+
+	if options is None:
+		options = {}
+
+	funcname = message_lineno = None
+	messages = []
+	last_argument = None
+	concatenate_next = False
+	last_token = None
+	call_stack = -1
+
+	# Tree level = depth inside function call tree
+	#  Example: __("0", ["1", "2"], "3")
+	# Depth         __()
+	#             /   |   \
+	#   0       "0" [...] "3"  <- only 0th level strings matter
+	#                /  \
+	#   1          "1"  "2"
+	tree_level = 0
+	opening_operators = {"[", "{"}
+	closing_operators = {"]", "}"}
+	all_container_operators = opening_operators.union(closing_operators)
+	dotted = any("." in kw for kw in keywords)
+
+	for token in tokenize(
+		code,
+		jsx=True,
+		template_string=options.get("template_string", True),
+		dotted=dotted,
+	):
+		if (  # Turn keyword`foo` expressions into keyword("foo") calls:
+			funcname
+			and (last_token and last_token.type == "name")  # have a keyword...
+			and token.type  # we've seen nothing after the keyword...
+			== "template_string"  # this is a template string
+		):
+			message_lineno = token.lineno
+			messages = [unquote_string(token.value)]
+			call_stack = 0
+			tree_level = 0
+			token = Token("operator", ")", token.lineno)
+
+		if token.type == "operator" and token.value == "(":
+			if funcname:
+				message_lineno = token.lineno
+				call_stack += 1
+
+		elif call_stack >= 0 and token.type == "operator" and token.value in all_container_operators:
+			if token.value in opening_operators:
+				tree_level += 1
+			if token.value in closing_operators:
+				tree_level -= 1
+
+		elif call_stack == -1 and token.type == "linecomment" or token.type == "multilinecomment":
+			pass  # ignore comments
+
+		elif funcname and call_stack == 0:
+			if token.type == "operator" and token.value == ")":
+				if last_argument is not None:
+					messages.append(last_argument)
+				if len(messages) > 1:
+					messages = tuple(messages)
+				elif messages:
+					messages = messages[0]
+				else:
+					messages = None
+
+				if messages is not None:
+					yield (message_lineno, funcname, messages)
+
+				funcname = message_lineno = last_argument = None
+				concatenate_next = False
+				messages = []
+				call_stack = -1
+				tree_level = 0
+
+			elif token.type in ("string", "template_string"):
+				new_value = unquote_string(token.value)
+				if tree_level > 0:
+					pass
+				elif concatenate_next:
+					last_argument = (last_argument or "") + new_value
+					concatenate_next = False
+				else:
+					last_argument = new_value
+
+			elif token.type == "operator":
+				if token.value == ",":
+					if last_argument is not None:
+						messages.append(last_argument)
+						last_argument = None
+					else:
+						if tree_level == 0:
+							messages.append(None)
+					concatenate_next = False
+				elif token.value == "+":
+					concatenate_next = True
+
+		elif call_stack > 0 and token.type == "operator" and token.value == ")":
+			call_stack -= 1
+			tree_level = 0
+
+		elif funcname and call_stack == -1:
+			funcname = None
+
+		elif (
+			call_stack == -1
+			and token.type == "name"
+			and token.value in keywords
+			and (last_token is None or last_token.type != "name" or last_token.value != "function")
+		):
+			funcname = token.value
+
+		last_token = token
--- a/frappe/tests/test_translate.py
+++ b/frappe/tests/test_translate.py
@ -8,12 +8,12 @@ from unittest.mock import patch
 import frappe
 import frappe.translate
 from frappe import _
+from frappe.gettext.extractors.javascript import extract_javascript
 from frappe.tests.utils import FrappeTestCase
 from frappe.translate import (
 	MERGED_TRANSLATION_KEY,
 	USER_TRANSLATION_KEY,
 	clear_cache,
-	extract_javascript,
 	extract_messages_from_javascript_code,
 	extract_messages_from_python_code,
 	get_language,
--- a/frappe/translate.py
+++ b/frappe/translate.py
@ -19,6 +19,7 @@ from contextlib import contextmanager, suppress
 from csv import reader, writer

 import frappe
+from frappe.gettext.extractors.javascript import extract_javascript
 from frappe.gettext.translate import get_translations_from_mo
 from frappe.model.utils import InvalidIncludePath, render_include
 from frappe.query_builder import DocType, Field
@ -675,147 +676,6 @@ def extract_messages_from_javascript_code(code: str) -> list[tuple[int, str, str
 	return messages


-def extract_javascript(code, keywords=("__",), options=None):
-	"""Extract messages from JavaScript source code.
-
-	This is a modified version of babel's JS parser. Reused under BSD license.
-	License: https://github.com/python-babel/babel/blob/master/LICENSE
-
-	Changes from upstream:
-	- Preserve arguments, babel's parser flattened all values in args,
-	  we need order because we use different syntax for translation
-	  which can contain 2nd arg which is array of many values. If
-	  argument is non-primitive type then value is NOT returned in
-	  args.
-	  E.g. __("0", ["1", "2"], "3") -> ("0", None, "3")
-	- remove comments support
-	- changed signature to accept string directly.
-
-	:param code: code as string
-	:param keywords: a list of keywords (i.e. function names) that should be
-	                 recognized as translation functions
-	:param options: a dictionary of additional options (optional)
-	                Supported options are:
-	                * `template_string` -- set to false to disable ES6
-	                                       template string support.
-	"""
-	from babel.messages.jslexer import Token, tokenize, unquote_string
-
-	if options is None:
-		options = {}
-
-	funcname = message_lineno = None
-	messages = []
-	last_argument = None
-	concatenate_next = False
-	last_token = None
-	call_stack = -1
-
-	# Tree level = depth inside function call tree
-	#  Example: __("0", ["1", "2"], "3")
-	# Depth         __()
-	#             /   |   \
-	#   0       "0" [...] "3"  <- only 0th level strings matter
-	#                /  \
-	#   1          "1"  "2"
-	tree_level = 0
-	opening_operators = {"[", "{"}
-	closing_operators = {"]", "}"}
-	all_container_operators = opening_operators.union(closing_operators)
-	dotted = any("." in kw for kw in keywords)
-
-	for token in tokenize(
-		code,
-		jsx=True,
-		template_string=options.get("template_string", True),
-		dotted=dotted,
-	):
-		if (  # Turn keyword`foo` expressions into keyword("foo") calls:
-			funcname
-			and (last_token and last_token.type == "name")  # have a keyword...
-			and token.type  # we've seen nothing after the keyword...
-			== "template_string"  # this is a template string
-		):
-			message_lineno = token.lineno
-			messages = [unquote_string(token.value)]
-			call_stack = 0
-			tree_level = 0
-			token = Token("operator", ")", token.lineno)
-
-		if token.type == "operator" and token.value == "(":
-			if funcname:
-				message_lineno = token.lineno
-				call_stack += 1
-
-		elif call_stack >= 0 and token.type == "operator" and token.value in all_container_operators:
-			if token.value in opening_operators:
-				tree_level += 1
-			if token.value in closing_operators:
-				tree_level -= 1
-
-		elif call_stack == -1 and token.type == "linecomment" or token.type == "multilinecomment":
-			pass  # ignore comments
-
-		elif funcname and call_stack == 0:
-			if token.type == "operator" and token.value == ")":
-				if last_argument is not None:
-					messages.append(last_argument)
-				if len(messages) > 1:
-					messages = tuple(messages)
-				elif messages:
-					messages = messages[0]
-				else:
-					messages = None
-
-				if messages is not None:
-					yield (message_lineno, funcname, messages)
-
-				funcname = message_lineno = last_argument = None
-				concatenate_next = False
-				messages = []
-				call_stack = -1
-				tree_level = 0
-
-			elif token.type in ("string", "template_string"):
-				new_value = unquote_string(token.value)
-				if tree_level > 0:
-					pass
-				elif concatenate_next:
-					last_argument = (last_argument or "") + new_value
-					concatenate_next = False
-				else:
-					last_argument = new_value
-
-			elif token.type == "operator":
-				if token.value == ",":
-					if last_argument is not None:
-						messages.append(last_argument)
-						last_argument = None
-					else:
-						if tree_level == 0:
-							messages.append(None)
-					concatenate_next = False
-				elif token.value == "+":
-					concatenate_next = True
-
-		elif call_stack > 0 and token.type == "operator" and token.value == ")":
-			call_stack -= 1
-			tree_level = 0
-
-		elif funcname and call_stack == -1:
-			funcname = None
-
-		elif (
-			call_stack == -1
-			and token.type == "name"
-			and token.value in keywords
-			and (last_token is None or last_token.type != "name" or last_token.value != "function")
-		):
-			funcname = token.value
-
-		last_token = token
-
-
 def extract_messages_from_code(code):
 	"""
 	Extracts translatable strings from a code file