feat: use AST to extract translation sources

2022-08-01 23:13:01 +05:30 · 2022-08-01 23:13:01 +05:30 · ea836a824a
commit ea836a824a
parent 0cb4da1195
2 changed files with 88 additions and 5 deletions
--- a/frappe/tests/test_translate.py
+++ b/frappe/tests/test_translate.py
@ -1,6 +1,7 @@
 # Copyright (c) 2021, Frappe Technologies Pvt. Ltd. and Contributors
 # License: MIT. See LICENSE
 import os
+import textwrap
 import unittest
 from random import choices
 from unittest.mock import patch
@ -8,7 +9,12 @@ from unittest.mock import patch
 import frappe
 import frappe.translate
 from frappe import _
-from frappe.translate import get_language, get_parent_language, get_translation_dict_from_file
+from frappe.translate import (
+	extract_messages_from_python_code,
+	get_language,
+	get_parent_language,
+	get_translation_dict_from_file,
+)
 from frappe.utils import set_request

 dirname = os.path.dirname(__file__)
@ -125,6 +131,31 @@ class TestTranslate(unittest.TestCase):
 		"""Load all CSV files to ensure they have correct format"""
 		verify_translation_files("frappe")

+	def test_python_ast_extractor(self):
+
+		code = textwrap.dedent(
+			"""
+			frappe._("attr")
+			_("name")
+			frappe._("attr with", context="attr context")
+			_("name with", context="name context")
+			_("broken on",
+				context="new line")
+			__("This wont be captured")
+			__init__("This shouldn't too")
+		"""
+		)
+		expected_output = [
+			(2, "attr", None),
+			(3, "name", None),
+			(4, "attr with", "attr context"),
+			(5, "name with", "name context"),
+			(6, "broken on", "new line"),
+		]
+
+		output = extract_messages_from_python_code(code)
+		self.assertEqual(output, expected_output)
+

 def verify_translation_files(app):
 	"""Function to verify translation file syntax in app."""
--- a/frappe/translate.py
+++ b/frappe/translate.py
@ -7,8 +7,8 @@
 	Translation tools for frappe
 """

+import ast
 import functools
-import io
 import itertools
 import json
 import operator
@ -682,7 +682,7 @@ def get_all_messages_from_js_files(app_name=None):
 	return messages


-def get_messages_from_file(path: str) -> list[tuple[str, str, str, str]]:
+def get_messages_from_file(path: str) -> list[tuple[str, str, str | None, int]]:
 	"""Returns a list of transatable strings from a code file

 	:param path: path of the code file
@ -704,19 +704,71 @@ def get_messages_from_file(path: str) -> list[tuple[str, str, str, str]]:
 				print(f"Could not scan file for translation: {path}")
 				return []

+			if path.lower().endswith(".py"):
+				messages = extract_messages_from_python_code(file_contents)
+			else:
+				messages = extract_messages_from_code(file_contents)
 			return [
 				(os.path.relpath(path, bench_path), message, context, line)
-				for (line, message, context) in extract_messages_from_code(file_contents)
+				for (line, message, context) in messages
 			]
 	else:
 		return []


+def extract_messages_from_python_code(code: str) -> list[tuple[int, str, str | None]]:
+	"""Extracts translatable strings from python code using AST"""
+
+	tree = ast.parse(code)
+
+	TRANSLATE_FUNCTION = "_"
+	messages = []
+
+	for node in ast.walk(tree):
+		if not isinstance(node, ast.Call):
+			continue
+
+		# frappe._(...)
+		direct_call = isinstance(node.func, ast.Attribute) and node.func.attr == TRANSLATE_FUNCTION
+		# from frappe import _
+		# _(...)
+		imported_call = isinstance(node.func, ast.Name) and node.func.id == TRANSLATE_FUNCTION
+
+		if not (direct_call or imported_call):
+			continue
+
+		message = _extract_message_from_translation_node(node)
+		if message:
+			messages.append(message)
+
+	return messages
+
+
+def _extract_message_from_translation_node(node: ast.Call) -> tuple[int, str, str | None]:
+
+	# extract source text
+	source_text = None
+	if node.args and isinstance(node.args[0], ast.Constant):
+		source_text = node.args[0].value
+	if not isinstance(source_text, str):
+		# you can have non-str default args which don't make sense for translations
+		return
+
+	# Extract context, a kwarg called "context" should exist.
+	context = None
+	for kw in node.keywords:
+		if kw.arg != "context":
+			continue
+		if isinstance(kw.value, ast.Constant) and isinstance(kw.value.value, str):
+			context = kw.value.value
+
+	return (node.lineno, source_text, context)
+
+
 def extract_messages_from_code(code):
 	"""
 	Extracts translatable strings from a code file
 	:param code: code from which translatable files are to be extracted
-	:param is_py: include messages in triple quotes e.g. `_('''message''')`
 	"""
 	from jinja2 import TemplateError