from io import BufferedReader def extract(fileobj: BufferedReader, keywords: str, comment_tags: tuple, options: dict): code = fileobj.read().decode("utf-8") for lineno, funcname, messages in extract_javascript(code, "__", options): if not messages or not messages[0]: continue # `funcname` here will be `__` which is our translation function. We # have to convert it back to usual function names funcname = "gettext" if isinstance(messages, tuple): if len(messages) == 3 and messages[2]: funcname = "pgettext" messages = (messages[2], messages[0]) else: messages = messages[0] yield lineno, funcname, messages, [] def extract_javascript(code, keywords=("__",), options=None, lineno=1): """Extract messages from JavaScript source code. This is a modified version of babel's JS parser. Reused under BSD license. License: https://github.com/python-babel/babel/blob/master/LICENSE Changes from upstream: - Preserve arguments, babel's parser flattened all values in args, we need order because we use different syntax for translation which can contain 2nd arg which is array of many values. If argument is non-primitive type then value is NOT returned in args. E.g. __("0", ["1", "2"], "3") -> ("0", None, "3") - remove comments support - changed signature to accept string directly. :param code: code as string :param keywords: a list of keywords (i.e. function names) that should be recognized as translation functions :param options: a dictionary of additional options (optional) Supported options are: * `template_string` -- set to false to disable ES6 template string support. """ from babel.messages.jslexer import Token, tokenize, unquote_string if options is None: options = {} funcname = message_lineno = None messages = [] last_argument = None concatenate_next = False last_token = None call_stack = -1 # Tree level = depth inside function call tree # Example: __("0", ["1", "2"], "3") # Depth __() # / | \ # 0 "0" [...] "3" <- only 0th level strings matter # / \ # 1 "1" "2" tree_level = 0 opening_operators = {"[", "{"} closing_operators = {"]", "}"} all_container_operators = opening_operators.union(closing_operators) dotted = any("." in kw for kw in keywords) for token in tokenize( code, jsx=True, dotted=dotted, template_string=options.get("template_string", True), lineno=lineno, ): if ( # Turn keyword`foo` expressions into keyword("foo") calls: funcname and (last_token and last_token.type == "name") # have a keyword... and token.type # we've seen nothing after the keyword... == "template_string" # this is a template string ): message_lineno = token.lineno messages = [unquote_string(token.value)] call_stack = 0 tree_level = 0 token = Token("operator", ")", token.lineno) if not funcname and token.type == "template_string": yield from parse_template_string(token.value, keywords, options, token.lineno) if token.type == "operator" and token.value == "(": if funcname: message_lineno = token.lineno call_stack += 1 elif call_stack >= 0 and token.type == "operator" and token.value in all_container_operators: if token.value in opening_operators: tree_level += 1 if token.value in closing_operators: tree_level -= 1 elif call_stack == -1 and token.type == "linecomment" or token.type == "multilinecomment": pass # ignore comments elif funcname and call_stack == 0: if token.type == "operator" and token.value == ")": if last_argument is not None: messages.append(last_argument) if len(messages) > 1: messages = tuple(messages) elif messages: messages = messages[0] else: messages = None if messages is not None: yield (message_lineno, funcname, messages) funcname = message_lineno = last_argument = None concatenate_next = False messages = [] call_stack = -1 tree_level = 0 elif token.type in ("string", "template_string"): new_value = unquote_string(token.value) if tree_level > 0: pass elif concatenate_next: last_argument = (last_argument or "") + new_value concatenate_next = False else: last_argument = new_value elif token.type == "operator": if token.value == ",": if last_argument is not None: messages.append(last_argument) last_argument = None else: if tree_level == 0: messages.append(None) concatenate_next = False elif token.value == "+": concatenate_next = True elif call_stack > 0 and token.type == "operator" and token.value == ")": call_stack -= 1 tree_level = 0 elif funcname and call_stack == -1: funcname = None elif ( call_stack == -1 and token.type == "name" and token.value in keywords and (last_token is None or last_token.type != "name" or last_token.value != "function") ): funcname = token.value last_token = token def parse_template_string( template_string, keywords, options, lineno=1, ): """Parse JavaScript template string. This is a modified version of babel's JS parser. Reused under BSD license. License: https://github.com/python-babel/babel/blob/master/LICENSE :param template_string: the template string to be parsed :param keywords: a list of keywords (i.e. function names) that should be recognized as translation functions :param options: a dictionary of additional options (optional) :param lineno: starting line number (optional) """ from babel.messages.jslexer import line_re prev_character = None level = 0 inside_str = False expression_contents = "" for character in template_string[1:-1]: if not inside_str and character in ('"', "'", "`"): inside_str = character elif inside_str == character and prev_character != r"\\": inside_str = False if level: expression_contents += character if not inside_str: if character == "{" and prev_character == "$": level += 1 elif level and character == "}": level -= 1 if level == 0 and expression_contents: expression_contents = expression_contents[:-1] yield from extract_javascript(expression_contents, keywords, options, lineno) lineno += len(line_re.findall(expression_contents)) expression_contents = "" prev_character = character