seitime-frappe/webnotes/translate.py

# Copyright (c) 2013, Web Notes Technologies Pvt. Ltd.
# MIT License. See license.txt

from __future__ import unicode_literals

"""
Contributing:
1. Add the .csv file
2. Run import
3. Then run translate
"""

import webnotes
import os
import codecs
import json
import re
from csv import reader
from webnotes.modules import get_doc_path
from webnotes.utils import get_base_path

# module-level store of loaded translations; load_doc_messages() merges the
# language data of each document into this dict
messages = {}

def translate(lang=None):
	"""Rebuild the message files and machine-translate them.

	`lang` is a single language code. `None` or "all" selects every language
	returned by `get_all_languages()`. "en" is never translated.

	All messages are first exported to the temporary file `_lang_tmp.csv` in
	the current directory; for each language that file is translated into
	`app/translations/<lang>.csv`, which is then imported into the per-folder
	language files. The temporary file is removed even if a step fails.
	"""
	if lang is None or lang == "all":
		languages = get_all_languages()
	else:
		languages = [lang]

	tmpfile = '_lang_tmp.csv'

	print("Extracting / updating translatable strings...")
	build_message_files()

	try:
		print("Compiling messages in one file...")
		export_messages(lang, tmpfile)

		for language in languages:
			if language == "en":
				continue

			filename = 'app/translations/' + language + '.csv'
			print("For " + language + ":")
			print("Translating via Google Translate...")
			google_translate(language, tmpfile, filename)
			print("Updating language files...")
			import_messages(language, filename)
	finally:
		# clean up on failure too; the existence check keeps a failed export
		# from masking the original exception with a "no such file" error
		print("Deleting temp file...")
		if os.path.exists(tmpfile):
			os.remove(tmpfile)

def get_all_languages():
	"""Return the language codes that have a `.csv` file in `app/translations`.

	The code is the file name without its `.csv` extension. The path is
	relative to the current working directory. Returns an empty list when the
	folder does not exist; any other `OSError` is re-raised unchanged.
	"""
	import errno

	try:
		filenames = os.listdir("app/translations")
	except OSError as e:
		if e.errno == errno.ENOENT:
			return []
		# bare raise keeps the original traceback
		raise

	return [f[:-4] for f in filenames if f.endswith(".csv")]

def get_lang_dict():
	"""Return the parsed `app/translations/languages.json` found under the
	base path, or an empty dict when that file does not exist."""
	languages_path = os.path.join(get_base_path(), "app", "translations", "languages.json")
	if not os.path.exists(languages_path):
		return {}

	with open(languages_path, "r") as langfile:
		return json.load(langfile)

def update_translations():
	"""
	compare language file timestamps with last updated timestamps in `.wnf-lang-status`
	if timestamps are missing / changed, build new `.json` files in the `lang folders`

	Does nothing when `app/translations` does not exist. The message files are
	rebuilt at most once per call, before the first language that needs an
	update. The status file `app/.wnf-lang-status` is rewritten on every run.
	"""
	import ast

	status_file_path = "app/.wnf-lang-status"

	if not os.path.exists(os.path.join('app', 'translations')):
		return

	langstatus = {}
	if os.path.exists(status_file_path):
		with open(status_file_path, "r") as langstatusfile:
			content = langstatusfile.read()
		if content.strip():
			# the status file holds a dict literal written with str();
			# literal_eval parses that format without executing the file
			# contents the way eval() would
			langstatus = ast.literal_eval(content)

	message_updated = False
	for lang in get_all_languages():
		filename = os.path.join('app', 'translations', lang + '.csv')
		if langstatus.get(lang, None) != os.path.getmtime(filename):
			print("Setting up lang files for " + lang + "...")
			if not message_updated:
				print("Extracting / updating translatable strings...")
				build_message_files()
				message_updated = True
			print("Writing translations...")
			import_messages(lang, filename)
			langstatus[lang] = os.path.getmtime(filename)

	with open(status_file_path, "w") as langstatusfile:
		langstatusfile.write(str(langstatus))

def build_message_files():
	"""Rebuild every message file: pages, doctype code, database documents
	and framework code, in that order. Connects to the database first if
	there is no connection yet."""
	if not webnotes.conn:
		webnotes.connect()

	code_roots = ('lib/core', 'app')

	for root in code_roots:
		build_for_pages(root)

	for root in code_roots:
		build_from_doctype_code(root)

	# doctype
	build_from_database()

	framework_targets = (
		('lib/webnotes', 'py', True),
		('lib/public/js/wn', 'js', False),
		('app/public/js', 'js', True),
	)
	for target_path, mtype, with_names in framework_targets:
		build_for_framework(target_path, mtype, with_doctype_names=with_names)

def build_for_pages(path):
	"""Write js and py message files for every page folder below `path`.

	A page folder is any directory whose parent directory is named `page`.
	Messages are collected from the `.js` and `.py` files directly inside
	that folder; a message file is only written for a type that has at least
	one message.
	"""
	for (basepath, folders, files) in os.walk(path):
		if os.path.basename(os.path.dirname(basepath)) != "page":
			continue

		messages_js, messages_py = [], []
		for fname in files:
			if fname.endswith('.js'):
				messages_js += get_message_list(os.path.join(basepath, fname))
			elif fname.endswith('.py'):
				messages_py += get_message_list(os.path.join(basepath, fname))

		if messages_js:
			write_messages_file(basepath, messages_js, "js")
		if messages_py:
			write_messages_file(basepath, messages_py, "py")

def build_from_database():
	"""Collect translatable strings of all DocTypes: names, modules,
	descriptions, field labels, field descriptions and Select options."""
	def get_select_options(doc):
		"""Return the option lines of a Select DocField, [] for anything else
		(including `link:` and `attach_files:` options)."""
		if doc.doctype != "DocField" or doc.fieldtype != 'Select' or not doc.options:
			return []
		if doc.options.startswith("link:") or doc.options.startswith("attach_files:"):
			return []
		return doc.options.split('\n')

	spec = webnotes._dict({
		"doctype": "DocType",
		"module_field": "module",
		"DocType": ["name", "description", "module"],
		"DocField": ["label", "description"],
		"custom": get_select_options
	})
	build_for_doc_from_database(spec)

def build_for_doc_from_database(fields):
	"""Write a `doc` message file for every document of `fields.doctype`.

	`fields` maps a doctype name to the list of field names whose values are
	translatable, and may carry a `custom` callable that returns extra
	messages for a document. The message file is written to the document's
	path, and only when the parent document has a value in
	`fields.module_field`.
	"""
	names = webnotes.conn.sql("""select name from `tab%s`""" % fields.doctype, as_dict=1)
	for item in names:
		collected = []
		doclist = webnotes.bean(fields.doctype, item.name).doclist

		for doc in doclist:
			if doc.doctype in fields:
				wanted = fields[doc.doctype]
				# non-matching or empty fields yield None, which is dropped
				# later when the message file is written
				collected += [(key in wanted and doc.fields.get(key)) or None
					for key in doc.fields.keys()]

			if fields.custom:
				collected += fields.custom(doc)

		parent = doclist[0]
		module = parent.fields.get(fields.module_field)
		if module:
			doctype_path = get_doc_path(module, parent.doctype, parent.name)
			write_messages_file(doctype_path, collected, 'doc')

def build_for_framework(path, mtype, with_doctype_names = False):
	"""Write one message file at `path` with the messages of every
	`.<mtype>` file below it, plus the module labels from the config and,
	optionally, all DocType and module names."""
	suffix = '.' + mtype
	collected = []

	for (basepath, folders, files) in os.walk(path):
		for fname in files:
			if fname.endswith(suffix):
				collected += get_message_list(os.path.join(basepath, fname))

	# append module & doctype names
	if with_doctype_names:
		for row in webnotes.conn.sql("""select name, module from `tabDocType`"""):
			collected.extend((row[0], row[1]))

	# append labels from config.json
	config = webnotes.get_config()
	collected += [moduleinfo["label"] for moduleinfo in config["modules"].values()
		if moduleinfo.get("label")]

	if collected:
		write_messages_file(path, collected, mtype)

def build_from_doctype_code(path):
	"""Walk `path` and write py / js message files in every folder.

	For each folder the messages of its `.py` and `.js` files are collected
	separately; a message file is written only for a type that actually has
	messages.
	"""
	for (basepath, folders, files) in os.walk(path):
		messagespy = []
		messagesjs = []
		for fname in files:
			filepath = os.path.join(basepath, fname)
			# match the real extension (with the dot), as build_for_pages does
			if fname.endswith('.py'):
				messagespy += get_message_list(filepath)
			elif fname.endswith('.js'):
				messagesjs += get_message_list(filepath)

		if messagespy:
			write_messages_file(basepath, messagespy, 'py')

		# the js file depends on the js messages; checking the py list here
		# dropped js messages of folders without py messages
		if messagesjs:
			write_messages_file(basepath, messagesjs, 'js')

def get_message_list(path):
	"""Return the string literals wrapped in `_(...)` in the file at `path`.

	Three literal forms are recognised, and the result lists all matches of
	each form in this order: `_("...")`, `_('...')`, `_(\"\"\"...\"\"\")`.
	Duplicates are kept. A literal containing its own quote character is not
	matched.
	"""
	with open(path, 'r') as sourcefile:
		txt = sourcefile.read()

	# raw strings: `\(` is not a valid escape sequence in a normal literal
	messages = re.findall(r'_\("([^"]*)"\)', txt)
	messages += re.findall(r"_\('([^']*)'\)", txt)
	messages += re.findall(r'_\("{3}([^"]*)"{3}\)', txt, re.S)

	return messages

def write_messages_file(path, messages, mtype):
	"""Write the unique, non-empty messages containing at least one ASCII
	letter to `<path>/locale/_messages_<mtype>.json`.

	Does nothing when `path` does not exist; creates the `locale` folder
	when needed.
	"""
	if not os.path.exists(path):
		return

	locale_dir = os.path.join(path, 'locale')
	if not os.path.exists(locale_dir):
		os.makedirs(locale_dir)

	# de-duplicate first, then drop empty values and strings without letters
	keep = [msg for msg in set(messages) if msg and re.search('[a-zA-Z]+', msg)]

	target = os.path.join(locale_dir, '_messages_' + mtype + '.json')
	with open(target, 'w') as msgfile:
		msgfile.write(json.dumps(keep, indent=1))

def export_messages(lang, outfile):
	"""Collect the messages of every `locale` folder below the current
	directory and write them to the csv file `outfile`, one row per message:
	the message and its existing translation for `lang` (empty if none).
	Rows are sorted by message. Nothing is written when `outfile` is empty."""
	collected = {}

	def _collect(locale_path, mtype):
		mlist = get_messages(locale_path, mtype)
		if not mlist:
			return

		# update messages with already existing translations
		langdata = get_lang_data(locale_path, lang, mtype)
		for m in mlist:
			if not collected.get(m):
				collected[m] = langdata.get(m, "")

	# extract messages
	for (basepath, folders, files) in os.walk('.'):
		if os.path.basename(basepath) != 'locale':
			continue
		for mtype in ('doc', 'py', 'js'):
			_collect(basepath, mtype)

	if not outfile:
		return

	from csv import writer
	with open(outfile, 'w') as msgfile:
		w = writer(msgfile)
		for m in sorted(collected.keys()):
			w.writerow([m.encode('utf-8'), collected.get(m, '').encode('utf-8')])

def import_messages(lang, infile):
	"""Make individual message files for each language.

	Reads message / translation pairs from the csv file `infile` and, in
	every `locale` folder below the current directory, creates or updates
	`<lang>-<mtype>.json` (for example hi-doc.json, hi-js.json, hi-py.json)
	with the translations of the messages listed in that folder. Existing
	entries are kept unless the csv has a non-empty translation for them.
	"""
	# blank or single-column csv lines would make dict() raise; only rows
	# that carry both a message and a translation column are usable
	data = dict((row[0], row[1])
		for row in get_all_messages_from_file(infile) if len(row) >= 2)

	def _update_lang_file(locale_path, mtype):
		"""create a language file for the given message type"""
		messages = get_messages(locale_path, mtype)
		if not messages:
			return

		# read existing
		langdata = get_lang_data(locale_path, lang, mtype)

		# update fresh
		for m in messages:
			translated = data.get(m)
			if translated:
				langdata[m] = translated

		if langdata:
			# write new langfile
			langfilename = os.path.join(locale_path, lang + '-' + mtype + '.json')
			with open(langfilename, 'w') as langfile:
				langfile.write(json.dumps(langdata, indent=1, sort_keys=True).encode('utf-8'))

	for (basepath, folders, files) in os.walk('.'):
		if os.path.basename(basepath) == 'locale':
			# make / update lang files for each type of message file (doc, js, py)
			for mtype in ('doc', 'js', 'py'):
				_update_lang_file(basepath, mtype)

# "<lang>:<doc path>" keys of the documents whose messages were already loaded
docs_loaded = []
def load_doc_messages(module, doctype, name):
	"""Merge the `doc` translations of a document, for the current language,
	into the module-level `messages` dict.

	Returns {} without loading anything when the language is "en"; returns
	None otherwise. Each language / document pair is loaded only once.
	"""
	if webnotes.lang=="en":
		return {}

	doc_path = get_doc_path(module, doctype, name)
	loaded_key = webnotes.lang + ":" + doc_path

	# don't reload the same doc again
	if loaded_key in docs_loaded:
		return

	docs_loaded.append(loaded_key)
	messages.update(get_lang_data(doc_path, None, 'doc'))

def get_lang_data(basepath, lang, mtype):
	"""Return the dict stored in `<lang>-<mtype>.json` of a locale folder.

	`basepath` is either the `locale` folder itself or its parent. When
	`lang` is empty the current `webnotes.lang` is used. Returns {} when
	the language file does not exist.
	"""
	if os.path.basename(basepath) == 'locale':
		locale_dir = basepath
	else:
		# add "locale" folder if reqd
		locale_dir = os.path.join(basepath, 'locale')

	lang = lang or webnotes.lang
	langfile_path = os.path.join(locale_dir, lang + '-' + mtype + '.json')

	if not os.path.exists(langfile_path):
		return {}

	with codecs.open(langfile_path, 'r', 'utf-8') as langfile:
		return json.loads(langfile.read())

def get_messages(basepath, mtype):
	"""Return the message list stored in `_messages_<mtype>.json` inside
	`basepath`, or [] when that file does not exist."""
	path = os.path.join(basepath, '_messages_' + mtype + '.json')
	if not os.path.exists(path):
		return []

	with open(path, 'r') as msgfile:
		return json.load(msgfile)

def update_lang_js(jscode, path):
	"""Return `jscode` followed by a statement that extends `wn._messages`
	with the js translations found at `path` for the current language."""
	translations = json.dumps(get_lang_data(path, webnotes.lang, 'js'))
	return jscode + "\n\n$.extend(wn._messages, %s)" % translations

def get_all_messages_from_file(path):
	"""Read the utf-8 csv file at `path` and return its rows as lists of
	unicode strings."""
	with codecs.open(path, 'r', 'utf-8') as msgfile:
		content = msgfile.read()

	# the csv reader is fed utf-8 encoded lines; every cell it yields is
	# decoded back to unicode
	encoded_lines = [line.encode('utf-8') for line in content.splitlines()]
	return [[unicode(cell, 'utf-8') for cell in record]
		for record in reader(encoded_lines)]

def google_translate(lang, infile, outfile):
	"""Translate objects using Google API. Add your own API key for translation.

	Reads message rows from the csv file `infile` and writes message /
	translation rows to the csv file `outfile`. A translation already present
	in an existing `outfile` is reused; every other message is sent to the
	Google Translate API with source language "en" and target `lang`, using
	`conf.google_api_key`. A message for which the API reports an error is
	printed and left out of `outfile`. Rows with an empty message are skipped.
	"""
	data = get_all_messages_from_file(infile)

	import requests
	from webnotes import conf
	from csv import writer

	old_translations = {}

	# update existing translations
	if os.path.exists(outfile):
		with codecs.open(outfile, "r", "utf-8") as oldfile:
			old_data = oldfile.read()
		old_rows = reader([r.encode('utf-8').strip() for r in old_data.splitlines()])
		# blank or single-column lines would make dict() raise; skip them
		old_translations = dict((r[0], r[1]) for r in old_rows if len(r) >= 2)

	with open(outfile, 'w') as msgfile:
		w = writer(msgfile)
		for row in data:
			if not (row and row[0] and row[0].strip()):
				continue

			# make sure there is a translation column to fill in
			if len(row) < 2:
				row.append("")

			source = row[0].strip()
			if old_translations.get(source):
				row[1] = old_translations[source]
			else:
				print('translating: ' + row[0])
				response = requests.get("""https://www.googleapis.com/language/translate/v2""",
					params = {
						"key": conf.google_api_key,
						"source": "en",
						"target": lang,
						"q": row[0]
					})

				# requests >= 1.0 exposes json as a method, older releases
				# as a property; accept both
				result = response.json
				if callable(result):
					result = result()

				if "error" in result:
					print(result)
					continue

				row[1] = result["data"]["translations"][0]["translatedText"]
				if not row[1]:
					row[1] = row[0] # google unable to translate!

				row[1] = row[1].encode('utf-8')

			row[0] = row[0].encode('utf-8')
			w.writerow(row)