meerk40t/translate.py

205 lines
No EOL
7 KiB
Python

import os
import re
import sys
def are_curly_brackets_matched(input_str):
stack = []
escaped = False
for char in input_str:
if char == "\\":
escaped = True
continue
if escaped:
escaped = False
continue
if char == "{":
stack.append("{")
elif char == "}":
if not stack:
return False
stack.pop()
return not stack
def contain_smart_quotes(line):
# Check for ”
l = line.strip()
return bool(
l.startswith("msgid ”")
or l.startswith("msgstr ”")
or l.startswith("")
)
def find_erroneous_translations(file_path):
with open(file_path, "r", encoding="utf-8") as file:
file_lines = file.readlines()
found_error = False
index = 0
msgids = []
msgstrs = []
lineids = []
for i, line in enumerate(file_lines):
if not are_curly_brackets_matched(line):
found_error = True
print(f"Error: {file_path}\nLine {i} has mismatched curly braces:\n{line}")
if contain_smart_quotes(line):
found_error = True
print(f"Error: {file_path}\nLine {i} contains invalid quotes:\n{line}")
m_id = ""
m_msg = ""
while index < len(file_lines):
try:
if file_lines[index].strip() == "" or file_lines[index].startswith("#"):
pass
else:
msgids.append("")
lineids.append(index)
# Find msgid and all multi-lined message ids
if re.match('msgid \s*"(.*)"', file_lines[index]):
m = re.match('msgid \s*"(.*)"', file_lines[index])
msgids[-1] = m.group(1)
m_id = m.group(1)
index += 1
if index >= len(file_lines):
break
while re.match('^"(.*)"$', file_lines[index]):
m = re.match('^"(.*)"$', file_lines[index])
msgids[-1] += m.group(1)
m_id += m.group(1)
index += 1
msgstrs.append("")
m_msg = ""
# find all message strings and all multi-line message strings
if re.match('msgstr "(.*)"', file_lines[index]):
m = re.match('msgstr "(.*)"', file_lines[index])
msgstrs[-1] += m.group(1)
m_msg += m.group(1)
index += 1
while re.match('^"(.*)"$', file_lines[index]):
m = re.match('^"(.*)"$', file_lines[index])
msgstrs[-1] += m.group(1)
m_msg += m.group(1)
index += 1
index += 1
except IndexError:
break
if len(msgids) != len(msgstrs):
print(
f"Error: Inconsistent Count of msgid/msgstr {file_path}: {len(msgstrs)} to {len(msgids)}"
)
found_error = True
for msgid, msgstr in zip(msgids, msgstrs):
# Find words inside curly brackets in both msgid and msgstr
words_msgid = re.findall(r"\{(.+?)\}", msgid)
words_msgstr = re.findall(r"\{(.+?)\}", msgstr)
if not words_msgstr or not words_msgid:
continue
# Compare words and check for differences
for word_msgstr in words_msgstr:
if word_msgstr not in words_msgid:
print(
f"Error: Inconsistent translation in {file_path}: '{word_msgstr}' in msgstr, {words_msgid} in msgids"
)
found_error = True
erct = 0
idx = 0
er_s = list()
for msgid, msgstr, line in zip(msgids, msgstrs, lineids):
idx += 1
if len(msgid) == 0 and len(msgstr) == 0:
erct += 1
er_s.append(str(line))
if erct > 0:
print (f"{erct} empty pair{'s' if erct==0 else ''} msgid '' + msgstr '' found in {file_path}\n{','.join(er_s)}")
found_error = True
return found_error
# Simple tool to recursively translate all .po-files into their .mo-equivalents under ./locale/LC_MESSAGES
def create_mo_files(force:bool, locales:list):
try:
import polib
except ImportError:
print ("polib missing - you need to install that library, eg. 'pip install polib'")
return
data_files = []
localedir = "./locale"
po_dirs = []
po_locales = []
for l in next(os.walk(localedir))[1]:
po_dirs.append(localedir + "/" + l + "/LC_MESSAGES/")
po_locales.append(l)
counts = [0, 0, 0]
for d_local, d in zip(po_locales, po_dirs):
if len(locales) > 0 and d_local not in locales:
print (f"Skip locale {d_local}")
continue
mo_files = []
po_files = [f for f in next(os.walk(d))[2] if os.path.splitext(f)[1] == ".po"]
for po_file in po_files:
filename, extension = os.path.splitext(po_file)
if find_erroneous_translations(d + po_file):
print (f"Skipping {d + po_file} as invalid...")
counts[2] += 1
continue
mo_file = filename + ".mo"
doit = True
if os.path.exists(d + mo_file):
res = polib.detect_encoding(d + po_file)
res2 = polib.detect_encoding(d + mo_file)
po_date = os.path.getmtime(d + po_file)
mo_date = os.path.getmtime(d + mo_file)
if mo_date > po_date:
print(f"mo-File for {d}{po_file} is newer (input encoded={res}, output encoded={res2})...")
doit = False
if doit or force:
if doit:
action = "Translate"
else:
action = "Forced translate"
res = polib.detect_encoding(d + po_file)
try:
po = polib.pofile(d + po_file)
po.save_as_mofile(d + mo_file)
except OSError as err:
print(f"Unexpected {err=}")
counts[2] += 1
continue
res2 = polib.detect_encoding(d + mo_file)
print(f"{action} {d}{po_file} (input encoded={res}, output encoded={res2})")
mo_files.append(d + mo_file)
counts[0] += 1
else:
counts[1] += 1
data_files.append((d, mo_files))
print(
f"Total: {counts[0] + counts[1]}, Translated: {counts[0]}, Ignored: {counts[1]}, Errors: {counts[2]}"
)
return data_files
def main():
force = False
args = sys.argv[1:]
locales = []
if len(args) > 0:
locales = list(a for a in args)
if locales[0].lower() == "force":
force = True
locales.pop(0)
print("Usage: python ./translate.py <locales>")
if len(locales):
print(f"Will compile po-files for {','.join(locales)}")
else:
print("Will compile all po-files")
create_mo_files(force, locales)
main()