Add workflow check into cli tool

This commit is contained in:
jpirnay 2025-01-01 17:21:39 +01:00
parent f356850aa7
commit a9b3aba2a0

View file

@ -1,7 +1,126 @@
import os
import re
import sys
import polib
def are_curly_brackets_matched(input_str):
    r"""Return True when the unescaped curly brackets in *input_str* balance.

    A backslash escapes the single character that follows it, so ``\{`` and
    ``\}`` are ignored. An escaped backslash (``\\``) is itself consumed as a
    pair and therefore does NOT escape the character after it.

    Args:
        input_str: Text to scan (callers in this file pass one line at a time).

    Returns:
        bool: False if a ``}`` appears with no open ``{`` before it, or if at
        least one ``{`` is still open at the end of the text; True otherwise.
    """
    depth = 0
    escaped = False
    for char in input_str:
        if escaped:
            # Previous character was a backslash: this one is literal, even
            # when it is a brace or another backslash. Checking this first
            # keeps "\\{" from being read as an escaped brace.
            escaped = False
            continue
        if char == "\\":
            escaped = True
        elif char == "{":
            depth += 1
        elif char == "}":
            if depth == 0:
                # Closing bracket without a matching opener.
                return False
            depth -= 1
    return depth == 0
def contain_smart_quotes(line):
    """Return True when *line* opens a string with a smart quote (U+201D).

    After stripping surrounding whitespace, the line is flagged if it starts
    with ``msgid`` or ``msgstr`` followed by a space and the smart quote, or
    if it starts with the smart quote itself (a continuation line).

    Args:
        line: One line of text, with or without its trailing newline.

    Returns:
        bool: True if the line begins one of the three flagged ways,
        False otherwise (including for an empty line).
    """
    # U+201D RIGHT DOUBLE QUOTATION MARK, written as an escape so the
    # character cannot be silently lost and leave an always-true
    # startswith("") test behind.
    smart_quote = "\u201d"
    stripped = line.strip()
    return stripped.startswith(
        (f"msgid {smart_quote}", f"msgstr {smart_quote}", smart_quote)
    )
# Line patterns recognised by the .po scanner below. Raw strings keep "\s"
# from being treated as an (invalid) string escape.
_MSGID_RE = re.compile(r'msgid \s*"(.*)"')
_MSGSTR_RE = re.compile(r'msgstr "(.*)"')
_CONTINUATION_RE = re.compile(r'^"(.*)"$')
_PLACEHOLDER_RE = re.compile(r"\{(.+?)\}")


def _read_continuation(file_lines, index):
    """Join consecutive quoted continuation lines starting at *index*.

    Returns:
        tuple: ``(text, next_index)`` where *text* is the concatenated content
        of the continuation lines (``""`` if there are none) and *next_index*
        is the first line that is not a continuation, or ``len(file_lines)``
        if the file ended.
    """
    parts = []
    while index < len(file_lines):
        match = _CONTINUATION_RE.match(file_lines[index])
        if not match:
            break
        parts.append(match.group(1))
        index += 1
    return "".join(parts), index


def _parse_po_entries(file_lines):
    """Collect msgid/msgstr texts and their 0-based starting line indexes.

    Returns:
        tuple: ``(msgids, msgstrs, lineids)``. ``msgids`` and ``lineids``
        always have equal length; ``msgstrs`` is shorter when the file ends
        in the middle of a msgid.
    """
    msgids = []
    msgstrs = []
    lineids = []
    total = len(file_lines)
    index = 0
    while index < total:
        line = file_lines[index]
        if line.strip() == "" or line.startswith("#"):
            index += 1
            continue

        # Any other line starts an entry, even if it is not a msgid line;
        # such an entry keeps an empty msgid.
        msgids.append("")
        lineids.append(index)
        match = _MSGID_RE.match(line)
        if match:
            tail, index = _read_continuation(file_lines, index + 1)
            msgids[-1] = match.group(1) + tail
            if index >= total:
                # File ended inside the msgid: no msgstr is recorded, which
                # the caller reports as a count mismatch.
                break

        msgstrs.append("")
        match = _MSGSTR_RE.match(file_lines[index])
        if match:
            tail, index = _read_continuation(file_lines, index + 1)
            msgstrs[-1] = match.group(1) + tail
            if index >= total:
                break

        # NOTE: the line after an entry is skipped unconditionally; this
        # matches the previous behaviour and assumes a separator line there.
        index += 1
    return msgids, msgstrs, lineids


def find_erroneous_translations(file_path):
    """Check a .po file for common translation mistakes and report them.

    The checks, each printed to stdout when it fails, are:

    * every line has balanced curly brackets (``are_curly_brackets_matched``);
    * no line opens a string with a smart quote (``contain_smart_quotes``);
    * the number of parsed msgid and msgstr entries is the same;
    * every ``{placeholder}`` used in a msgstr also occurs in its msgid
      (pairs where either side has no placeholder are not compared);
    * no entry has both an empty msgid and an empty msgstr.

    Line numbers in the messages are 1-based.

    Args:
        file_path: Path of the .po file; it is read as UTF-8.

    Returns:
        bool: True if at least one problem was found, False otherwise.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        file_lines = file.readlines()

    found_error = False

    # Per-line checks.
    for line_no, line in enumerate(file_lines, start=1):
        if not are_curly_brackets_matched(line):
            found_error = True
            print(
                f"Error: {file_path}\nLine {line_no} has mismatched curly braces:\n{line}"
            )
        if contain_smart_quotes(line):
            found_error = True
            print(
                f"Error: {file_path}\nLine {line_no} contains invalid quotes:\n{line}"
            )

    msgids, msgstrs, lineids = _parse_po_entries(file_lines)

    if len(msgids) != len(msgstrs):
        print(
            f"Error: Inconsistent Count of msgid/msgstr {file_path}: {len(msgstrs)} to {len(msgids)}"
        )
        found_error = True

    # Placeholder names used in the translation must exist in the source text.
    for msgid, msgstr in zip(msgids, msgstrs):
        words_msgid = _PLACEHOLDER_RE.findall(msgid)
        words_msgstr = _PLACEHOLDER_RE.findall(msgstr)
        if not words_msgstr or not words_msgid:
            continue
        for word_msgstr in words_msgstr:
            if word_msgstr not in words_msgid:
                print(
                    f"Error: Inconsistent translation in {file_path}: '{word_msgstr}' in msgstr, {words_msgid} in msgids"
                )
                found_error = True

    # Entries where both sides are empty.
    empty_pair_lines = [
        str(line_index + 1)
        for msgid, msgstr, line_index in zip(msgids, msgstrs, lineids)
        if not msgid and not msgstr
    ]
    if empty_pair_lines:
        count = len(empty_pair_lines)
        print(
            f"{count} empty pair{'s' if count != 1 else ''} msgid '' + msgstr '' found in {file_path}\n{','.join(empty_pair_lines)}"
        )
        found_error = True

    return found_error
# Simple tool to recursively translate all .po-files into their .mo-equivalents under ./locale/LC_MESSAGES
def create_mo_files(force:bool, locales:list):
@ -21,6 +140,10 @@ def create_mo_files(force:bool, locales:list):
po_files = [f for f in next(os.walk(d))[2] if os.path.splitext(f)[1] == ".po"]
for po_file in po_files:
filename, extension = os.path.splitext(po_file)
if find_erroneous_translations(d + po_file):
print (f"Skipping {d + po_file} as invalid...")
counts[2] += 1
continue
mo_file = filename + ".mo"
doit = True
if os.path.exists(d + mo_file):
@ -58,7 +181,6 @@ def create_mo_files(force:bool, locales:list):
)
return data_files
def main():
force = False
args = sys.argv[1:]