Add workflow check into cli tool

2025-02-27 20:30:12 +00:00 · 2025-01-01 17:21:39 +01:00 · 2025-01-01 17:21:39 +01:00 · a9b3aba2a0
commit a9b3aba2a0
parent f356850aa7
1 changed files with 123 additions and 1 deletions
--- a/translate.py
+++ b/translate.py
@ -1,7 +1,126 @@
 import os
+import re
 import sys
 import polib

+def are_curly_brackets_matched(input_str):
+    """Return True if the unescaped curly braces in ``input_str`` are balanced.
+
+    A backslash escapes exactly one following character, which is then
+    ignored by the check. An escaped backslash (``\\\\``) is itself consumed,
+    so it does not hide the character after it.
+
+    Args:
+        input_str: The text to inspect (typically one raw line of a .po file).
+
+    Returns:
+        False as soon as a ``}`` appears without an open ``{``, or if any
+        ``{`` is still open at the end of the string; True otherwise.
+    """
+    depth = 0
+    escaped = False
+    for char in input_str:
+        if escaped:
+            # This character is the target of the preceding backslash; skip it.
+            # Testing this first keeps "\\\\" from escaping the next character.
+            escaped = False
+        elif char == "\\":
+            escaped = True
+        elif char == "{":
+            depth += 1
+        elif char == "}":
+            if depth == 0:
+                return False
+            depth -= 1
+    return depth == 0
+
+def contain_smart_quotes(line):
+    """Return True if a .po line opens its string with a typographic quote.
+
+    The line is stripped of surrounding whitespace and then tested for a
+    curly double quote where the plain ASCII ``"`` is expected: directly
+    after ``msgid `` or ``msgstr ``, or at the very start of the line
+    (a continuation line).
+
+    Args:
+        line: One raw line of a .po file.
+
+    Returns:
+        True when the string on the line starts with U+201C, U+201D or
+        U+201E, False otherwise.
+    """
+    # Left, right and low double quotation marks. Which one an editor's
+    # autocorrect inserts at the start of a string depends on the locale.
+    smart_quotes = ("\u201c", "\u201d", "\u201e")
+    stripped = line.strip()
+    prefixes = tuple(
+        lead + quote
+        for lead in ("msgid ", "msgstr ", "")
+        for quote in smart_quotes
+    )
+    return stripped.startswith(prefixes)
+
+def find_erroneous_translations(file_path):
+    """Run sanity checks on a .po file and print every problem found.
+
+    Checks performed:
+      * each line passes are_curly_brackets_matched();
+      * no line is flagged by contain_smart_quotes();
+      * the number of collected msgid and msgstr strings is equal;
+      * when both msgid and msgstr contain ``{placeholder}`` fields, every
+        field used in the msgstr also occurs in the msgid;
+      * no collected entry has an empty msgid together with an empty msgstr.
+        Any non-blank, non-comment line that is neither a ``msgid`` nor a
+        ``msgstr`` line is collected as such an empty pair, so it is
+        reported here as well.
+
+    Args:
+        file_path: Path of the .po file; it is read as UTF-8.
+
+    Returns:
+        True if at least one problem was reported, False otherwise.
+    """
+    with open(file_path, "r", encoding="utf-8") as file:
+        file_lines = file.readlines()
+
+    found_error = False
+
+    # Line-level checks. Line numbers are 1-based so they match an editor.
+    for lineno, line in enumerate(file_lines, start=1):
+        if not are_curly_brackets_matched(line):
+            found_error = True
+            print(f"Error: {file_path}\nLine {lineno} has mismatched curly braces:\n{line}")
+        if contain_smart_quotes(line):
+            found_error = True
+            print(f"Error: {file_path}\nLine {lineno} contains invalid quotes:\n{line}")
+
+    msgids, msgstrs, linenos = _collect_po_entries(file_lines)
+
+    if len(msgids) != len(msgstrs):
+        print(
+            f"Error: Inconsistent Count of msgid/msgstr {file_path}: {len(msgstrs)} to {len(msgids)}"
+        )
+        found_error = True
+
+    for msgid, msgstr in zip(msgids, msgstrs):
+        # Find words inside curly brackets in both msgid and msgstr
+        words_msgid = re.findall(r"\{(.+?)\}", msgid)
+        words_msgstr = re.findall(r"\{(.+?)\}", msgstr)
+        if not words_msgstr or not words_msgid:
+            continue
+
+        # A placeholder that exists only in the translation cannot be filled.
+        for word_msgstr in words_msgstr:
+            if word_msgstr not in words_msgid:
+                print(
+                    f"Error: Inconsistent translation in {file_path}: '{word_msgstr}' in msgstr, {words_msgid} in msgids"
+                )
+                found_error = True
+
+    empty_pairs = [
+        str(lineno)
+        for msgid, msgstr, lineno in zip(msgids, msgstrs, linenos)
+        if not msgid and not msgstr
+    ]
+    if empty_pairs:
+        count = len(empty_pairs)
+        print(f"{count} empty pair{'s' if count != 1 else ''} msgid '' + msgstr '' found in {file_path}\n{','.join(empty_pairs)}")
+        found_error = True
+    return found_error
+
+
+_PO_MSGID_RE = re.compile(r'msgid \s*"(.*)"')
+_PO_MSGSTR_RE = re.compile(r'msgstr "(.*)"')
+_PO_CONTINUATION_RE = re.compile(r'^"(.*)"$')
+
+
+def _read_po_string(file_lines, index, keyword_re):
+    """Read a keyword line and its continuation lines starting at ``index``.
+
+    Returns ``(text, next_index)``. If the line at ``index`` does not match
+    ``keyword_re``, nothing is consumed and ``("", index)`` is returned.
+    """
+    match = keyword_re.match(file_lines[index])
+    if match is None:
+        return "", index
+    parts = [match.group(1)]
+    index += 1
+    while index < len(file_lines):
+        continuation = _PO_CONTINUATION_RE.match(file_lines[index])
+        if continuation is None:
+            break
+        parts.append(continuation.group(1))
+        index += 1
+    return "".join(parts), index
+
+
+def _collect_po_entries(file_lines):
+    """Collect msgid/msgstr strings; return ``(msgids, msgstrs, line_numbers)``.
+
+    ``line_numbers`` holds the 1-based line on which each entry starts.
+    """
+    msgids = []
+    msgstrs = []
+    linenos = []
+    total = len(file_lines)
+    index = 0
+    while index < total:
+        line = file_lines[index]
+        if line.strip() == "" or line.startswith("#"):
+            index += 1
+            continue
+
+        start = index
+        linenos.append(index + 1)
+        msgid, index = _read_po_string(file_lines, index, _PO_MSGID_RE)
+        msgids.append(msgid)
+        if index >= total:
+            # The file ends right after a msgid: leave the msgstr unrecorded
+            # so the caller's count comparison reports the dangling entry.
+            break
+
+        msgstr, index = _read_po_string(file_lines, index, _PO_MSGSTR_RE)
+        msgstrs.append(msgstr)
+        if index == start:
+            # Neither keyword matched. Step over the unrecognised line; it
+            # stays recorded as an empty msgid/msgstr pair.
+            index += 1
+        # Otherwise the readers already left index on the next unread line,
+        # so advancing again would skip it.
+    return msgids, msgstrs, linenos
+

 # Simple tool to recursively translate all .po-files into their .mo-equivalents under ./locale/LC_MESSAGES
 def create_mo_files(force:bool, locales:list):
@ -21,6 +140,10 @@ def create_mo_files(force:bool, locales:list):
        po_files = [f for f in next(os.walk(d))[2] if os.path.splitext(f)[1] == ".po"]
        for po_file in po_files:
            filename, extension = os.path.splitext(po_file)
+            if find_erroneous_translations(d + po_file):
+                print (f"Skipping {d + po_file} as invalid...")
+                counts[2] += 1
+                continue
            mo_file = filename + ".mo"
            doit = True
            if os.path.exists(d + mo_file):
@ -58,7 +181,6 @@ def create_mo_files(force:bool, locales:list):
    )
    return data_files

-
 def main():
    force = False
    args = sys.argv[1:]