🧑‍💻 Use LLM for language translation

thinkyhead · thinkyhead · commit 3db26d2c5614 · 2026-01-28T19:13:29.000-06:00
diff --git a/buildroot/share/scripts/languageExport.py b/buildroot/share/scripts/languageExport.py
@@ -10,13 +10,14 @@
 
 import re
 from pathlib import Path
-from sys import argv
+from sys import argv, exit
 from languageUtil import namebyid
 
 LANGHOME = "Marlin/src/lcd/language"
 
 # Write multiple sheets if true, otherwise write one giant sheet
 MULTISHEET = '--single' not in argv[1:]
+DO_TRANSLATE = '--translate' in argv[1:]  # Fill in missing translations using an LLM via Ollama before exporting
 OUTDIR = Path('out-csv')
 
 # Check for the path to the language files
@@ -121,6 +122,55 @@ def write_csv_lang(f, strings, name):
     f.write(',')
     if name in strings['tall']: f.write('"%s"' % strings['tall'][name])
 
+if DO_TRANSLATE:
+
+    import ollama
+
+    OLLAMA_MODEL = ("gpt-oss:20b", "llama3.3")[0]  # The index selects which of the listed models is used
+    system_prompt_text = """You are an expert in language translation in the context of 3D printing.
+You will be given a list of existing translations and will be asked to provide a new translation in the given language.
+Your translations must be no more than 18 characters long! Use common abbreviations whenever necessary.
+Assume that named substitutions such as (MACHINE_NAME) are short strings for the purpose of character counting.
+For each translation requested, respond only with the translated string, no introduction, explanation, or assessment.
+This clean output will be perfect for our use case.
+"""
+    SYSTEM_PROMPT = [{ 'role': 'system', 'content': system_prompt_text }]
+
+    # Send a prompt to Ollama
+    def prompt_with_ollama(prompt:str):
+        """Send one user prompt, preceded by SYSTEM_PROMPT, to OLLAMA_MODEL and return the reply content."""
+        msg = [{ 'role': 'user', 'content': prompt }]
+        response = ollama.chat(model=OLLAMA_MODEL, messages=SYSTEM_PROMPT + msg, stream=False)
+        return response['message']['content']
+
+    # For each named string find all existing and needed translations
+    for name in names:
+        done = {}
+        todo = []
+        for lang in langcodes:
+            strings = language_strings[lang]
+            if name in strings['narrow']:
+                done[lang] = strings['narrow'][name]
+            else:
+                todo += [lang]
+
+        # For each untranslated language, fill in a translation
+        for lang in todo:
+            # Show existing translations to the LLM and ask for one more
+            prompt = [ f"Please translate the following string into {namebyid(lang)} ({lang})." ]
+            if lang == "fr_na":
+                prompt += [ "(Substitute plain unaccented ASCII characters for accented characters in the output.)" ]
+            prompt += [ "Here are the existing translations:" ]
+            # Use distinct loop names so 'lang' still refers to the language being translated below
+            for dlang, dstring in done.items():
+                prompt += [ f"- {dlang} {namebyid(dlang)}: \"{dstring}\"" ]
+            prompt = '\n'.join(prompt)
+            #print(f"Prompt: {prompt}")
+            newstring = prompt_with_ollama(prompt)
+            print(f"{name} ({lang}) = \"{newstring}\"")
+            done[lang] = newstring  # Later requests for this name will list this translation too
+            language_strings[lang].setdefault('narrow', {})[name] = newstring
+
 if MULTISHEET:
     #
     # Export a separate sheet for each language