svghmi/i18n.py
changeset 3302 c89fc366bebd
parent 3218 76f1cd1291f4
child 3573 1ee56fb544fc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/svghmi/i18n.py	Thu Sep 02 21:36:29 2021 +0200
@@ -0,0 +1,353 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# This file is part of Beremiz
+# Copyright (C) 2021: Edouard TISSERANT
+#
+# See COPYING file for copyrights details.
+
+from __future__ import absolute_import
+from lxml import etree
+import os
+import sys
+import subprocess
+import time
+import ast
+import wx
+import re
+
+# to have it for python 2, had to install 
+# https://pypi.org/project/pycountry/18.12.8/
+# python2 -m pip install pycountry==18.12.8 --user
+import pycountry
+
+cmd_parser = re.compile(r'(?:"([^"]+)"\s*|([^\s]+)\s*)?')
+
+def open_pofile(pofile):
+    """ Opens PO file with POEdit """
+    
+    if sys.platform.startswith('win'):
+        from six.moves import winreg
+        poedit_cmd = None
+        try:
+            poedit_cmd = winreg.QueryValue(winreg.HKEY_LOCAL_MACHINE,
+                                           'SOFTWARE\\Classes\\poedit\\shell\\open\\command')
+            cmd = re.findall(cmd_parser, poedit_cmd)
+            dblquote_value,smpl_value = cmd[0]
+            poedit_path = dblquote_value+smpl_value
+        except OSError:
+            poedit_path = None
+
+    else:
+        try:
+            poedit_path = subprocess.check_output("command -v poedit", shell=True).strip()
+        except subprocess.CalledProcessError:
+            poedit_path = None
+
+    if poedit_path is None:
+        wx.MessageBox("POEdit is not found or installed !")
+    else:
+        subprocess.Popen([poedit_path,pofile])
+
+def EtreeToMessages(msgs):
+    """ Converts XML tree from 'extract_i18n' templates into a list of tuples """
+    messages = []
+
+    for msg in msgs:
+        messages.append((
+            "\n".join([line.text for line in msg]),
+            msg.get("label"), msg.get("id")))
+
+    return messages
+
+def SaveCatalog(fname, messages):
+    """ Save messages given as list of tupple (msg,label,id) in POT file """
+    w = POTWriter()
+    w.ImportMessages(messages)
+
+    with open(fname, 'w') as POT_file:
+        w.write(POT_file)
+
+def GetPoFiles(dirpath):
+    po_files = [fname for fname in os.listdir(dirpath) if fname.endswith(".po")]
+    po_files.sort()
+    return [(po_fname[:-3],os.path.join(dirpath, po_fname)) for po_fname in po_files]
+
+def ReadTranslations(dirpath):
+    """ Read all PO files from a directory and return a list of (langcode, translation_dict) tuples """
+
+    translations = []
+    for translation_name, po_path in GetPoFiles(dirpath):
+        r = POReader()
+        with open(po_path, 'r') as PO_file:
+            r.read(PO_file)
+            translations.append((translation_name, r.get_messages()))
+    return translations
+
+def MatchTranslations(translations, messages, errcallback):
+    """
+    Matches translations against original message catalog,
+    warn about inconsistancies,
+    returns list of langs, and a list of (msgid, [translations]) tuples
+    """
+    translated_messages = []
+    broken_lang = set()
+    for msgid,label,svgid in messages:
+        translated_message = []
+        for langcode,translation in translations:
+            msg = translation.pop(msgid, None)
+            if msg is None:
+                broken_lang.add(langcode)
+                errcallback(_('{}: Missing translation for "{}" (label:{}, id:{})\n').format(langcode,msgid,label,svgid))
+                translated_message.append(msgid)
+            else:
+                translated_message.append(msg)
+        translated_messages.append((msgid,translated_message))
+    langs = []
+    for langcode,translation in translations:
+        try:
+            l,c = langcode.split("_")
+            language_name = pycountry.languages.get(alpha_2 = l).name
+            country_name = pycountry.countries.get(alpha_2 = c).name
+            langname = "{} ({})".format(language_name, country_name)
+        except:
+            try:
+                langname = pycountry.languages.get(alpha_2 = langcode).name
+            except:
+                langname = langcode
+
+        langs.append((langname,langcode))
+
+        broken = False
+        for msgid, msg in translation.iteritems():
+            broken = True
+            errcallback(_('{}: Unused translation "{}":"{}"\n').format(langcode,msgid,msg))
+        if broken or langcode in broken_lang:
+            errcallback(_('Translation for {} is outdated, please edit {}.po, click "Catalog -> Update from POT File..." and select messages.pot.\n').format(langcode,langcode))
+
+
+    return langs,translated_messages
+
+
+def TranslationToEtree(langs,translated_messages):
+
+    result = etree.Element("translations")
+
+    langsroot = etree.SubElement(result, "langs")
+    for name, code in langs:
+        langel = etree.SubElement(langsroot, "lang", {"code":code})
+        langel.text = name
+
+    msgsroot = etree.SubElement(result, "messages")
+    for msgid, msgs in translated_messages:
+        msgidel = etree.SubElement(msgsroot, "msgid")
+        for msg in msgs:
+            msgel = etree.SubElement(msgidel, "msg")
+            for line in msg.split("\n"):
+                lineel = etree.SubElement(msgel, "line")
+                lineel.text = escape(line.encode("utf-8")).decode("utf-8")
+
+    return result
+
+
+
+locpfx = '#:svghmi.svg:'
+
+pot_header = '''\
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR ORGANIZATION
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\\n"
+"POT-Creation-Date: %(time)s\\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
+"Language-Team: LANGUAGE <LL@li.org>\\n"
+"MIME-Version: 1.0\\n"
+"Content-Type: text/plain; charset=UTF-8\\n"
+"Content-Transfer-Encoding: 8bit\\n"
+"Generated-By: SVGHMI 1.0\\n"
+
+'''
+escapes = []
+
+def make_escapes(pass_iso8859):
+    global escapes
+    escapes = [chr(i) for i in range(256)]
+    if pass_iso8859:
+        # Allow iso-8859 characters to pass through so that e.g. 'msgid
+        # "Höhe"' would result not result in 'msgid "H\366he"'.  Otherwise we
+        # escape any character outside the 32..126 range.
+        mod = 128
+    else:
+        mod = 256
+    for i in range(mod):
+        if not(32 <= i <= 126):
+            escapes[i] = "\\%03o" % i
+    escapes[ord('\\')] = '\\\\'
+    escapes[ord('\t')] = '\\t'
+    escapes[ord('\r')] = '\\r'
+    escapes[ord('\n')] = '\\n'
+    escapes[ord('\"')] = '\\"'
+
+make_escapes(pass_iso8859 = True)
+
+EMPTYSTRING = ''
+
+def escape(s):
+    global escapes
+    s = list(s)
+    for i in range(len(s)):
+        s[i] = escapes[ord(s[i])]
+    return EMPTYSTRING.join(s)
+
+def normalize(s):
+    # This converts the various Python string types into a format that is
+    # appropriate for .po files, namely much closer to C style.
+    lines = s.split('\n')
+    if len(lines) == 1:
+        s = '"' + escape(s) + '"'
+    else:
+        if not lines[-1]:
+            del lines[-1]
+            lines[-1] = lines[-1] + '\n'
+        for i in range(len(lines)):
+            lines[i] = escape(lines[i])
+        lineterm = '\\n"\n"'
+        s = '""\n"' + lineterm.join(lines) + '"'
+    return s
+
+
+class POTWriter:
+    def __init__(self):
+        self.__messages = {}
+
+    def ImportMessages(self, msgs):
+        for  msg, label, svgid in msgs:
+            self.addentry(msg.encode("utf-8"), label, svgid)
+
+    def addentry(self, msg, label, svgid):
+        entry = (label, svgid)
+        self.__messages.setdefault(msg, set()).add(entry)
+
+    def write(self, fp):
+        timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
+        print >> fp, pot_header % {'time': timestamp}
+        reverse = {}
+        for k, v in self.__messages.items():
+            keys = list(v)
+            keys.sort()
+            reverse.setdefault(tuple(keys), []).append((k, v))
+        rkeys = reverse.keys()
+        rkeys.sort()
+        for rkey in rkeys:
+            rentries = reverse[rkey]
+            rentries.sort()
+            for k, v in rentries:
+                v = list(v)
+                v.sort()
+                locline = locpfx
+                for label, svgid in v:
+                    d = {'label': label, 'svgid': svgid}
+                    s = _(' %(label)s:%(svgid)s') % d
+                    if len(locline) + len(s) <= 78:
+                        locline = locline + s
+                    else:
+                        print >> fp, locline
+                        locline = locpfx + s
+                if len(locline) > len(locpfx):
+                    print >> fp, locline
+                print >> fp, 'msgid', normalize(k)
+                print >> fp, 'msgstr ""\n'
+
+
+class POReader:
+    def __init__(self):
+        self.__messages = {}
+
+    def get_messages(self):
+        return self.__messages
+
+    def add(self, msgid, msgstr, fuzzy):
+        "Add a non-fuzzy translation to the dictionary."
+        if not fuzzy and msgstr and msgid:
+            self.__messages[msgid.decode('utf-8')] = msgstr.decode('utf-8')
+
+    def read(self, fp):
+        ID = 1
+        STR = 2
+
+        lines = fp.readlines()
+        section = None
+        fuzzy = 0
+
+        # Parse the catalog
+        lno = 0
+        for l in lines:
+            lno += 1
+            # If we get a comment line after a msgstr, this is a new entry
+            if l[0] == '#' and section == STR:
+                self.add(msgid, msgstr, fuzzy)
+                section = None
+                fuzzy = 0
+            # Record a fuzzy mark
+            if l[:2] == '#,' and 'fuzzy' in l:
+                fuzzy = 1
+            # Skip comments
+            if l[0] == '#':
+                continue
+            # Now we are in a msgid section, output previous section
+            if l.startswith('msgid') and not l.startswith('msgid_plural'):
+                if section == STR:
+                    self.add(msgid, msgstr, fuzzy)
+                section = ID
+                l = l[5:]
+                msgid = msgstr = ''
+                is_plural = False
+            # This is a message with plural forms
+            elif l.startswith('msgid_plural'):
+                if section != ID:
+                    print >> sys.stderr, 'msgid_plural not preceded by msgid on %s:%d' %\
+                        (infile, lno)
+                    sys.exit(1)
+                l = l[12:]
+                msgid += '\0' # separator of singular and plural
+                is_plural = True
+            # Now we are in a msgstr section
+            elif l.startswith('msgstr'):
+                section = STR
+                if l.startswith('msgstr['):
+                    if not is_plural:
+                        print >> sys.stderr, 'plural without msgid_plural on %s:%d' %\
+                            (infile, lno)
+                        sys.exit(1)
+                    l = l.split(']', 1)[1]
+                    if msgstr:
+                        msgstr += '\0' # Separator of the various plural forms
+                else:
+                    if is_plural:
+                        print >> sys.stderr, 'indexed msgstr required for plural on  %s:%d' %\
+                            (infile, lno)
+                        sys.exit(1)
+                    l = l[6:]
+            # Skip empty lines
+            l = l.strip()
+            if not l:
+                continue
+            l = ast.literal_eval(l)
+            if section == ID:
+                msgid += l
+            elif section == STR:
+                msgstr += l
+            else:
+                print >> sys.stderr, 'Syntax error on %s:%d' % (infile, lno), \
+                      'before:'
+                print >> sys.stderr, l
+                sys.exit(1)
+        # Add last entry
+        if section == STR:
+            self.add(msgid, msgstr, fuzzy)
+
+