Commit 6fb9dee4 authored by Ramiro Morales
Browse files

Fixed #23271 -- Don't corrupt PO files on Windows when updating them.

Make sure PO catalog text fetched from gettext programs via standard
output isn't corrupted by a mismatch between the encoding assumed for that
output (UTF-8) and the one actually used to decode it on Windows (CP1252).
Such a mismatch can cause mojibake to be written when creating or updating
PO files.

Also fixes #23311.

Thanks to contributor with Trac nick 'danielmenzel' for the report,
excellent research and fix.
parent 1ee9507e
Loading
Loading
Loading
Loading
+20 −5
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
import fnmatch
import glob
import io
import locale
import os
import re
import sys
@@ -30,6 +31,20 @@ def check_programs(*programs):
                    "gettext tools 0.15 or newer installed." % program)


def gettext_popen_wrapper(args, os_err_exc_type=CommandError):
    """
    Makes sure text obtained from stdout of gettext utilities contains valid
    Unicode on Windows.

    ``args`` and ``os_err_exc_type`` are handed to popen_wrapper() unchanged.
    Returns the same ``(stdout, stderr, status_code)`` triple, with only
    ``stdout`` possibly repaired; ``stderr`` and ``status_code`` are passed
    through untouched.
    """
    stdout, stderr, status_code = popen_wrapper(args, os_err_exc_type=os_err_exc_type)
    # Only text that has already been decoded can have been mis-decoded. If
    # stdout is still a byte string (callers decode it themselves on
    # Python 2), calling .encode() on it would force an implicit ASCII decode
    # and raise UnicodeDecodeError on any non-ASCII catalog content, so leave
    # it alone.
    if os.name == 'nt' and not isinstance(stdout, bytes):
        # This looks weird because it's undoing what
        # subprocess.Popen(universal_newlines=True).communicate() does when
        # capturing PO files contents from stdout of gettext command line
        # programs: the UTF-8 bytes were decoded with the locale's preferred
        # encoding, so encode them back and decode as UTF-8. See ticket #23271
        # for details.
        stdout = stdout.encode(locale.getpreferredencoding(False)).decode('utf-8')
    return stdout, stderr, status_code


@total_ordering
class TranslatableFile(object):
    def __init__(self, dirpath, file_name, locale_dir):
@@ -115,7 +130,7 @@ class TranslatableFile(object):
            args.append(work_file)
        else:
            return
        msgs, errors, status = popen_wrapper(args)
        msgs, errors, status = gettext_popen_wrapper(args)
        if errors:
            if status != STATUS_OK:
                if is_templatized:
@@ -309,7 +324,7 @@ class Command(BaseCommand):

    @cached_property
    def gettext_version(self):
        out, err, status = popen_wrapper(['xgettext', '--version'])
        out, err, status = gettext_popen_wrapper(['xgettext', '--version'])
        m = re.search(r'(\d)\.(\d+)\.?(\d+)?', out)
        if m:
            return tuple(int(d) for d in m.groups() if d is not None)
@@ -334,7 +349,7 @@ class Command(BaseCommand):
            if not os.path.exists(potfile):
                continue
            args = ['msguniq'] + self.msguniq_options + [potfile]
            msgs, errors, status = popen_wrapper(args)
            msgs, errors, status = gettext_popen_wrapper(args)
            if six.PY2:
                msgs = msgs.decode('utf-8')
            if errors:
@@ -426,7 +441,7 @@ class Command(BaseCommand):

        if os.path.exists(pofile):
            args = ['msgmerge'] + self.msgmerge_options + [pofile, potfile]
            msgs, errors, status = popen_wrapper(args)
            msgs, errors, status = gettext_popen_wrapper(args)
            if six.PY2:
                msgs = msgs.decode('utf-8')
            if errors:
@@ -447,7 +462,7 @@ class Command(BaseCommand):

        if self.no_obsolete:
            args = ['msgattrib'] + self.msgattrib_options + ['-o', pofile, pofile]
            msgs, errors, status = popen_wrapper(args)
            msgs, errors, status = gettext_popen_wrapper(args)
            if errors:
                if status != STATUS_OK:
                    raise CommandError(
+2 −0
Original line number Diff line number Diff line
@@ -10,3 +10,5 @@ dummy2 = _("This is another translatable string.")
# shouldn't create a .po file with duplicate `Plural-Forms` headers
number = 3
dummuy3 = ungettext("%(number)s Foo", "%(number)s Foos", number) % {'number': number}

dummy4 = _('Size')
+16 −0
Original line number Diff line number Diff line
msgid ""
msgstr ""
"Project-Id-Version: \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2014-03-03 10:44+0100\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: \n"
"Language-Team: \n"
"Language: pt_BR\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=2; plural=(n > 1);\n"

msgid "Size"
msgstr "Größe"
+24 −5
Original line number Diff line number Diff line
@@ -67,14 +67,21 @@ class ExtractorTests(SimpleTestCase):
            po_contents = fp.read()
        return output, po_contents

    def assertMsgId(self, msgid, s, use_quotes=True):
    def _assertPoKeyword(self, keyword, expected_value, haystack, use_quotes=True):
        q = '"'
        if use_quotes:
            msgid = '"%s"' % msgid
            expected_value = '"%s"' % expected_value
            q = "'"
        needle = 'msgid %s' % msgid
        msgid = re.escape(msgid)
        return self.assertTrue(re.search('^msgid %s' % msgid, s, re.MULTILINE), 'Could not find %(q)s%(n)s%(q)s in generated PO file' % {'n': needle, 'q': q})
        needle = '%s %s' % (keyword, expected_value)
        expected_value = re.escape(expected_value)
        return self.assertTrue(re.search('^%s %s' % (keyword, expected_value), haystack, re.MULTILINE),
                               'Could not find %(q)s%(n)s%(q)s in generated PO file' % {'n': needle, 'q': q})

    def assertMsgId(self, msgid, haystack, use_quotes=True):
        """Assert that a ``msgid`` entry for ``msgid`` appears in ``haystack``."""
        keyword = 'msgid'
        return self._assertPoKeyword(
            keyword,
            msgid,
            haystack,
            use_quotes=use_quotes,
        )

    def assertMsgStr(self, msgstr, haystack, use_quotes=True):
        """Assert that a ``msgstr`` entry for ``msgstr`` appears in ``haystack``."""
        keyword = 'msgstr'
        return self._assertPoKeyword(
            keyword,
            msgstr,
            haystack,
            use_quotes=use_quotes,
        )

    def assertNotMsgId(self, msgid, s, use_quotes=True):
        if use_quotes:
@@ -391,6 +398,18 @@ class BasicExtractorTests(ExtractorTests):
        with six.assertRaisesRegex(self, CommandError, "Unable to get gettext version. Is it installed?"):
            cmd.gettext_version

    def test_po_file_encoding_when_updating(self):
        """
        Updating an existing PO file must not corrupt it with a non-UTF-8
        encoding on Python 3 + Windows (#23271).
        """
        BR_PO_BASE = 'locale/pt_BR/LC_MESSAGES/django'
        po_path = BR_PO_BASE + '.po'
        os.chdir(self.test_dir)
        # Start from a known-good catalog that already holds a non-ASCII
        # translation, and make sure the working copy is removed afterwards.
        shutil.copyfile(BR_PO_BASE + '.pristine', po_path)
        self.addCleanup(
            self.rmfile,
            os.path.join(self.test_dir, 'locale', 'pt_BR', 'LC_MESSAGES', 'django.po'),
        )
        management.call_command('makemessages', locale=['pt_BR'], verbosity=0)
        self.assertTrue(os.path.exists(po_path))
        with io.open(po_path, 'r', encoding='utf-8') as fp:
            po_contents = force_text(fp.read())
        # The existing translation must survive the update byte-for-byte.
        self.assertMsgStr("Größe", po_contents)


class JavascriptExtractorTests(ExtractorTests):