Commit 9368f51e authored by Claude Paroz
Browse files

Fixed #20197 -- Made XML serializer fail loudly when outputting unserializable chars

Thanks Tim Graham for the review.
parent b769bbd4
Loading
Loading
Loading
Loading
+8 −2
Original line number Diff line number Diff line
@@ -14,7 +14,9 @@ from django.conf import settings
from django.core.serializers import base
from django.db import DEFAULT_DB_ALIAS, models
from django.utils.encoding import smart_text
from django.utils.xmlutils import SimplerXMLGenerator
from django.utils.xmlutils import (
    SimplerXMLGenerator, UnserializableContentError,
)


class Serializer(base.Serializer):
@@ -78,7 +80,11 @@ class Serializer(base.Serializer):

        # Get a "string version" of the object's data.
        if getattr(obj, field.name) is not None:
            try:
                self.xml.characters(field.value_to_string(obj))
            except UnserializableContentError:
                raise ValueError("%s.%s (pk:%s) contains unserializable characters" % (
                    obj.__class__.__name__, field.name, obj._get_pk_val()))
        else:
            self.xml.addQuickElement("None")

+12 −0
Original line number Diff line number Diff line
@@ -2,9 +2,14 @@
Utilities for XML generation/parsing.
"""

import re
from xml.sax.saxutils import XMLGenerator


class UnserializableContentError(ValueError):
    """Raised when content handed to the XML generator can't be serialized."""


class SimplerXMLGenerator(XMLGenerator):
    def addQuickElement(self, name, contents=None, attrs=None):
        "Convenience method for adding an element with no children"
@@ -14,3 +19,10 @@ class SimplerXMLGenerator(XMLGenerator):
        if contents is not None:
            self.characters(contents)
        self.endElement(name)

    def characters(self, content):
        """
        Write ``content`` as character data, failing loudly on control chars.

        Raise UnserializableContentError if ``content`` contains any character
        below U+0020 other than HT, LF, and CR, since those control characters
        are unsupported in XML 1.0. Otherwise, delegate to XMLGenerator.
        """
        if content:
            # See http://www.w3.org/International/questions/qa-controls
            forbidden = re.search(r'[\x00-\x08\x0B-\x0C\x0E-\x1F]', content)
            if forbidden:
                raise UnserializableContentError(
                    "Control characters are not supported in XML 1.0"
                )
        XMLGenerator.characters(self, content)
+4 −0
Original line number Diff line number Diff line
@@ -720,6 +720,10 @@ Miscellaneous
* Private function ``django.utils.functional.total_ordering()`` has been
  removed. It contained a workaround for a ``functools.total_ordering()`` bug
  in Python versions older than 2.7.3.
* XML serialization (either through :djadmin:`dumpdata` or the syndication
  framework) used to output any characters it received. Now if the content to
  be serialized contains any control characters not allowed in the XML 1.0
  standard, the serialization will fail with a :exc:`ValueError`.

.. _deprecated-features-1.9:

+10 −0
Original line number Diff line number Diff line
@@ -213,6 +213,16 @@ the auth.User model has such a relation to the auth.Permission model::

This example links the given user with the permission models with PKs 46 and 47.

.. admonition:: Control characters

    .. versionchanged:: 1.9

    If the content to be serialized contains control characters that are not
    accepted in the XML 1.0 standard, the serialization will fail with a
    :exc:`ValueError` exception. See also the W3C's explanation of `HTML,
    XHTML, XML and Control Codes
    <http://www.w3.org/International/questions/qa-controls>`_.

.. _serialization-formats-json:

JSON
+15 −0
Original line number Diff line number Diff line
@@ -371,6 +371,21 @@ class XmlSerializerTestCase(SerializersTestBase, TestCase):
                ret_list.append("".join(temp))
        return ret_list

    def test_control_char_failure(self):
        """
        The XML serializer must reject control characters, which the XML 1.0
        standard doesn't support, while still accepting HT, LF, and CR.
        """
        article = self.a1

        # Unsupported control characters make serialization fail loudly.
        article.headline = "This contains \u0001 control \u0011 chars"
        expected_msg = "Article.headline (pk:%s) contains unserializable characters" % article.pk
        with self.assertRaisesMessage(ValueError, expected_msg):
            serializers.serialize(self.serializer_name, [article])

        # The three permitted control characters pass through unchanged.
        article.headline = "HT \u0009, LF \u000A, and CR \u000D are allowed"
        serialized = serializers.serialize(self.serializer_name, [article])
        self.assertIn("HT \t, LF \n, and CR \r are allowed", serialized)


class XmlSerializerTransactionTestCase(SerializersTransactionTestBase, TransactionTestCase):
    serializer_name = "xml"