Loading django/core/serializers/xml_serializer.py +8 −2 Original line number Diff line number Diff line Loading @@ -14,7 +14,9 @@ from django.conf import settings from django.core.serializers import base from django.db import DEFAULT_DB_ALIAS, models from django.utils.encoding import smart_text from django.utils.xmlutils import SimplerXMLGenerator from django.utils.xmlutils import ( SimplerXMLGenerator, UnserializableContentError, ) class Serializer(base.Serializer): Loading Loading @@ -78,7 +80,11 @@ class Serializer(base.Serializer): # Get a "string version" of the object's data. if getattr(obj, field.name) is not None: try: self.xml.characters(field.value_to_string(obj)) except UnserializableContentError: raise ValueError("%s.%s (pk:%s) contains unserializable characters" % ( obj.__class__.__name__, field.name, obj._get_pk_val())) else: self.xml.addQuickElement("None") Loading django/utils/xmlutils.py +12 −0 Original line number Diff line number Diff line Loading @@ -2,9 +2,14 @@ Utilities for XML generation/parsing. """ import re from xml.sax.saxutils import XMLGenerator class UnserializableContentError(ValueError): pass class SimplerXMLGenerator(XMLGenerator): def addQuickElement(self, name, contents=None, attrs=None): "Convenience method for adding an element with no children" Loading @@ -14,3 +19,10 @@ class SimplerXMLGenerator(XMLGenerator): if contents is not None: self.characters(contents) self.endElement(name) def characters(self, content): if content and re.search(r'[\x00-\x08\x0B-\x0C\x0E-\x1F]', content): # Fail loudly when content has control chars (unsupported in XML 1.0) # See http://www.w3.org/International/questions/qa-controls raise UnserializableContentError("Control characters are not supported in XML 1.0") XMLGenerator.characters(self, content) docs/releases/1.9.txt +4 −0 Original line number Diff line number Diff line Loading @@ -720,6 +720,10 @@ Miscellaneous * Private function ``django.utils.functional.total_ordering()`` has been removed. 
It contained a workaround for a ``functools.total_ordering()`` bug in Python versions older than 2.7.3. * XML serialization (either through :djadmin:`dumpdata` or the syndication framework) used to output any characters it received. Now if the content to be serialized contains any control characters not allowed in the XML 1.0 standard, the serialization will fail with a :exc:`ValueError`. .. _deprecated-features-1.9: Loading docs/topics/serialization.txt +10 −0 Original line number Diff line number Diff line Loading @@ -213,6 +213,16 @@ the auth.User model has such a relation to the auth.Permission model:: This example links the given user with the permission models with PKs 46 and 47. .. admonition:: Control characters .. versionchanged:: 1.9 If the content to be serialized contains control characters that are not accepted in the XML 1.0 standard, the serialization will fail with a :exc:`ValueError` exception. Read also the W3C's explanation of `HTML, XHTML, XML and Control Codes <http://www.w3.org/International/questions/qa-controls>`_. .. _serialization-formats-json: JSON Loading tests/serializers/tests.py +15 −0 Original line number Diff line number Diff line Loading @@ -371,6 +371,21 @@ class XmlSerializerTestCase(SerializersTestBase, TestCase): ret_list.append("".join(temp)) return ret_list def test_control_char_failure(self): """ Serializing control characters with XML should fail as those characters are not supported in the XML 1.0 standard (except HT, LF, CR). 
""" self.a1.headline = "This contains \u0001 control \u0011 chars" msg = "Article.headline (pk:%s) contains unserializable characters" % self.a1.pk with self.assertRaisesMessage(ValueError, msg): serializers.serialize(self.serializer_name, [self.a1]) self.a1.headline = "HT \u0009, LF \u000A, and CR \u000D are allowed" self.assertIn( "HT \t, LF \n, and CR \r are allowed", serializers.serialize(self.serializer_name, [self.a1]) ) class XmlSerializerTransactionTestCase(SerializersTransactionTestBase, TransactionTestCase): serializer_name = "xml" Loading Loading
django/core/serializers/xml_serializer.py +8 −2 Original line number Diff line number Diff line Loading @@ -14,7 +14,9 @@ from django.conf import settings from django.core.serializers import base from django.db import DEFAULT_DB_ALIAS, models from django.utils.encoding import smart_text from django.utils.xmlutils import SimplerXMLGenerator from django.utils.xmlutils import ( SimplerXMLGenerator, UnserializableContentError, ) class Serializer(base.Serializer): Loading Loading @@ -78,7 +80,11 @@ class Serializer(base.Serializer): # Get a "string version" of the object's data. if getattr(obj, field.name) is not None: try: self.xml.characters(field.value_to_string(obj)) except UnserializableContentError: raise ValueError("%s.%s (pk:%s) contains unserializable characters" % ( obj.__class__.__name__, field.name, obj._get_pk_val())) else: self.xml.addQuickElement("None") Loading
# django/utils/xmlutils.py  (+12 -0)
# @@ -2,9 +2,14 @@
"""
Utilities for XML generation/parsing.
"""

import re
from xml.sax.saxutils import XMLGenerator

# Control characters that XML 1.0 cannot carry: every code point below 0x20
# except HT (\x09), LF (\x0A) and CR (\x0D). Compiled once at import time so
# that characters() does not go through re.search()'s pattern lookup per call.
_XML10_FORBIDDEN_CONTROL_CHARS = re.compile(r'[\x00-\x08\x0B-\x0C\x0E-\x1F]')


class UnserializableContentError(ValueError):
    """Raised when content passed to the XML generator cannot be serialized."""


class SimplerXMLGenerator(XMLGenerator):
    def addQuickElement(self, name, contents=None, attrs=None):
        "Convenience method for adding an element with no children"
        # [@@ -14,3 +19,10 @@ -- the lines between the docstring above and the
        # statements below fall outside the diff hunks and are not shown here.]
        if contents is not None:
            self.characters(contents)
        self.endElement(name)

    def characters(self, content):
        """
        Emit ``content`` as character data, refusing forbidden control chars.

        Falsy content (empty string, ``None``) skips the check and is handed
        to ``XMLGenerator.characters()`` unchanged.

        Raises:
            UnserializableContentError: if ``content`` contains any character
                in the ranges \\x00-\\x08, \\x0B-\\x0C or \\x0E-\\x1F.
        """
        if content and _XML10_FORBIDDEN_CONTROL_CHARS.search(content):
            # Fail loudly when content has control chars (unsupported in XML 1.0)
            # See http://www.w3.org/International/questions/qa-controls
            raise UnserializableContentError("Control characters are not supported in XML 1.0")
        XMLGenerator.characters(self, content)
docs/releases/1.9.txt +4 −0 Original line number Diff line number Diff line Loading @@ -720,6 +720,10 @@ Miscellaneous

* Private function ``django.utils.functional.total_ordering()`` has been removed.
  It contained a workaround for a ``functools.total_ordering()`` bug in Python
  versions older than 2.7.3.

* XML serialization (either through :djadmin:`dumpdata` or the syndication
  framework) used to output any characters it received. Now, if the content to
  be serialized contains any control characters not allowed in the XML 1.0
  standard, the serialization will fail with a :exc:`ValueError`.

.. _deprecated-features-1.9:

Loading
docs/topics/serialization.txt +10 −0 Original line number Diff line number Diff line Loading @@ -213,6 +213,16 @@ the auth.User model has such a relation to the auth.Permission model::

This example links the given user with the permission models with PKs 46 and 47.

.. admonition:: Control characters

    .. versionchanged:: 1.9

    If the content to be serialized contains control characters that are not
    accepted in the XML 1.0 standard, the serialization will fail with a
    :exc:`ValueError` exception. See also the W3C's explanation of `HTML,
    XHTML, XML and Control Codes
    <http://www.w3.org/International/questions/qa-controls>`_.

.. _serialization-formats-json:

JSON

Loading
# tests/serializers/tests.py  (+15 -0)
# @@ -371,6 +371,21 @@ class XmlSerializerTestCase(SerializersTestBase, TestCase):
class XmlSerializerTestCase(SerializersTestBase, TestCase):
    # [Earlier members of this class fall outside the diff hunk. The hunk opens
    # on the tail of the preceding method:]
    #     ret_list.append("".join(temp))
    #     return ret_list

    def test_control_char_failure(self):
        """
        The XML serializer must reject control characters that XML 1.0 does
        not support, while still accepting HT, LF and CR.
        """
        # Forbidden control characters: serialization raises ValueError with a
        # message naming the model, the field and the primary key.
        self.a1.headline = "This contains \u0001 control \u0011 chars"
        expected_message = (
            "Article.headline (pk:%s) contains unserializable characters" % self.a1.pk
        )
        with self.assertRaisesMessage(ValueError, expected_message):
            serializers.serialize(self.serializer_name, [self.a1])

        # HT, LF and CR are permitted and appear in the serialized output.
        self.a1.headline = "HT \u0009, LF \u000A, and CR \u000D are allowed"
        serialized = serializers.serialize(self.serializer_name, [self.a1])
        self.assertIn("HT \t, LF \n, and CR \r are allowed", serialized)


class XmlSerializerTransactionTestCase(SerializersTransactionTestBase, TransactionTestCase):
    serializer_name = "xml"
    # [Remaining members of this class fall outside the diff hunk.]