Skip to content

Commit 4d2b880

Browse files
shaibnessita
authored and committed
[4.2.x] Fixed CVE-2025-64460 -- Corrected quadratic inner text accumulation in XML serializer.
Previously, `getInnerText()` recursively used `list.extend()` on strings, which added each character from child nodes as a separate list element. On deeply nested XML content, this caused the overall deserialization work to grow quadratically with input size, potentially allowing disproportionate CPU consumption for crafted XML. The fix separates collection of inner texts from joining them, so that each subtree is joined only once, reducing the complexity to linear in the size of the input. These changes also include a mitigation for an xml.dom.minidom performance issue. Thanks Seokchan Yoon (https://ch4n3.kr/) for the report. Co-authored-by: Jacob Walls <[email protected]> Co-authored-by: Natalia <[email protected]> Backport of 50efb71 from main.
1 parent f997037 commit 4d2b880

File tree

4 files changed

+99
-7
lines changed

4 files changed

+99
-7
lines changed

django/core/serializers/xml_serializer.py

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
XML serializer.
33
"""
44
import json
5-
from xml.dom import pulldom
5+
from contextlib import contextmanager
6+
from xml.dom import minidom, pulldom
67
from xml.sax import handler
78
from xml.sax.expatreader import ExpatParser as _ExpatParser
89

@@ -14,6 +15,25 @@
1415
from django.utils.xmlutils import SimplerXMLGenerator, UnserializableContentError
1516

1617

18+
@contextmanager
19+
def fast_cache_clearing():
20+
"""Workaround for performance issues in minidom document checks.
21+
22+
Speeds up repeated DOM operations by skipping unnecessary full traversal
23+
of the DOM tree.
24+
"""
25+
module_helper_was_lambda = False
26+
if original_fn := getattr(minidom, "_in_document", None):
27+
module_helper_was_lambda = original_fn.__name__ == "<lambda>"
28+
if not module_helper_was_lambda:
29+
minidom._in_document = lambda node: bool(node.ownerDocument)
30+
try:
31+
yield
32+
finally:
33+
if original_fn and not module_helper_was_lambda:
34+
minidom._in_document = original_fn
35+
36+
1737
class Serializer(base.Serializer):
1838
"""Serialize a QuerySet to XML."""
1939

@@ -208,7 +228,8 @@ def _make_parser(self):
208228
def __next__(self):
    """Return the handled form of the next ``<object>`` element.

    Events from ``self.event_stream`` are consumed until a ``START_ELEMENT``
    event for a node named ``object`` is found; that node is expanded and
    passed to ``self._handle_object()``, whose result is returned. Raises
    ``StopIteration`` once the stream is exhausted.
    """
    for event, node in self.event_stream:
        if event != "START_ELEMENT" or node.nodeName != "object":
            continue
        # Expand the subtree with the cheap minidom helper in place.
        with fast_cache_clearing():
            self.event_stream.expandNode(node)
        return self._handle_object(node)
    raise StopIteration
214235

@@ -392,19 +413,25 @@ def _get_model_from_node(self, node, attr):
392413

393414
def getInnerText(node):
    """Get all the inner text of a DOM node (including all descendants)."""
    # Collect every text fragment first and join exactly once.
    return "".join(getInnerTextList(node))


def getInnerTextList(node):
    """Return a list of the inner texts of a DOM node, in document order.

    Text and CDATA section nodes contribute their ``data``; element nodes are
    descended into; every other node type is ignored.

    The tree is walked with an explicit stack rather than recursion, so
    arbitrarily deep nesting cannot raise ``RecursionError`` and each text
    fragment is appended to the result exactly once.
    """
    # inspired by https://mail.python.org/pipermail/xml-sig/2005-March/011022.html
    result = []
    # Children are pushed in reverse so that pop() yields them first-to-last.
    pending = list(node.childNodes)[::-1]
    while pending:
        child = pending.pop()
        node_type = child.nodeType
        if node_type == child.TEXT_NODE or node_type == child.CDATA_SECTION_NODE:
            result.append(child.data)
        elif node_type == child.ELEMENT_NODE:
            pending.extend(list(child.childNodes)[::-1])
    return result
408435

409436

410437
# Below code based on Christian Heimes' defusedxml

docs/releases/4.2.27.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,16 @@ using a suitably crafted dictionary, with dictionary expansion, as the
1515
``**kwargs`` passed to :meth:`.QuerySet.annotate` or :meth:`.QuerySet.alias` on
1616
PostgreSQL.
1717

18+
CVE-2025-64460: Potential denial-of-service vulnerability in XML ``Deserializer``
19+
=================================================================================
20+
21+
:ref:`XML Serialization <serialization-formats-xml>` was subject to a potential
22+
denial-of-service attack due to quadratic time complexity when deserializing
23+
crafted documents containing many nested invalid elements. The internal helper
24+
``django.core.serializers.xml_serializer.getInnerText()`` previously
25+
accumulated inner text inefficiently during recursion. It now collects text per
26+
element, avoiding excessive resource usage.
27+
1828
Bugfixes
1929
========
2030

docs/topics/serialization.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,8 @@ Identifier Information
173173
.. _jsonl: https://jsonlines.org/
174174
.. _PyYAML: https://pyyaml.org/
175175

176+
.. _serialization-formats-xml:
177+
176178
XML
177179
---
178180

tests/serializers/test_xml.py

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1+
import gc
2+
import time
13
from xml.dom import minidom
24

35
from django.core import serializers
4-
from django.core.serializers.xml_serializer import DTDForbidden
6+
from django.core.serializers.xml_serializer import Deserializer, DTDForbidden
7+
from django.db import models
58
from django.test import TestCase, TransactionTestCase
69

710
from .tests import SerializersTestBase, SerializersTransactionTestBase
@@ -90,6 +93,56 @@ def test_no_dtd(self):
9093
with self.assertRaises(DTDForbidden):
9194
next(serializers.deserialize("xml", xml))
9295

96+
def test_crafted_xml_performance(self):
    """Deserializing crafted, deeply nested XML does not take quadratic time."""

    def build_crafted_xml(depth, leaf_text_len):
        # A run of "x" characters wrapped in `depth` levels of <nested> tags.
        field_content = "".join(
            ("<nested>" * depth, "x" * leaf_text_len, "</nested>" * depth)
        )
        return (
            "\n"
            '<django-objects version="1.0">\n'
            '<object model="contenttypes.contenttype" pk="1">\n'
            f'<field name="app_label">{field_content}</field>\n'
            '<field name="model">m</field>\n'
            "</object>\n"
            "</django-objects>\n"
        )

    def deserialize(crafted_xml):
        # Return the seconds spent consuming the deserializer.
        iterator = Deserializer(crafted_xml)
        # Collect garbage up front so a GC pause is less likely to be timed.
        gc.collect()

        started = time.perf_counter()
        objects = list(iterator)
        elapsed = time.perf_counter() - started

        self.assertEqual(len(objects), 1)
        self.assertIsInstance(objects[0].object, models.Model)
        return elapsed

    def assertFactor(label, params, factor=2):
        timings = [
            deserialize(build_crafted_xml(depth, length))
            for depth, length in params
        ]
        # Growth ratio between each pair of consecutive runs.
        factors = [
            current / previous for previous, current in zip(timings, timings[1:])
        ]

        with self.subTest(label):
            # Assert based on the average factor to reduce test flakiness.
            self.assertLessEqual(sum(factors) / len(factors), factor)

    assertFactor(
        "varying depth, varying length",
        [(50, 2000), (100, 4000), (200, 8000), (400, 16000), (800, 32000)],
        2,
    )
    assertFactor("constant depth, varying length", [(100, 1), (100, 1000)], 2)
145+
93146

94147
class XmlSerializerTransactionTestCase(
95148
SerializersTransactionTestBase, TransactionTestCase

0 commit comments

Comments
 (0)