Skip to content

Commit 5d68d61

Browse files
committed
Implement #53655: Improve speed of DOMNode::C14N() on large XML documents
The XPath query is in accordance with the spec [1]. However, we can do it in a simpler way. We can use a custom callback function instead of a linear search in XPath to check if a node is visible. Note that comment nodes are handled internally by libxml2 already, so we do not need to differentiate between node types. The callback will do an upwards traversal of the tree until the root of the canonicalization is reached. In practice this will speed up the application a lot. [1] https://www.w3.org/TR/2001/REC-xml-c14n-20010315 section 2.1 Closes GH-12278.
1 parent ab30f27 commit 5d68d61

File tree

3 files changed

+31
-16
lines changed

3 files changed

+31
-16
lines changed

NEWS

+2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ Core:
77

88
DOM:
99
. Added DOMNode::compareDocumentPosition(). (nielsdos)
10+
. Implement #53655 (Improve speed of DOMNode::C14N() on large XML documents).
11+
(nielsdos)
1012

1113
Intl:
1214
. Added IntlDateFormatter::PATTERN constant. (David Carlier)

UPGRADING

+3
Original file line numberDiff line numberDiff line change
@@ -107,3 +107,6 @@ PHP 8.4 UPGRADE NOTES
107107
14. Performance Improvements
108108
========================================
109109

110+
* The performance of DOMNode::C14N() is greatly improved for the case without
111+
an xpath query. This can give a time improvement of easily two orders of
112+
magnitude for documents with tens of thousands of nodes.

ext/dom/node.c

+26-16
Original file line numberDiff line numberDiff line change
@@ -1736,6 +1736,25 @@ PHP_METHOD(DOMNode, lookupNamespaceURI)
17361736
}
17371737
/* }}} end dom_node_lookup_namespace_uri */
17381738

1739+
static int dom_canonicalize_node_parent_lookup_cb(void *user_data, xmlNodePtr node, xmlNodePtr parent)
1740+
{
1741+
xmlNodePtr root = user_data;
1742+
/* We have to unroll the first iteration because node->parent
1743+
* is not necessarily equal to parent due to libxml2 tree rules (ns decls out of the tree for example). */
1744+
if (node == root) {
1745+
return 1;
1746+
}
1747+
node = parent;
1748+
while (node != NULL) {
1749+
if (node == root) {
1750+
return 1;
1751+
}
1752+
node = node->parent;
1753+
}
1754+
1755+
return 0;
1756+
}
1757+
17391758
static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */
17401759
{
17411760
zval *id;
@@ -1779,22 +1798,10 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{
17791798

17801799
php_libxml_invalidate_node_list_cache_from_doc(docp);
17811800

1801+
bool simple_node_parent_lookup_callback = false;
17821802
if (xpath_array == NULL) {
17831803
if (nodep->type != XML_DOCUMENT_NODE) {
1784-
ctxp = xmlXPathNewContext(docp);
1785-
ctxp->node = nodep;
1786-
xpathobjp = xmlXPathEvalExpression((xmlChar *) "(.//. | .//@* | .//namespace::*)", ctxp);
1787-
ctxp->node = NULL;
1788-
if (xpathobjp && xpathobjp->type == XPATH_NODESET) {
1789-
nodeset = xpathobjp->nodesetval;
1790-
} else {
1791-
if (xpathobjp) {
1792-
xmlXPathFreeObject(xpathobjp);
1793-
}
1794-
xmlXPathFreeContext(ctxp);
1795-
zend_throw_error(NULL, "XPath query did not return a nodeset");
1796-
RETURN_THROWS();
1797-
}
1804+
simple_node_parent_lookup_callback = true;
17981805
}
17991806
} else {
18001807
/*xpath query from xpath_array */
@@ -1873,8 +1880,11 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{
18731880
}
18741881

18751882
if (buf != NULL) {
1876-
ret = xmlC14NDocSaveTo(docp, nodeset, exclusive, inclusive_ns_prefixes,
1877-
with_comments, buf);
1883+
if (simple_node_parent_lookup_callback) {
1884+
ret = xmlC14NExecute(docp, dom_canonicalize_node_parent_lookup_cb, nodep, exclusive, inclusive_ns_prefixes, with_comments, buf);
1885+
} else {
1886+
ret = xmlC14NDocSaveTo(docp, nodeset, exclusive, inclusive_ns_prefixes, with_comments, buf);
1887+
}
18781888
}
18791889

18801890
if (inclusive_ns_prefixes != NULL) {

0 commit comments

Comments
 (0)