Skip to content

Commit 14866bd

Browse files
author
Commitfest Bot
committed
[CF 4237] v17 - Add XMLCanonicalize function
This branch was automatically generated by a robot using patches from an email thread registered at: https://commitfest.postgresql.org/patch/4237 The branch will be overwritten each time a new patch version is posted to the thread, and also periodically to check for bitrot caused by changes on the master branch. Patch(es): https://www.postgresql.org/message-id/[email protected] Author(s): Jim Jones
2 parents f132815 + cbba4f8 commit 14866bd

File tree

8 files changed

+398
-0
lines changed

8 files changed

+398
-0
lines changed

doc/src/sgml/func.sgml

+50
Original file line numberDiff line numberDiff line change
@@ -14623,6 +14623,56 @@ SELECT xmltext('< foo & bar >');
1462314623
</para>
1462414624
</sect3>
1462514625

14626+
<sect3 id="functions-producing-xml-xmlcanonicalize">
14627+
<title><literal>xmlcanonicalize</literal></title>
14628+
14629+
<indexterm>
14630+
<primary>xmlcanonicalize</primary>
14631+
</indexterm>
14632+
14633+
<synopsis>
14634+
<function>xmlcanonicalize</function> ( <parameter>doc</parameter> <type>xml</type> [, <parameter>keep_comments</parameter> <type>boolean</type> DEFAULT <literal>true</literal>] ) <returnvalue>xml</returnvalue>
14635+
14636+
</synopsis>
14637+
14638+
<para>
14639+
This function transforms a given XML document into its <ulink url="https://www.w3.org/TR/xml-c14n11/#Terminology">canonical form</ulink>,
14640+
as defined by the <ulink url="https://www.w3.org/TR/xml-c14n11/">W3C Canonical XML 1.1 Specification</ulink>, which standardizes the document's
14641+
structure and syntax to facilitate comparison and validation.
14642+
The <parameter>keep_comments</parameter> parameter controls whether XML comments from the input document are preserved or discarded.
14643+
If omitted, it defaults to <literal>true</literal>.
14644+
</para>
14645+
14646+
<para>
14647+
Example:
14648+
<screen><![CDATA[
14649+
SELECT
14650+
xmlcanonicalize(
14651+
'<foo>
14652+
<!-- a comment -->
14653+
<bar c="3" b="2" a="1">42</bar>
14654+
<empty/>
14655+
</foo>'::xml);
14656+
xmlcanonicalize
14657+
-----------------------------------------------------------------------------
14658+
<foo><!-- a comment --><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
14659+
(1 row)
14660+
14661+
SELECT
14662+
xmlcanonicalize(
14663+
'<foo>
14664+
<!-- a comment -->
14665+
<bar c="3" b="2" a="1">42</bar>
14666+
<empty/>
14667+
</foo>'::xml, false);
14668+
xmlcanonicalize
14669+
-----------------------------------------------------------
14670+
<foo><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
14671+
(1 row)
14672+
]]></screen>
14673+
</para>
14674+
</sect3>
14675+
1462614676
<sect3 id="functions-producing-xml-xmlcomment">
1462714677
<title><literal>xmlcomment</literal></title>
1462814678

src/backend/catalog/system_functions.sql

+6
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,12 @@ CREATE OR REPLACE FUNCTION xpath_exists(text, xml)
268268
IMMUTABLE PARALLEL SAFE STRICT COST 1
269269
RETURN xpath_exists($1, $2, '{}'::text[]);
270270

271+
-- xmlcanonicalize is redefined here so that its second argument can carry
-- a default value.  The parameters are named to match the documented
-- synopsis (doc, keep_comments), which lets callers use named notation,
-- e.g. xmlcanonicalize(doc => x, keep_comments => false).  Positional
-- calls are unaffected.
CREATE OR REPLACE FUNCTION
  xmlcanonicalize(doc xml, keep_comments boolean DEFAULT true)
 RETURNS xml
 LANGUAGE internal
 IMMUTABLE PARALLEL SAFE STRICT
AS 'xmlcanonicalize';
276+
271277
CREATE OR REPLACE FUNCTION pg_sleep_for(interval)
272278
RETURNS void
273279
LANGUAGE sql

src/backend/utils/adt/xml.c

+43
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
#include <libxml/xmlwriter.h>
5959
#include <libxml/xpath.h>
6060
#include <libxml/xpathInternals.h>
61+
#include <libxml/c14n.h>
6162

6263
/*
6364
* We used to check for xmlStructuredErrorContext via a configure test; but
@@ -544,6 +545,48 @@ xmltext(PG_FUNCTION_ARGS)
544545
#endif /* not USE_LIBXML */
545546
}
546547

548+
/**
549+
* Converts an XML document to its canonical form according to the
550+
* W3C Canonical XML 1.1 specification implemented on xmlC14NDocDumpMemory.
551+
*/
552+
Datum
553+
xmlcanonicalize(PG_FUNCTION_ARGS)
554+
{
555+
#ifdef USE_LIBXML
556+
xmltype *arg = PG_GETARG_XML_P(0);
557+
bool keep_comments = PG_GETARG_BOOL(1);
558+
text *result;
559+
int nbytes;
560+
xmlDocPtr doc;
561+
xmlChar *xmlbuf = NULL;
562+
563+
doc = xml_parse(arg, XMLOPTION_DOCUMENT, false,
564+
GetDatabaseEncoding(), NULL, NULL, NULL);
565+
566+
/*
567+
* This dumps the canonicalized XML doc into the xmlChar* buffer.
568+
* mode = 2 means the doc will be canonicalized using the C14N 1.1 standard.
569+
*/
570+
nbytes = xmlC14NDocDumpMemory(doc, NULL, 2, NULL, keep_comments, &xmlbuf);
571+
572+
if(doc)
573+
xmlFreeDoc(doc);
574+
575+
if(nbytes < 0)
576+
ereport(ERROR,
577+
(errcode(ERRCODE_INTERNAL_ERROR),
578+
errmsg("could not canonicalize the given XML document")));
579+
580+
result = cstring_to_text_with_len((const char *) xmlbuf, nbytes);
581+
582+
xmlFree(xmlbuf);
583+
584+
PG_RETURN_XML_P(result);
585+
#else
586+
NO_XML_SUPPORT();
587+
return 0;
588+
#endif /* not USE_LIBXML */
589+
}
547590

548591
/*
549592
* TODO: xmlconcat needs to merge the notations and unparsed entities

src/include/catalog/pg_proc.dat

+3
Original file line numberDiff line numberDiff line change
@@ -9143,6 +9143,9 @@
91439143
{ oid => '3813', descr => 'generate XML text node',
91449144
proname => 'xmltext', prorettype => 'xml', proargtypes => 'text',
91459145
prosrc => 'xmltext' },
9146+
{ oid => '3814', descr => 'generate the canonical form of an XML document',
9147+
proname => 'xmlcanonicalize', prorettype => 'xml', proargtypes => 'xml bool',
9148+
prosrc => 'xmlcanonicalize' },
91469149

91479150
{ oid => '2923', descr => 'map table contents to XML',
91489151
proname => 'table_to_xml', procost => '100', provolatile => 's',

src/test/regress/expected/xml.out

+82
Original file line numberDiff line numberDiff line change
@@ -1877,3 +1877,85 @@ SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j'::char);
18771877
x&lt;P&gt;73&lt;/P&gt;0.42truej
18781878
(1 row)
18791879

1880+
-- xmlcanonicalize
1881+
CREATE TABLE xmlcanonicalize_test (doc xml);
1882+
INSERT INTO xmlcanonicalize_test VALUES
1883+
('<?xml version="1.0" encoding="ISO-8859-1"?>
1884+
<!DOCTYPE doc SYSTEM "doc.dtd" [
1885+
<!ENTITY val "42">
1886+
<!ATTLIST xyz attr CDATA "default">
1887+
]>
1888+
1889+
<!-- attributes and namespces will be sorted -->
1890+
<foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
1891+
xmlns:b="http://www.ietf.org"
1892+
xmlns:a="http://www.w3.org"
1893+
xmlns="http://example.org">
1894+
1895+
<!-- Normalization of whitespace in start and end tags -->
1896+
<!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
1897+
<bar xmlns="" xmlns:a="http://www.w3.org" >&val;</bar >
1898+
1899+
<!-- empty element will be converted to start-end tag pair -->
1900+
<empty/>
1901+
1902+
<!-- text will be transcoded to UTF-8 -->
1903+
<transcode>&#49;</transcode>
1904+
1905+
<!-- whitespace inside tag will be preserved -->
1906+
<whitespace> 321 </whitespace>
1907+
1908+
<!-- empty namespace will be removed of child tag -->
1909+
<emptyns xmlns="" >
1910+
<emptyns_child xmlns=""></emptyns_child>
1911+
</emptyns>
1912+
1913+
<!-- CDATA section will be replaced by its value -->
1914+
<compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
1915+
</foo> <!-- comment outside root element --> ');
1916+
SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
1917+
xmlcanonicalize
1918+
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1919+
<!-- attributes and namespces will be sorted --> +
1920+
<foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
1921+
<!-- comment outside root element -->
1922+
(1 row)
1923+
1924+
SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
1925+
xmlcanonicalize
1926+
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1927+
<foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
1928+
(1 row)
1929+
1930+
SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test;
1931+
?column?
1932+
----------
1933+
t
1934+
(1 row)
1935+
1936+
SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
1937+
xmlcanonicalize
1938+
-----------------
1939+
1940+
(1 row)
1941+
1942+
SELECT xmlcanonicalize(NULL, true);
1943+
xmlcanonicalize
1944+
-----------------
1945+
1946+
(1 row)
1947+
1948+
\set VERBOSITY terse
1949+
SELECT xmlcanonicalize('', true);
1950+
ERROR: invalid XML document
1951+
SELECT xmlcanonicalize(' ', true);
1952+
ERROR: invalid XML document
1953+
SELECT xmlcanonicalize('foo', true);
1954+
ERROR: invalid XML document
1955+
SELECT xmlcanonicalize('');
1956+
ERROR: invalid XML document
1957+
SELECT xmlcanonicalize(' ');
1958+
ERROR: invalid XML document
1959+
SELECT xmlcanonicalize('foo');
1960+
ERROR: invalid XML document
1961+
\set VERBOSITY default

src/test/regress/expected/xml_1.out

+80
Original file line numberDiff line numberDiff line change
@@ -1492,3 +1492,83 @@ ERROR: unsupported XML feature
14921492
LINE 1: SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j':...
14931493
^
14941494
DETAIL: This functionality requires the server to be built with libxml support.
1495+
-- xmlcanonicalize
1496+
CREATE TABLE xmlcanonicalize_test (doc xml);
1497+
INSERT INTO xmlcanonicalize_test VALUES
1498+
('<?xml version="1.0" encoding="ISO-8859-1"?>
1499+
<!DOCTYPE doc SYSTEM "doc.dtd" [
1500+
<!ENTITY val "42">
1501+
<!ATTLIST xyz attr CDATA "default">
1502+
]>
1503+
1504+
<!-- attributes and namespces will be sorted -->
1505+
<foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
1506+
xmlns:b="http://www.ietf.org"
1507+
xmlns:a="http://www.w3.org"
1508+
xmlns="http://example.org">
1509+
1510+
<!-- Normalization of whitespace in start and end tags -->
1511+
<!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
1512+
<bar xmlns="" xmlns:a="http://www.w3.org" >&val;</bar >
1513+
1514+
<!-- empty element will be converted to start-end tag pair -->
1515+
<empty/>
1516+
1517+
<!-- text will be transcoded to UTF-8 -->
1518+
<transcode>&#49;</transcode>
1519+
1520+
<!-- whitespace inside tag will be preserved -->
1521+
<whitespace> 321 </whitespace>
1522+
1523+
<!-- empty namespace will be removed of child tag -->
1524+
<emptyns xmlns="" >
1525+
<emptyns_child xmlns=""></emptyns_child>
1526+
</emptyns>
1527+
1528+
<!-- CDATA section will be replaced by its value -->
1529+
<compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
1530+
</foo> <!-- comment outside root element --> ');
1531+
ERROR: unsupported XML feature
1532+
LINE 2: ('<?xml version="1.0" encoding="ISO-8859-1"?>
1533+
^
1534+
DETAIL: This functionality requires the server to be built with libxml support.
1535+
SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
1536+
xmlcanonicalize
1537+
-----------------
1538+
(0 rows)
1539+
1540+
SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
1541+
xmlcanonicalize
1542+
-----------------
1543+
(0 rows)
1544+
1545+
SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test;
1546+
?column?
1547+
----------
1548+
(0 rows)
1549+
1550+
SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
1551+
xmlcanonicalize
1552+
-----------------
1553+
(0 rows)
1554+
1555+
SELECT xmlcanonicalize(NULL, true);
1556+
xmlcanonicalize
1557+
-----------------
1558+
1559+
(1 row)
1560+
1561+
\set VERBOSITY terse
1562+
SELECT xmlcanonicalize('', true);
1563+
ERROR: unsupported XML feature at character 24
1564+
SELECT xmlcanonicalize(' ', true);
1565+
ERROR: unsupported XML feature at character 24
1566+
SELECT xmlcanonicalize('foo', true);
1567+
ERROR: unsupported XML feature at character 24
1568+
SELECT xmlcanonicalize('');
1569+
ERROR: unsupported XML feature at character 24
1570+
SELECT xmlcanonicalize(' ');
1571+
ERROR: unsupported XML feature at character 24
1572+
SELECT xmlcanonicalize('foo');
1573+
ERROR: unsupported XML feature at character 24
1574+
\set VERBOSITY default

0 commit comments

Comments
 (0)