diff options
author | Bruce Momjian | 2024-11-01 16:46:51 +0000 |
---|---|---|
committer | Bruce Momjian | 2024-11-01 16:46:51 +0000 |
commit | 641a5b7a1447954076728f259342c2f9201bb0b5 (patch) | |
tree | d5a46fdc3c57222201a2fc5555c09a4561419e6a /doc/src | |
parent | fc7ddededb9ee466d20581b966e01a3566b9559f (diff) |
doc: improve build for non-Latin1 characters
Add README.non-ASCII to explain non-ASCII doc behavior; some text moved
from release.sgml.
Change UTF8 SGML characters to use HTML entities.
Remove unnecessary UTF8 spaces.
Add SVG file check for check-nbsp target.
Add dummy 'pdf' Makefile target.
Reported-by: Yugo Nagata
Discussion: https://postgr.es/m/[email protected]
Backpatch-through: master
Diffstat (limited to 'doc/src')
-rw-r--r-- | doc/src/sgml/Makefile | 11 | ||||
-rw-r--r-- | doc/src/sgml/README.non-ASCII | 37 | ||||
-rw-r--r-- | doc/src/sgml/charset.sgml | 10 | ||||
-rw-r--r-- | doc/src/sgml/images/genetic-algorithm.svg | 4 | ||||
-rw-r--r-- | doc/src/sgml/release.sgml | 18 | ||||
-rw-r--r-- | doc/src/sgml/stylesheet-man.xsl | 12 |
6 files changed, 56 insertions, 36 deletions
diff --git a/doc/src/sgml/Makefile b/doc/src/sgml/Makefile index 65ed32cd0ab..12f506c9602 100644 --- a/doc/src/sgml/Makefile +++ b/doc/src/sgml/Makefile @@ -59,7 +59,7 @@ GENERATED_SGML = version.sgml \ features-supported.sgml features-unsupported.sgml errcodes-table.sgml \ keywords-table.sgml targets-meson.sgml wait_event_types.sgml -ALLSGML := $(wildcard $(srcdir)/*.sgml $(srcdir)/ref/*.sgml) $(GENERATED_SGML) +ALL_SGML := $(wildcard $(srcdir)/*.sgml $(srcdir)/ref/*.sgml) $(GENERATED_SGML) ALL_IMAGES := $(wildcard $(srcdir)/images/*.svg) @@ -68,7 +68,7 @@ ALL_IMAGES := $(wildcard $(srcdir)/images/*.svg) # we're at it, also resolve all entities (that is, copy all included # files into one big file). This helps tools that don't understand # vpath builds (such as dbtoepub). -postgres-full.xml: postgres.sgml $(ALLSGML) +postgres-full.xml: postgres.sgml $(ALL_SGML) $(XMLLINT) $(XMLINCLUDE) --output $@ --noent --valid $< @@ -143,11 +143,12 @@ postgres.txt: postgres.html ## Print ## -postgres.pdf: +postgres.pdf pdf: $(error Invalid target; use postgres-A4.pdf or postgres-US.pdf as targets) XSLTPROC_FO_FLAGS += --stringparam img.src.path '$(srcdir)/' +# XSL Formatting Objects (FO), https://en.wikipedia.org/wiki/XSL_Formatting_Objects %-A4.fo: stylesheet-fo.xsl %-full.xml $(XSLTPROC) $(XMLINCLUDE) $(XSLTPROCFLAGS) $(XSLTPROC_FO_FLAGS) --stringparam paper.type A4 -o $@ $^ @@ -194,7 +195,7 @@ MAKEINFO = makeinfo ## # Quick syntax check without style processing -check: postgres.sgml $(ALLSGML) check-tabs check-nbsp +check: postgres.sgml $(ALL_SGML) check-tabs check-nbsp $(XMLLINT) $(XMLINCLUDE) --noout --valid $< @@ -264,7 +265,7 @@ check-tabs: # Use perl command because non-GNU grep or sed could not have hex escape sequence. 
check-nbsp: @ ( $(PERL) -ne '/\xC2\xA0/ and print("$$ARGV:$$_"),$$n++; END {exit($$n>0)}' \ - $(wildcard $(srcdir)/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/*.xsl) ) || \ + $(wildcard $(srcdir)/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/images/*.svg $(srcdir)/*.xsl $(srcdir)/images/*.xsl) ) || \ (echo "Non-breaking spaces appear in SGML/XML files" 1>&2; exit 1) ## diff --git a/doc/src/sgml/README.non-ASCII b/doc/src/sgml/README.non-ASCII new file mode 100644 index 00000000000..9c21e02e8f2 --- /dev/null +++ b/doc/src/sgml/README.non-ASCII @@ -0,0 +1,37 @@ +<!-- doc/src/sgml/README.non-ASCII --> + +Representation of non-ASCII characters +-------------------------------------- + +Find non-ASCII characters using: + + grep --recursive --color='auto' -P '[\x80-\xFF]' . + +Convert to HTML4 named entity (&) escapes +----------------------------------------- + +We support several output formats: + +* html (supports all Unicode characters) +* man (supports all Unicode characters) +* pdf (supports only Latin-1 characters) +* info + +While some output formatting tools support all Unicode characters, +others only support Latin-1 characters. Specifically, the PDF rendering +engine can only display Latin-1 characters; non-Latin-1 Unicode +characters are displayed as "###". + +Therefore, in the SGML files, we only use Latin-1 characters. We +typically encode these characters as HTML entities, e.g., &Aacute;lvaro. +It is also possible to safely represent Latin-1 characters in UTF8 +encoding for all output formats. + +Do not use UTF numeric character escapes (&#nnn;). 
+ +HTML entities + official: http://www.w3.org/TR/html4/sgml/entities.html + one page: http://www.zipcon.net/~swhite/docs/computers/browsers/entities_page.html + other lists: http://www.zipcon.net/~swhite/docs/computers/browsers/entities.html + http://www.zipcon.net/~swhite/docs/computers/browsers/entities_page.html + https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml index 1ef5322b912..f5e115e8d6e 100644 --- a/doc/src/sgml/charset.sgml +++ b/doc/src/sgml/charset.sgml @@ -1225,7 +1225,7 @@ CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-tr <programlisting> -- ignore differences in accents and case CREATE COLLATION ignore_accent_case (provider = icu, deterministic = false, locale = 'und-u-ks-level1'); -SELECT 'Å' = 'A' COLLATE ignore_accent_case; -- true +SELECT '&Aring;' = 'A' COLLATE ignore_accent_case; -- true SELECT 'z' = 'Z' COLLATE ignore_accent_case; -- true -- upper case letters sort before lower case. @@ -1282,7 +1282,7 @@ SELECT 'w;x*y-z' = 'wxyz' COLLATE num_ignore_punct; -- true <entry><literal>'ab' = U&amp;'a\2063b'</literal></entry> <entry><literal>'x-y' = 'x_y'</literal></entry> <entry><literal>'g' = 'G'</literal></entry> - <entry><literal>'n' = 'ñ'</literal></entry> + <entry><literal>'n' = '&ntilde;'</literal></entry> <entry><literal>'y' = 'z'</literal></entry> </row> </thead> @@ -1346,7 +1346,7 @@ SELECT 'w;x*y-z' = 'wxyz' COLLATE num_ignore_punct; -- true <para> At every level, even with full normalization off, basic normalization is - performed. For example, <literal>'á'</literal> may be composed of the + performed. 
For example, <literal>'&aacute;'</literal> may be composed of the code points <literal>U&amp;'\0061\0301'</literal> or the single code point <literal>U&amp;'\00E1'</literal>, and those sequences will be considered equal even at the <literal>identic</literal> level. To treat @@ -1430,8 +1430,8 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false <entry><literal>false</literal></entry> <entry> Backwards comparison for the level 2 differences. For example, - locale <literal>und-u-kb</literal> sorts <literal>'àe'</literal> - before <literal>'aé'</literal>. + locale <literal>und-u-kb</literal> sorts <literal>'&agrave;e'</literal> + before <literal>'a&eacute;'</literal>. </entry> </row> diff --git a/doc/src/sgml/images/genetic-algorithm.svg b/doc/src/sgml/images/genetic-algorithm.svg index fb9fdd1ba78..2ce5f1b2712 100644 --- a/doc/src/sgml/images/genetic-algorithm.svg +++ b/doc/src/sgml/images/genetic-algorithm.svg @@ -72,7 +72,7 @@ <title>a4->end</title> <path fill="none" stroke="#000000" d="M259,-312.5834C259,-312.5834 259,-54.659 259,-54.659"/> <polygon fill="#000000" stroke="#000000" points="262.5001,-54.659 259,-44.659 255.5001,-54.6591 262.5001,-54.659"/> -<text text-anchor="middle" x="246" y="-186.6212" font-family="sans-serif" font-size="10.00" fill="#000000">true </text> +<text text-anchor="middle" x="246" y="-186.6212" font-family="sans-serif" font-size="10.00" fill="#000000">true</text> </g> <!-- a5 --> <g id="node7" class="node"> @@ -85,7 +85,7 @@ <title>a4->a5</title> <path fill="none" stroke="#000000" d="M144,-298.269C144,-298.269 144,-286.5248 144,-286.5248"/> <polygon fill="#000000" stroke="#000000" points="147.5001,-286.5248 144,-276.5248 140.5001,-286.5249 147.5001,-286.5248"/> -<text text-anchor="middle" x="127" y="-284.3969" font-family="sans-serif" font-size="10.00" fill="#000000">false </text> +<text text-anchor="middle" x="127" y="-284.3969" font-family="sans-serif" font-size="10.00" fill="#000000">false</text> </g> <!-- a6 --> <g id="node8" class="node"> diff --git 
a/doc/src/sgml/release.sgml b/doc/src/sgml/release.sgml index 8433690dead..cee577ff8d3 100644 --- a/doc/src/sgml/release.sgml +++ b/doc/src/sgml/release.sgml @@ -16,24 +16,6 @@ pg_[A-Za-z0-9_]+ <application>, <structname> \<[a-z]+_[a-z_]+\> <varname>, <structfield> <systemitem class="osname"> -non-ASCII characters find using grep -P '[\x80-\xFF]' or - (remove 'X') grep -X-color='auto' -P -n "[\x80-\xFF]" - convert to HTML4 named entity (&) escapes - - official: http://www.w3.org/TR/html4/sgml/entities.html - one page: http://www.zipcon.net/~swhite/docs/computers/browsers/entities_page.html - other lists: http://www.zipcon.net/~swhite/docs/computers/browsers/entities.html - http://www.zipcon.net/~swhite/docs/computers/browsers/entities_page.html - https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references - - We cannot use UTF8 because rendering engines have to - support the referenced characters. - - Do not use numeric _UTF_ numeric character escapes (&#nnn;), - we can only use Latin1. - - Example: Alvaro Herrera is &Aacute;lvaro Herrera - wrap long lines For new features, add links to the documentation sections. diff --git a/doc/src/sgml/stylesheet-man.xsl b/doc/src/sgml/stylesheet-man.xsl index fcb485c2931..2e2564da683 100644 --- a/doc/src/sgml/stylesheet-man.xsl +++ b/doc/src/sgml/stylesheet-man.xsl @@ -213,12 +213,12 @@ <!-- Slight rephrasing to indicate that missing sections are found in the documentation. 
--> <l:context name="xref-number-and-title"> - <l:template name="chapter" text="Chapter %n, %t, in the documentation"/> - <l:template name="sect1" text="Section %n, “%t”, in the documentation"/> - <l:template name="sect2" text="Section %n, “%t”, in the documentation"/> - <l:template name="sect3" text="Section %n, “%t”, in the documentation"/> - <l:template name="sect4" text="Section %n, “%t”, in the documentation"/> - <l:template name="sect5" text="Section %n, “%t”, in the documentation"/> + <l:template name="chapter" text="Chapter %n, &quot;%t&quot;, in the documentation"/> + <l:template name="sect1" text="Section %n, &quot;%t&quot;, in the documentation"/> + <l:template name="sect2" text="Section %n, &quot;%t&quot;, in the documentation"/> + <l:template name="sect3" text="Section %n, &quot;%t&quot;, in the documentation"/> + <l:template name="sect4" text="Section %n, &quot;%t&quot;, in the documentation"/> + <l:template name="sect5" text="Section %n, &quot;%t&quot;, in the documentation"/> </l:context> </l:l10n> </l:i18n> |