PostgreSQL Source Code git master
utf8_and_iso8859.c
Go to the documentation of this file.
/*-------------------------------------------------------------------------
 *
 *	  ISO 8859 2-16 <--> UTF8
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c
 *
 *-------------------------------------------------------------------------
 */
13
14#include "postgres.h"
15#include "fmgr.h"
16#include "mb/pg_wchar.h"
17#include "../../Unicode/iso8859_10_to_utf8.map"
18#include "../../Unicode/iso8859_13_to_utf8.map"
19#include "../../Unicode/iso8859_14_to_utf8.map"
20#include "../../Unicode/iso8859_15_to_utf8.map"
21#include "../../Unicode/iso8859_2_to_utf8.map"
22#include "../../Unicode/iso8859_3_to_utf8.map"
23#include "../../Unicode/iso8859_4_to_utf8.map"
24#include "../../Unicode/iso8859_5_to_utf8.map"
25#include "../../Unicode/iso8859_6_to_utf8.map"
26#include "../../Unicode/iso8859_7_to_utf8.map"
27#include "../../Unicode/iso8859_8_to_utf8.map"
28#include "../../Unicode/iso8859_9_to_utf8.map"
29#include "../../Unicode/utf8_to_iso8859_10.map"
30#include "../../Unicode/utf8_to_iso8859_13.map"
31#include "../../Unicode/utf8_to_iso8859_14.map"
32#include "../../Unicode/utf8_to_iso8859_15.map"
33#include "../../Unicode/utf8_to_iso8859_16.map"
34#include "../../Unicode/utf8_to_iso8859_2.map"
35#include "../../Unicode/utf8_to_iso8859_3.map"
36#include "../../Unicode/utf8_to_iso8859_4.map"
37#include "../../Unicode/utf8_to_iso8859_5.map"
38#include "../../Unicode/utf8_to_iso8859_6.map"
39#include "../../Unicode/utf8_to_iso8859_7.map"
40#include "../../Unicode/utf8_to_iso8859_8.map"
41#include "../../Unicode/utf8_to_iso8859_9.map"
42#include "../../Unicode/iso8859_16_to_utf8.map"
43
45 .name = "utf8_and_iso8859",
46 .version = PG_VERSION
47);
48
51
/* ----------
 * conv_proc(
 *		INTEGER,	-- source encoding id
 *		INTEGER,	-- destination encoding id
 *		CSTRING,	-- source string (null terminated C string)
 *		CSTRING,	-- destination string (null terminated C string)
 *		INTEGER,	-- source string length
 *		BOOL		-- if true, don't throw an error if conversion fails
 * ) returns INTEGER;
 *
 * Returns the number of bytes successfully converted.
 * ----------
 */
65
66typedef struct
67{
69 const pg_mb_radix_tree *map1; /* to UTF8 map name */
70 const pg_mb_radix_tree *map2; /* from UTF8 map name */
72
73static const pg_conv_map maps[] = {
74 {PG_LATIN2, &iso8859_2_to_unicode_tree,
75 &iso8859_2_from_unicode_tree}, /* ISO-8859-2 Latin 2 */
76 {PG_LATIN3, &iso8859_3_to_unicode_tree,
77 &iso8859_3_from_unicode_tree}, /* ISO-8859-3 Latin 3 */
78 {PG_LATIN4, &iso8859_4_to_unicode_tree,
79 &iso8859_4_from_unicode_tree}, /* ISO-8859-4 Latin 4 */
80 {PG_LATIN5, &iso8859_9_to_unicode_tree,
81 &iso8859_9_from_unicode_tree}, /* ISO-8859-9 Latin 5 */
82 {PG_LATIN6, &iso8859_10_to_unicode_tree,
83 &iso8859_10_from_unicode_tree}, /* ISO-8859-10 Latin 6 */
84 {PG_LATIN7, &iso8859_13_to_unicode_tree,
85 &iso8859_13_from_unicode_tree}, /* ISO-8859-13 Latin 7 */
86 {PG_LATIN8, &iso8859_14_to_unicode_tree,
87 &iso8859_14_from_unicode_tree}, /* ISO-8859-14 Latin 8 */
88 {PG_LATIN9, &iso8859_15_to_unicode_tree,
89 &iso8859_15_from_unicode_tree}, /* ISO-8859-15 Latin 9 */
90 {PG_LATIN10, &iso8859_16_to_unicode_tree,
91 &iso8859_16_from_unicode_tree}, /* ISO-8859-16 Latin 10 */
92 {PG_ISO_8859_5, &iso8859_5_to_unicode_tree,
93 &iso8859_5_from_unicode_tree}, /* ISO-8859-5 */
94 {PG_ISO_8859_6, &iso8859_6_to_unicode_tree,
95 &iso8859_6_from_unicode_tree}, /* ISO-8859-6 */
96 {PG_ISO_8859_7, &iso8859_7_to_unicode_tree,
97 &iso8859_7_from_unicode_tree}, /* ISO-8859-7 */
98 {PG_ISO_8859_8, &iso8859_8_to_unicode_tree,
99 &iso8859_8_from_unicode_tree}, /* ISO-8859-8 */
100};
101
102Datum
104{
105 int encoding = PG_GETARG_INT32(0);
106 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
107 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
108 int len = PG_GETARG_INT32(4);
109 bool noError = PG_GETARG_BOOL(5);
110 int i;
111
113
114 for (i = 0; i < lengthof(maps); i++)
115 {
116 if (encoding == maps[i].encoding)
117 {
118 int converted;
119
120 converted = LocalToUtf(src, len, dest,
121 maps[i].map1,
122 NULL, 0,
123 NULL,
124 encoding,
125 noError);
126 PG_RETURN_INT32(converted);
127 }
128 }
129
131 (errcode(ERRCODE_INTERNAL_ERROR),
132 errmsg("unexpected encoding ID %d for ISO 8859 character sets",
133 encoding)));
134
136}
137
138Datum
140{
141 int encoding = PG_GETARG_INT32(1);
142 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
143 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
144 int len = PG_GETARG_INT32(4);
145 bool noError = PG_GETARG_BOOL(5);
146 int i;
147
149
150 for (i = 0; i < lengthof(maps); i++)
151 {
152 if (encoding == maps[i].encoding)
153 {
154 int converted;
155
156 converted = UtfToLocal(src, len, dest,
157 maps[i].map2,
158 NULL, 0,
159 NULL,
160 encoding,
161 noError);
162 PG_RETURN_INT32(converted);
163 }
164 }
165
167 (errcode(ERRCODE_INTERNAL_ERROR),
168 errmsg("unexpected encoding ID %d for ISO 8859 character sets",
169 encoding)));
170
172}
#define lengthof(array)
Definition: c.h:759
int UtfToLocal(const unsigned char *utf, int len, unsigned char *iso, const pg_mb_radix_tree *map, const pg_utf_to_local_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError)
Definition: conv.c:507
int LocalToUtf(const unsigned char *iso, int len, unsigned char *utf, const pg_mb_radix_tree *map, const pg_local_to_utf_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError)
Definition: conv.c:717
int errcode(int sqlerrcode)
Definition: elog.c:854
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:277
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
int i
Definition: isn.c:77
const void size_t len
int32 encoding
Definition: pg_database.h:41
pg_enc
Definition: pg_wchar.h:225
@ PG_LATIN4
Definition: pg_wchar.h:237
@ PG_LATIN9
Definition: pg_wchar.h:242
@ PG_ISO_8859_6
Definition: pg_wchar.h:252
@ PG_LATIN6
Definition: pg_wchar.h:239
@ PG_LATIN5
Definition: pg_wchar.h:238
@ PG_LATIN2
Definition: pg_wchar.h:235
@ PG_ISO_8859_5
Definition: pg_wchar.h:251
@ PG_LATIN10
Definition: pg_wchar.h:243
@ PG_ISO_8859_7
Definition: pg_wchar.h:253
@ PG_LATIN8
Definition: pg_wchar.h:241
@ PG_LATIN3
Definition: pg_wchar.h:236
@ PG_LATIN7
Definition: pg_wchar.h:240
@ PG_UTF8
Definition: pg_wchar.h:232
@ PG_ISO_8859_8
Definition: pg_wchar.h:254
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:507
uintptr_t Datum
Definition: postgres.h:69
const pg_mb_radix_tree * map2
const pg_mb_radix_tree * map1
PG_MODULE_MAGIC_EXT(.name="utf8_and_iso8859",.version=PG_VERSION)
static const pg_conv_map maps[]
Datum utf8_to_iso8859(PG_FUNCTION_ARGS)
Datum iso8859_to_utf8(PG_FUNCTION_ARGS)
PG_FUNCTION_INFO_V1(iso8859_to_utf8)
const char * name