PostgreSQL Source Code git master
euc2004_sjis2004.c
Go to the documentation of this file.
/*-------------------------------------------------------------------------
 *
 *	  EUC_JIS_2004, SHIFT_JIS_2004
 *
 * Copyright (c) 2007-2025, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c
 *
 *-------------------------------------------------------------------------
 */
12
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
16
18 .name = "euc2004_sjis2004",
19 .version = PG_VERSION
20);
21
24
25static int euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len, bool noError);
26static int shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len, bool noError);
27
/* ----------
 * conv_proc(
 *		INTEGER,	-- source encoding id
 *		INTEGER,	-- destination encoding id
 *		CSTRING,	-- source string (null terminated C string)
 *		CSTRING,	-- destination string (null terminated C string)
 *		INTEGER,	-- source string length
 *		BOOL		-- if true, don't throw an error if conversion fails
 * ) returns INTEGER;
 *
 * Returns the number of bytes successfully converted.
 * ----------
 */
41
44{
45 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
46 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
47 int len = PG_GETARG_INT32(4);
48 bool noError = PG_GETARG_BOOL(5);
49 int converted;
50
52
53 converted = euc_jis_20042shift_jis_2004(src, dest, len, noError);
54
55 PG_RETURN_INT32(converted);
56}
57
60{
61 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
62 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
63 int len = PG_GETARG_INT32(4);
64 bool noError = PG_GETARG_BOOL(5);
65 int converted;
66
68
69 converted = shift_jis_20042euc_jis_2004(src, dest, len, noError);
70
71 PG_RETURN_INT32(converted);
72}
73
74/*
75 * EUC_JIS_2004 -> SHIFT_JIS_2004
76 */
77static int
78euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len, bool noError)
79{
80 const unsigned char *start = euc;
81 int c1,
82 ku,
83 ten;
84 int l;
85
86 while (len > 0)
87 {
88 c1 = *euc;
89 if (!IS_HIGHBIT_SET(c1))
90 {
91 /* ASCII */
92 if (c1 == 0)
93 {
94 if (noError)
95 break;
97 (const char *) euc, len);
98 }
99 *p++ = c1;
100 euc++;
101 len--;
102 continue;
103 }
104
105 l = pg_encoding_verifymbchar(PG_EUC_JIS_2004, (const char *) euc, len);
106
107 if (l < 0)
108 {
109 if (noError)
110 break;
112 (const char *) euc, len);
113 }
114
115 if (c1 == SS2 && l == 2) /* JIS X 0201 kana? */
116 {
117 *p++ = euc[1];
118 }
119 else if (c1 == SS3 && l == 3) /* JIS X 0213 plane 2? */
120 {
121 ku = euc[1] - 0xa0;
122 ten = euc[2] - 0xa0;
123
124 switch (ku)
125 {
126 case 1:
127 case 3:
128 case 4:
129 case 5:
130 case 8:
131 case 12:
132 case 13:
133 case 14:
134 case 15:
135 *p++ = ((ku + 0x1df) >> 1) - (ku >> 3) * 3;
136 break;
137 default:
138 if (ku >= 78 && ku <= 94)
139 {
140 *p++ = (ku + 0x19b) >> 1;
141 }
142 else
143 {
144 if (noError)
145 break;
147 (const char *) euc, len);
148 }
149 }
150
151 if (ku % 2)
152 {
153 if (ten >= 1 && ten <= 63)
154 *p++ = ten + 0x3f;
155 else if (ten >= 64 && ten <= 94)
156 *p++ = ten + 0x40;
157 else
158 {
159 if (noError)
160 break;
162 (const char *) euc, len);
163 }
164 }
165 else
166 *p++ = ten + 0x9e;
167 }
168
169 else if (l == 2) /* JIS X 0213 plane 1? */
170 {
171 ku = c1 - 0xa0;
172 ten = euc[1] - 0xa0;
173
174 if (ku >= 1 && ku <= 62)
175 *p++ = (ku + 0x101) >> 1;
176 else if (ku >= 63 && ku <= 94)
177 *p++ = (ku + 0x181) >> 1;
178 else
179 {
180 if (noError)
181 break;
183 (const char *) euc, len);
184 }
185
186 if (ku % 2)
187 {
188 if (ten >= 1 && ten <= 63)
189 *p++ = ten + 0x3f;
190 else if (ten >= 64 && ten <= 94)
191 *p++ = ten + 0x40;
192 else
193 {
194 if (noError)
195 break;
197 (const char *) euc, len);
198 }
199 }
200 else
201 *p++ = ten + 0x9e;
202 }
203 else
204 {
205 if (noError)
206 break;
208 (const char *) euc, len);
209 }
210
211 euc += l;
212 len -= l;
213 }
214 *p = '\0';
215
216 return euc - start;
217}
218
/*
 * Decode the second byte of a two-byte SHIFT_JIS_2004 character.
 *
 * Returns the "ten" (cell number, 1..94) encoded by byte "b", or -1 if "b"
 * is not a valid second byte.  The parity of the row the byte belongs to
 * is stored through "ku":
 *		*ku = 0: "ku" = even
 *		*ku = 1: "ku" = odd
 * On error *ku is set to 0, so it is always assigned.
 */
static int
get_ten(int b, int *ku)
{
	int			ten;

	if (b >= 0x40 && b <= 0x7e)
	{
		/* odd row, cells 1..63 */
		ten = b - 0x3f;
		*ku = 1;
	}
	else if (b >= 0x80 && b <= 0x9e)
	{
		/* odd row, cells 64..94 (0x7f is not used) */
		ten = b - 0x40;
		*ku = 1;
	}
	else if (b >= 0x9f && b <= 0xfc)
	{
		/* even row, cells 1..94 */
		ten = b - 0x9e;
		*ku = 0;
	}
	else
	{
		ten = -1;				/* error */
		*ku = 0;				/* keep compiler quiet */
	}
	return ten;
}
251
252/*
253 * SHIFT_JIS_2004 ---> EUC_JIS_2004
254 */
255
256static int
257shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len, bool noError)
258{
259 const unsigned char *start = sjis;
260 int c1;
261 int ku,
262 ten,
263 kubun;
264 int plane;
265 int l;
266
267 while (len > 0)
268 {
269 c1 = *sjis;
270
271 if (!IS_HIGHBIT_SET(c1))
272 {
273 /* ASCII */
274 if (c1 == 0)
275 {
276 if (noError)
277 break;
279 (const char *) sjis, len);
280 }
281 *p++ = c1;
282 sjis++;
283 len--;
284 continue;
285 }
286
287 l = pg_encoding_verifymbchar(PG_SHIFT_JIS_2004, (const char *) sjis, len);
288
289 if (l < 0 || l > len)
290 {
291 if (noError)
292 break;
294 (const char *) sjis, len);
295 }
296
297 if (c1 >= 0xa1 && c1 <= 0xdf && l == 1)
298 {
299 /* JIS X0201 (1 byte kana) */
300 *p++ = SS2;
301 *p++ = c1;
302 }
303 else if (l == 2)
304 {
305 int c2 = sjis[1];
306
307 plane = 1;
308 ku = 1;
309 ten = 1;
310
311 /*
312 * JIS X 0213
313 */
314 if (c1 >= 0x81 && c1 <= 0x9f) /* plane 1 1ku-62ku */
315 {
316 ku = (c1 << 1) - 0x100;
317 ten = get_ten(c2, &kubun);
318 if (ten < 0)
319 {
320 if (noError)
321 break;
323 (const char *) sjis, len);
324 }
325 ku -= kubun;
326 }
327 else if (c1 >= 0xe0 && c1 <= 0xef) /* plane 1 62ku-94ku */
328 {
329 ku = (c1 << 1) - 0x180;
330 ten = get_ten(c2, &kubun);
331 if (ten < 0)
332 {
333 if (noError)
334 break;
336 (const char *) sjis, len);
337 }
338 ku -= kubun;
339 }
340 else if (c1 >= 0xf0 && c1 <= 0xf3) /* plane 2
341 * 1,3,4,5,8,12,13,14,15 ku */
342 {
343 plane = 2;
344 ten = get_ten(c2, &kubun);
345 if (ten < 0)
346 {
347 if (noError)
348 break;
350 (const char *) sjis, len);
351 }
352 switch (c1)
353 {
354 case 0xf0:
355 ku = kubun == 0 ? 8 : 1;
356 break;
357 case 0xf1:
358 ku = kubun == 0 ? 4 : 3;
359 break;
360 case 0xf2:
361 ku = kubun == 0 ? 12 : 5;
362 break;
363 default:
364 ku = kubun == 0 ? 14 : 13;
365 break;
366 }
367 }
368 else if (c1 >= 0xf4 && c1 <= 0xfc) /* plane 2 78-94ku */
369 {
370 plane = 2;
371 ten = get_ten(c2, &kubun);
372 if (ten < 0)
373 {
374 if (noError)
375 break;
377 (const char *) sjis, len);
378 }
379 if (c1 == 0xf4 && kubun == 1)
380 ku = 15;
381 else
382 ku = (c1 << 1) - 0x19a - kubun;
383 }
384 else
385 {
386 if (noError)
387 break;
389 (const char *) sjis, len);
390 }
391
392 if (plane == 2)
393 *p++ = SS3;
394
395 *p++ = ku + 0xa0;
396 *p++ = ten + 0xa0;
397 }
398 sjis += l;
399 len -= l;
400 }
401 *p = '\0';
402
403 return sjis - start;
404}
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1126
static int euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len, bool noError)
static int get_ten(int b, int *ku)
Datum shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS)
PG_FUNCTION_INFO_V1(euc_jis_2004_to_shift_jis_2004)
static int shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len, bool noError)
PG_MODULE_MAGIC_EXT(.name="euc2004_sjis2004",.version=PG_VERSION)
Datum euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS)
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:277
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
return str start
int b
Definition: isn.c:74
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1698
const void size_t len
@ PG_SHIFT_JIS_2004
Definition: pg_wchar.h:270
@ PG_EUC_JIS_2004
Definition: pg_wchar.h:231
#define SS2
Definition: pg_wchar.h:38
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:507
#define SS3
Definition: pg_wchar.h:39
uintptr_t Datum
Definition: postgres.h:69
const char * name
int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)
Definition: wchar.c:2189