@@ -40,9 +40,6 @@ static void mb_wchar_to_cp50220(uint32_t *in, size_t len, mb_convert_buf *buf, b
40
40
static void mb_wchar_to_cp50221 (uint32_t * in , size_t len , mb_convert_buf * buf , bool end );
41
41
static void mb_wchar_to_cp50222 (uint32_t * in , size_t len , mb_convert_buf * buf , bool end );
42
42
43
- /* See mbstring.c */
44
- uint32_t mb_convert_kana_codepoint (uint32_t c , uint32_t next , bool * consumed , uint32_t * second , int mode );
45
-
46
43
/* Previously, a dubious 'encoding' called 'cp50220raw' was supported
47
44
* This was just CP50220, but the implementation was less strict regarding
48
45
* invalid characters; it would silently pass some through
@@ -333,6 +330,198 @@ static int mbfl_filt_conv_cp5022x_wchar_flush(mbfl_convert_filter *filter)
333
330
return 0 ;
334
331
}
335
332
333
+ /* Apply various transforms to input codepoint, such as converting halfwidth katakana
334
+ * to fullwidth katakana. `mode` is a bitfield which controls which transforms are
335
+ * actually performed. The bit values are defined in translit_kana_jisx0201_jisx0208.h.
336
+ * `mode` must not call for transforms which are inverses (i.e. which would cancel
337
+ * each other out).
338
+ *
339
+ * In some cases, successive input codepoints may be merged into one output codepoint.
340
+ * (That is the purpose of the `next` parameter.) If the `next` codepoint is consumed
341
+ * and should be skipped over, `*consumed` will be set to true. Otherwise, `*consumed`
342
+ * will not be modified. If there is no following codepoint, `next` should be zero.
343
+ *
344
+ * Again, in some cases, one input codepoint may convert to two output codepoints.
345
+ * If so, the second output codepoint will be stored in `*second`.
346
+ *
347
+ * Return the resulting codepoint. If none of the requested transforms apply, return
348
+ * the input codepoint unchanged.
349
+ */
350
+ uint32_t mb_convert_kana_codepoint (uint32_t c , uint32_t next , bool * consumed , uint32_t * second , unsigned int mode )
351
+ {
352
+ if ((mode & MBFL_HAN2ZEN_ALL ) && c >= 0x21 && c <= 0x7D && c != '"' && c != '\'' && c != '\\' ) {
353
+ return c + 0xFEE0 ;
354
+ }
355
+ if ((mode & MBFL_HAN2ZEN_ALPHA ) && ((c >= 'A' && c <= 'Z' ) || (c >= 'a' && c <= 'z' ))) {
356
+ return c + 0xFEE0 ;
357
+ }
358
+ if ((mode & MBFL_HAN2ZEN_NUMERIC ) && c >= '0' && c <= '9' ) {
359
+ return c + 0xFEE0 ;
360
+ }
361
+ if ((mode & MBFL_HAN2ZEN_SPACE ) && c == ' ' ) {
362
+ return 0x3000 ;
363
+ }
364
+
365
+ if (mode & (MBFL_HAN2ZEN_KATAKANA | MBFL_HAN2ZEN_HIRAGANA )) {
366
+ /* Convert Hankaku kana to Zenkaku kana
367
+ * Either all Hankaku kana (including katakana and hiragana) will be converted
368
+ * to Zenkaku katakana, or to Zenkaku hiragana */
369
+ if ((mode & MBFL_HAN2ZEN_KATAKANA ) && (mode & MBFL_HAN2ZEN_GLUE )) {
370
+ if (c >= 0xFF61 && c <= 0xFF9F ) {
371
+ int n = c - 0xFF60 ;
372
+
373
+ if (next >= 0xFF61 && next <= 0xFF9F ) {
374
+ if (next == 0xFF9E && ((n >= 22 && n <= 36 ) || (n >= 42 && n <= 46 ))) {
375
+ * consumed = true;
376
+ return 0x3001 + hankana2zenkana_table [n ];
377
+ }
378
+ if (next == 0xFF9E && n == 19 ) {
379
+ * consumed = true;
380
+ return 0x30F4 ;
381
+ }
382
+ if (next == 0xFF9F && n >= 42 && n <= 46 ) {
383
+ * consumed = true;
384
+ return 0x3002 + hankana2zenkana_table [n ];
385
+ }
386
+ }
387
+
388
+ return 0x3000 + hankana2zenkana_table [n ];
389
+ }
390
+ }
391
+ if ((mode & MBFL_HAN2ZEN_HIRAGANA ) && (mode & MBFL_HAN2ZEN_GLUE )) {
392
+ if (c >= 0xFF61 && c <= 0xFF9F ) {
393
+ int n = c - 0xFF60 ;
394
+
395
+ if (next >= 0xFF61 && next <= 0xFF9F ) {
396
+ if (next == 0xFF9E && ((n >= 22 && n <= 36 ) || (n >= 42 && n <= 46 ))) {
397
+ * consumed = true;
398
+ return 0x3001 + hankana2zenhira_table [n ];
399
+ }
400
+ if (next == 0xFF9F && n >= 42 && n <= 46 ) {
401
+ * consumed = true;
402
+ return 0x3002 + hankana2zenhira_table [n ];
403
+ }
404
+ }
405
+
406
+ return 0x3000 + hankana2zenhira_table [n ];
407
+ }
408
+ }
409
+ if ((mode & MBFL_HAN2ZEN_KATAKANA ) && c >= 0xFF61 && c <= 0xFF9F ) {
410
+ return 0x3000 + hankana2zenkana_table [c - 0xFF60 ];
411
+ }
412
+ if ((mode & MBFL_HAN2ZEN_HIRAGANA ) && c >= 0xFF61 && c <= 0xFF9F ) {
413
+ return 0x3000 + hankana2zenhira_table [c - 0xFF60 ];
414
+ }
415
+ }
416
+
417
+ if (mode & MBFL_HAN2ZEN_SPECIAL ) { /* special ascii to symbol */
418
+ if (c == '\\' || c == 0xA5 ) { /* YEN SIGN */
419
+ return 0xFFE5 ; /* FULLWIDTH YEN SIGN */
420
+ }
421
+ if (c == 0x7E || c == 0x203E ) {
422
+ return 0xFFE3 ; /* FULLWIDTH MACRON */
423
+ }
424
+ if (c == '\'' ) {
425
+ return 0x2019 ; /* RIGHT SINGLE QUOTATION MARK */
426
+ }
427
+ if (c == '"' ) {
428
+ return 0x201D ; /* RIGHT DOUBLE QUOTATION MARK */
429
+ }
430
+ }
431
+
432
+ if (mode & (MBFL_ZEN2HAN_ALL | MBFL_ZEN2HAN_ALPHA | MBFL_ZEN2HAN_NUMERIC | MBFL_ZEN2HAN_SPACE )) {
433
+ /* Zenkaku to Hankaku */
434
+ if ((mode & MBFL_ZEN2HAN_ALL ) && c >= 0xFF01 && c <= 0xFF5D && c != 0xFF02 && c != 0xFF07 && c != 0xFF3C ) {
435
+ /* all except " ' \ ~ */
436
+ return c - 0xFEE0 ;
437
+ }
438
+ if ((mode & MBFL_ZEN2HAN_ALPHA ) && ((c >= 0xFF21 && c <= 0xFF3A ) || (c >= 0xFF41 && c <= 0xFF5A ))) {
439
+ return c - 0xFEE0 ;
440
+ }
441
+ if ((mode & MBFL_ZEN2HAN_NUMERIC ) && (c >= 0xFF10 && c <= 0xFF19 )) {
442
+ return c - 0xFEE0 ;
443
+ }
444
+ if ((mode & MBFL_ZEN2HAN_SPACE ) && (c == 0x3000 )) {
445
+ return ' ' ;
446
+ }
447
+ if ((mode & MBFL_ZEN2HAN_ALL ) && (c == 0x2212 )) { /* MINUS SIGN */
448
+ return '-' ;
449
+ }
450
+ }
451
+
452
+ if (mode & (MBFL_ZEN2HAN_KATAKANA | MBFL_ZEN2HAN_HIRAGANA )) {
453
+ /* Zenkaku kana to hankaku kana */
454
+ if ((mode & MBFL_ZEN2HAN_KATAKANA ) && c >= 0x30A1 && c <= 0x30F4 ) {
455
+ /* Zenkaku katakana to hankaku kana */
456
+ int n = c - 0x30A1 ;
457
+ if (zenkana2hankana_table [n ][1 ]) {
458
+ * second = 0xFF00 + zenkana2hankana_table [n ][1 ];
459
+ }
460
+ return 0xFF00 + zenkana2hankana_table [n ][0 ];
461
+ }
462
+ if ((mode & MBFL_ZEN2HAN_HIRAGANA ) && c >= 0x3041 && c <= 0x3093 ) {
463
+ /* Zenkaku hiragana to hankaku kana */
464
+ int n = c - 0x3041 ;
465
+ if (zenkana2hankana_table [n ][1 ]) {
466
+ * second = 0xFF00 + zenkana2hankana_table [n ][1 ];
467
+ }
468
+ return 0xFF00 + zenkana2hankana_table [n ][0 ];
469
+ }
470
+ if (c == 0x3001 ) {
471
+ return 0xFF64 ; /* HALFWIDTH IDEOGRAPHIC COMMA */
472
+ }
473
+ if (c == 0x3002 ) {
474
+ return 0xFF61 ; /* HALFWIDTH IDEOGRAPHIC FULL STOP */
475
+ }
476
+ if (c == 0x300C ) {
477
+ return 0xFF62 ; /* HALFWIDTH LEFT CORNER BRACKET */
478
+ }
479
+ if (c == 0x300D ) {
480
+ return 0xFF63 ; /* HALFWIDTH RIGHT CORNER BRACKET */
481
+ }
482
+ if (c == 0x309B ) {
483
+ return 0xFF9E ; /* HALFWIDTH KATAKANA VOICED SOUND MARK */
484
+ }
485
+ if (c == 0x309C ) {
486
+ return 0xff9f ; /* HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK */
487
+ }
488
+ if (c == 0x30FC ) {
489
+ return 0xFF70 ; /* HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK */
490
+ }
491
+ if (c == 0x30FB ) {
492
+ return 0xFF65 ; /* HALFWIDTH KATAKANA MIDDLE DOT */
493
+ }
494
+ }
495
+
496
+ if (mode & (MBFL_ZENKAKU_HIRA2KATA | MBFL_ZENKAKU_KATA2HIRA )) {
497
+ if ((mode & MBFL_ZENKAKU_HIRA2KATA ) && ((c >= 0x3041 && c <= 0x3093 ) || c == 0x309D || c == 0x309E )) {
498
+ /* Zenkaku hiragana to Zenkaku katakana */
499
+ return c + 0x60 ;
500
+ }
501
+ if ((mode & MBFL_ZENKAKU_KATA2HIRA ) && ((c >= 0x30A1 && c <= 0x30F3 ) || c == 0x30FD || c == 0x30FE )) {
502
+ /* Zenkaku katakana to Zenkaku hiragana */
503
+ return c - 0x60 ;
504
+ }
505
+ }
506
+
507
+ if (mode & MBFL_ZEN2HAN_SPECIAL ) { /* special symbol to ascii */
508
+ if (c == 0xFFE5 || c == 0xFF3C ) { /* FULLWIDTH YEN SIGN/FULLWIDTH REVERSE SOLIDUS */
509
+ return '\\' ;
510
+ }
511
+ if (c == 0xFFE3 || c == 0x203E ) { /* FULLWIDTH MACRON/OVERLINE */
512
+ return '~' ;
513
+ }
514
+ if (c == 0x2018 || c == 0x2019 ) { /* LEFT/RIGHT SINGLE QUOTATION MARK*/
515
+ return '\'' ;
516
+ }
517
+ if (c == 0x201C || c == 0x201D ) { /* LEFT/RIGHT DOUBLE QUOTATION MARK */
518
+ return '"' ;
519
+ }
520
+ }
521
+
522
+ return c ;
523
+ }
524
+
336
525
static int mbfl_filt_conv_wchar_cp50220 (int c , mbfl_convert_filter * filter )
337
526
{
338
527
int mode = MBFL_HAN2ZEN_KATAKANA | MBFL_HAN2ZEN_GLUE ;
0 commit comments