34
34
#include "unicode_table_cp932_ext.h"
35
35
#include "unicode_table_jis.h"
36
36
#include "cp932_table.h"
37
+ #include "emoji2uni.h"
38
+
39
+ static size_t mb_iso2022jp_kddi_to_wchar (unsigned char * * in , size_t * in_len , uint32_t * buf , size_t bufsize , unsigned int * state );
40
+ static void mb_wchar_to_iso2022jp_kddi (uint32_t * in , size_t len , mb_convert_buf * buf , bool end );
37
41
38
42
static int mbfl_filt_conv_2022jp_mobile_wchar_flush (mbfl_convert_filter * filter );
39
43
static int mbfl_filt_conv_wchar_2022jp_mobile_flush (mbfl_convert_filter * filter );
40
44
45
+ extern int mbfl_bisec_srch2 (int w , const unsigned short tbl [], int n );
46
+
41
47
static const char * mbfl_encoding_2022jp_kddi_aliases [] = {"ISO-2022-JP-KDDI" , NULL };
42
48
43
49
const mbfl_encoding mbfl_encoding_2022jp_kddi = {
@@ -49,8 +55,8 @@ const mbfl_encoding mbfl_encoding_2022jp_kddi = {
49
55
MBFL_ENCTYPE_GL_UNSAFE ,
50
56
& vtbl_2022jp_kddi_wchar ,
51
57
& vtbl_wchar_2022jp_kddi ,
52
- NULL ,
53
- NULL
58
+ mb_iso2022jp_kddi_to_wchar ,
59
+ mb_wchar_to_iso2022jp_kddi
54
60
};
55
61
56
62
const struct mbfl_convert_vtbl vtbl_2022jp_kddi_wchar = {
@@ -115,6 +121,7 @@ const struct mbfl_convert_vtbl vtbl_wchar_2022jp_kddi = {
115
121
s1 = ((c1) << 8) | (c2); \
116
122
s2 = 1
117
123
124
+ #define ASCII 0
118
125
#define JISX0201_KANA 0x20
119
126
#define JISX0208_KANJI 0x80
120
127
@@ -363,3 +370,279 @@ static int mbfl_filt_conv_wchar_2022jp_mobile_flush(mbfl_convert_filter *filter)
363
370
364
371
return 0 ;
365
372
}
373
+
374
+ static size_t mb_iso2022jp_kddi_to_wchar (unsigned char * * in , size_t * in_len , uint32_t * buf , size_t bufsize , unsigned int * state )
375
+ {
376
+ unsigned char * p = * in , * e = p + * in_len ;
377
+ uint32_t * out = buf , * limit = buf + bufsize - 1 ;
378
+
379
+ while (p < e && out < limit ) {
380
+ unsigned char c = * p ++ ;
381
+
382
+ if (c == 0x1B ) {
383
+ if ((e - p ) < 2 ) {
384
+ p = e ;
385
+ * out ++ = MBFL_BAD_INPUT ;
386
+ break ;
387
+ }
388
+ unsigned char c2 = * p ++ ;
389
+ unsigned char c3 = * p ++ ;
390
+
391
+ if (c2 == '$' ) {
392
+ if (c3 == '@' || c3 == 'B' ) {
393
+ * state = JISX0208_KANJI ;
394
+ } else if (c3 == '(' ) {
395
+ if (p == e ) {
396
+ * out ++ = MBFL_BAD_INPUT ;
397
+ break ;
398
+ }
399
+ unsigned char c4 = * p ++ ;
400
+
401
+ if (c4 == '@' || c4 == 'B' ) {
402
+ * state = JISX0208_KANJI ;
403
+ } else {
404
+ * out ++ = MBFL_BAD_INPUT ;
405
+ }
406
+ } else {
407
+ * out ++ = MBFL_BAD_INPUT ;
408
+ }
409
+ } else if (c2 == '(' ) {
410
+ if (c3 == 'B' || c3 == 'J' ) {
411
+ * state = ASCII ;
412
+ } else if (c3 == 'I' ) {
413
+ * state = JISX0201_KANA ;
414
+ } else {
415
+ * out ++ = MBFL_BAD_INPUT ;
416
+ }
417
+ } else {
418
+ p -- ;
419
+ * out ++ = MBFL_BAD_INPUT ;
420
+ }
421
+ } else if (* state == JISX0201_KANA && c >= 0x21 && c <= 0x5F ) {
422
+ * out ++ = 0xFF40 + c ;
423
+ } else if (* state == JISX0208_KANJI && c >= 0x21 && c <= 0x7F ) {
424
+ if (p == e ) {
425
+ * out ++ = MBFL_BAD_INPUT ;
426
+ break ;
427
+ }
428
+ unsigned char c2 = * p ++ ;
429
+
430
+ if (c2 >= 0x21 && c2 <= 0x7E ) {
431
+ unsigned int s = ((c - 0x21 ) * 94 ) + c2 - 0x21 ;
432
+ uint32_t w = 0 ;
433
+
434
+ if (s <= 137 ) {
435
+ if (s == 31 ) {
436
+ w = 0xFF3C ; /* FULLWIDTH REVERSE SOLIDUS */
437
+ } else if (s == 32 ) {
438
+ w = 0xFF5E ; /* FULLWIDTH TILDE */
439
+ } else if (s == 33 ) {
440
+ w = 0x2225 ; /* PARALLEL TO */
441
+ } else if (s == 60 ) {
442
+ w = 0xFF0D ; /* FULLWIDTH HYPHEN-MINUS */
443
+ } else if (s == 80 ) {
444
+ w = 0xFFE0 ; /* FULLWIDTH CENT SIGN */
445
+ } else if (s == 81 ) {
446
+ w = 0xFFE1 ; /* FULLWIDTH POUND SIGN */
447
+ } else if (s == 137 ) {
448
+ w = 0xFFE2 ; /* FULLWIDTH NOT SIGN */
449
+ }
450
+ }
451
+
452
+ if (s >= (84 * 94 ) && s < (91 * 94 )) {
453
+ int snd = 0 ;
454
+ s += 22 * 94 ;
455
+ w = mbfilter_sjis_emoji_kddi2unicode (s , & snd );
456
+ if (w && snd ) {
457
+ * out ++ = snd ;
458
+ }
459
+ }
460
+
461
+ if (!w ) {
462
+ if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max ) {
463
+ w = cp932ext1_ucs_table [s - cp932ext1_ucs_table_min ];
464
+ } else if (s < jisx0208_ucs_table_size ) {
465
+ w = jisx0208_ucs_table [s ];
466
+ }
467
+ }
468
+
469
+ * out ++ = w ? w : MBFL_BAD_INPUT ;
470
+ } else {
471
+ * out ++ = MBFL_BAD_INPUT ;
472
+ }
473
+ } else if (c <= 0x7F ) {
474
+ * out ++ = c ;
475
+ } else if (c >= 0xA1 && c <= 0xDF ) {
476
+ * out ++ = 0xFEC0 + c ;
477
+ } else {
478
+ * out ++ = MBFL_BAD_INPUT ;
479
+ }
480
+ }
481
+
482
+ * in_len = e - p ;
483
+ * in = p ;
484
+ return out - buf ;
485
+ }
486
+
487
/* Flag emoji are written in Unicode as a pair of Regional Indicator symbols.
 * The Regional Indicators occupy U+1F1E6..U+1F1FF, one for each of the letters
 * A-Z, and the pair spells out the two-letter code of the country.
 *
 * NFLAGS converts an uppercase ASCII letter to its Regional Indicator
 * codepoint ('A' is 0x41, and 0x1F1A5 + 0x41 == 0x1F1E6). */
#define NFLAGS(c) (((unsigned int)(c)) + 0x1F1A5)

/* Two-letter codes of the flags which have a KDDI emoji. Entry N here goes
 * with entry N of nflags_code_kddi. The letters are stored without a
 * terminating NUL byte. */
static const char nflags_s[10][2] = {
	{'C', 'N'},
	{'D', 'E'},
	{'E', 'S'},
	{'F', 'R'},
	{'G', 'B'},
	{'I', 'T'},
	{'J', 'P'},
	{'K', 'R'},
	{'R', 'U'},
	{'U', 'S'}
};

/* KDDI emoji code for each of the flags listed in nflags_s */
static const int nflags_code_kddi[10] = {
	0x2549, /* CN */
	0x2546, /* DE */
	0x24C0, /* ES */
	0x2545, /* FR */
	0x2548, /* GB */
	0x2547, /* IT */
	0x2750, /* JP */
	0x254A, /* KR */
	0x24C1, /* RU */
	0x27F7  /* US */
};
500
+
501
+ static void mb_wchar_to_iso2022jp_kddi (uint32_t * in , size_t len , mb_convert_buf * buf , bool end )
502
+ {
503
+ unsigned char * out , * limit ;
504
+ MB_CONVERT_BUF_LOAD (buf , out , limit );
505
+ MB_CONVERT_BUF_ENSURE (buf , out , limit , len );
506
+
507
+ while (len -- ) {
508
+ uint32_t w = * in ++ ;
509
+ unsigned int s = 0 ;
510
+
511
+ if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max ) {
512
+ s = ucs_a1_jis_table [w - ucs_a1_jis_table_min ];
513
+ } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max ) {
514
+ s = ucs_a2_jis_table [w - ucs_a2_jis_table_min ];
515
+ } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max ) {
516
+ s = ucs_i_jis_table [w - ucs_i_jis_table_min ];
517
+ } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max ) {
518
+ s = ucs_r_jis_table [w - ucs_r_jis_table_min ];
519
+ }
520
+
521
+ if (!s ) {
522
+ if (w == 0xA5 ) { /* YEN SIGN */
523
+ s = 0x216F ; /* FULLWIDTH YEN SIGN */
524
+ } else if (w == 0xFF3C ) { /* FULLWIDTH REVERSE SOLIDUS */
525
+ s = 0x2140 ;
526
+ } else if (w == 0x2225 ) { /* PARALLEL TO */
527
+ s = 0x2142 ;
528
+ } else if (w == 0xFF0D ) { /* FULLWIDTH HYPHEN-MINUS */
529
+ s = 0x215D ;
530
+ } else if (w == 0xFFE0 ) { /* FULLWIDTH CENT SIGN */
531
+ s = 0x2171 ;
532
+ } else if (w == 0xFFE1 ) { /* FULLWIDTH POUND SIGN */
533
+ s = 0x2172 ;
534
+ } else if (w == 0xFFE2 ) { /* FULLWIDTH NOT SIGN */
535
+ s = 0x224C ;
536
+ }
537
+ }
538
+
539
+ if ((w == '#' || (w >= '0' && w <= '9' )) && len ) {
540
+ uint32_t w2 = * in ++ ; len -- ;
541
+
542
+ if (w2 == 0x20E3 ) {
543
+ unsigned int s1 = 0 ;
544
+ if (w == '#' ) {
545
+ s1 = 0x25BC ;
546
+ } else if (w == '0' ) {
547
+ s1 = 0x2830 ;
548
+ } else { /* Previous character was '1'-'9' */
549
+ s1 = 0x27A6 + (w - '1' );
550
+ }
551
+ s = (((s1 / 94 ) + 0x21 ) << 8 ) + ((s1 % 94 ) + 0x21 ) - 0x1600 ;
552
+ } else {
553
+ in -- ; len ++ ;
554
+ }
555
+ } else if (w >= NFLAGS ('C' ) && w <= NFLAGS ('U' ) && len ) { /* C for CN, U for US */
556
+ uint32_t w2 = * in ++ ; len -- ;
557
+
558
+ if (w2 >= NFLAGS ('B' ) && w2 <= NFLAGS ('U' )) { /* B for GB, U for RU */
559
+ for (int i = 0 ; i < 10 ; i ++ ) {
560
+ if (w == NFLAGS (nflags_s [i ][0 ]) && w2 == NFLAGS (nflags_s [i ][1 ])) {
561
+ unsigned int s1 = nflags_code_kddi [i ];
562
+ s = (((s1 / 94 ) + 0x21 ) << 8 ) + ((s1 % 94 ) + 0x21 ) - 0x1600 ;
563
+ goto found_flag_emoji ;
564
+ }
565
+ }
566
+ }
567
+
568
+ in -- ; len ++ ;
569
+ found_flag_emoji : ;
570
+ }
571
+
572
+ if (w == 0xA9 ) { /* Copyright sign */
573
+ unsigned int s1 = 0x27DC ;
574
+ s = (((s1 / 94 ) + 0x21 ) << 8 ) + ((s1 % 94 ) + 0x21 ) - 0x1600 ;
575
+ } else if (w == 0xAE ) { /* Registered sign */
576
+ unsigned int s1 = 0x27DD ;
577
+ s = (((s1 / 94 ) + 0x21 ) << 8 ) + ((s1 % 94 ) + 0x21 ) - 0x1600 ;
578
+ } else if (w >= mb_tbl_uni_kddi2code2_min && w <= mb_tbl_uni_kddi2code2_max ) {
579
+ int i = mbfl_bisec_srch2 (w , mb_tbl_uni_kddi2code2_key , mb_tbl_uni_kddi2code2_len );
580
+ if (i >= 0 ) {
581
+ unsigned int s1 = mb_tbl_uni_kddi2code2_value [i ];
582
+ s = (((s1 / 94 ) + 0x21 ) << 8 ) + ((s1 % 94 ) + 0x21 ) - 0x1600 ;
583
+ }
584
+ } else if (w >= mb_tbl_uni_kddi2code3_min && w <= mb_tbl_uni_kddi2code3_max ) {
585
+ int i = mbfl_bisec_srch2 (w - 0x10000 , mb_tbl_uni_kddi2code3_key , mb_tbl_uni_kddi2code3_len );
586
+ if (i >= 0 ) {
587
+ unsigned int s1 = mb_tbl_uni_kddi2code3_value [i ];
588
+ s = (((s1 / 94 ) + 0x21 ) << 8 ) + ((s1 % 94 ) + 0x21 ) - 0x1600 ;
589
+ }
590
+ } else if (w >= mb_tbl_uni_kddi2code5_min && w <= mb_tbl_uni_kddi2code5_max ) {
591
+ int i = mbfl_bisec_srch2 (w - 0xF0000 , mb_tbl_uni_kddi2code5_key , mb_tbl_uni_kddi2code5_len );
592
+ if (i >= 0 ) {
593
+ unsigned int s1 = mb_tbl_uni_kddi2code5_val [i ];
594
+ s = (((s1 / 94 ) + 0x21 ) << 8 ) + ((s1 % 94 ) + 0x21 ) - 0x1600 ;
595
+ }
596
+ }
597
+
598
+ if (!s || s >= 0xA1A1 ) {
599
+ s = 0 ;
600
+ for (int i = 0 ; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min ; i ++ ) {
601
+ if (w == cp932ext1_ucs_table [i ]) {
602
+ s = (((i / 94 ) + 0x2D ) << 8 ) + (i % 94 ) + 0x21 ;
603
+ break ;
604
+ }
605
+ }
606
+ if (w == 0 )
607
+ s = 0 ;
608
+ }
609
+
610
+ if (!s && w ) {
611
+ MB_CONVERT_ERROR (buf , out , limit , w , mb_wchar_to_iso2022jp_kddi );
612
+ MB_CONVERT_BUF_ENSURE (buf , out , limit , len );
613
+ } else if (s <= 0x7F ) {
614
+ if (buf -> state != ASCII ) {
615
+ MB_CONVERT_BUF_ENSURE (buf , out , limit , len + 4 );
616
+ out = mb_convert_buf_add3 (out , 0x1B , '(' , 'B' );
617
+ buf -> state = ASCII ;
618
+ }
619
+ out = mb_convert_buf_add (out , s );
620
+ } else if (s >= 0xA1 && s <= 0xDF ) {
621
+ if (buf -> state != JISX0201_KANA ) {
622
+ MB_CONVERT_BUF_ENSURE (buf , out , limit , len + 4 );
623
+ out = mb_convert_buf_add3 (out , 0x1B , '(' , 'I' );
624
+ buf -> state = JISX0201_KANA ;
625
+ }
626
+ out = mb_convert_buf_add (out , s & 0x7F );
627
+ } else if (s <= 0x7E7E ) {
628
+ if (buf -> state != JISX0208_KANJI ) {
629
+ MB_CONVERT_BUF_ENSURE (buf , out , limit , len + 5 );
630
+ out = mb_convert_buf_add3 (out , 0x1B , '$' , 'B' );
631
+ buf -> state = JISX0208_KANJI ;
632
+ } else {
633
+ MB_CONVERT_BUF_ENSURE (buf , out , limit , len + 2 );
634
+ }
635
+ out = mb_convert_buf_add2 (out , (s >> 8 ) & 0xFF , s & 0xFF );
636
+ } else {
637
+ MB_CONVERT_ERROR (buf , out , limit , w , mb_wchar_to_iso2022jp_kddi );
638
+ MB_CONVERT_BUF_ENSURE (buf , out , limit , len );
639
+ }
640
+ }
641
+
642
+ if (end && buf -> state != ASCII ) {
643
+ MB_CONVERT_BUF_ENSURE (buf , out , limit , 3 );
644
+ out = mb_convert_buf_add3 (out , 0x1B , '(' , 'B' );
645
+ }
646
+
647
+ MB_CONVERT_BUF_STORE (buf , out , limit );
648
+ }
0 commit comments