diff options
author | Luke T. Shumaker <[email protected]> | 2024-02-22 20:51:28 -0700 |
---|---|---|
committer | Hiroshi SHIBATA <[email protected]> | 2024-10-08 14:10:05 +0900 |
commit | 6e47968929f2ee77376d28a6561266d8f8e3a4f7 (patch) | |
tree | 89a2561cfaef397c299bfd5c373fa2f290c934b0 /ext/json | |
parent | bad4ad63bff71748302d1089523a5dcbc40f6c57 (diff) |
[ruby/json] Delete code that is based on CVTUTF
I did this based on manual inspection, comparing the code to my re-created
history of CVTUTF at https://git.lukeshu.com/2git/cvtutf/ (created by the
scripts at https://git.lukeshu.com/2git/cvtutf-make/)
https://github.com/ruby/json/commit/0819553144
Diffstat (limited to 'ext/json')
-rw-r--r-- | ext/json/generator/generator.c | 232 | ||||
-rw-r--r-- | ext/json/generator/generator.h | 24 | ||||
-rw-r--r-- | ext/json/parser/parser.h | 12 |
3 files changed, 0 insertions, 268 deletions
diff --git a/ext/json/generator/generator.c b/ext/json/generator/generator.c index b8734764c7..d3f6516511 100644 --- a/ext/json/generator/generator.c +++ b/ext/json/generator/generator.c @@ -18,54 +18,6 @@ static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before, i_aref, i_send, i_respond_to_p, i_match, i_keys, i_depth, i_buffer_initial_length, i_dup, i_script_safe, i_escape_slash, i_strict; -/* - * Copyright 2001-2004 Unicode, Inc. - * - * Disclaimer - * - * This source code is provided as is by Unicode, Inc. No claims are - * made as to fitness for any particular purpose. No warranties of any - * kind are expressed or implied. The recipient agrees to determine - * applicability of information provided. If this file has been - * purchased on magnetic or optical media from Unicode, Inc., the - * sole remedy for any claim will be exchange of defective media - * within 90 days of receipt. - * - * Limitations on Rights to Redistribute This Code - * - * Unicode, Inc. hereby grants the right to freely use the information - * supplied in this file in the creation of products supporting the - * Unicode Standard, and to make copies of this file in any form - * for internal or external distribution as long as this notice - * remains attached. - */ - -/* - * Index into the table below with the first byte of a UTF-8 sequence to - * get the number of trailing bytes that are supposed to follow it. - * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is - * left as-is for anyone who may want to do such conversion, which was - * allowed in earlier algorithms. - */ -static const char trailingBytesForUTF8[256] = { - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 -}; - -/* - * Magic values subtracted from a buffer value during UTF8 conversion. - * This table contains as many values as there might be trailing bytes - * in a UTF-8 sequence. - */ -static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, - 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; - /* Escapes the UTF16 character and stores the result in the buffer buf. */ static void unicode_escape(char *buf, UTF16 character) { @@ -94,98 +46,6 @@ static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char scrip const UTF8 *sourceEnd = source + RSTRING_LEN(string); char buf[6] = { '\\', 'u' }; - int ascii_only = rb_enc_str_asciionly_p(string); - - if (!ascii_only) { - if (RB_ENCODING_GET_INLINED(string) != rb_utf8_encindex() || RB_ENC_CODERANGE(string) != RUBY_ENC_CODERANGE_VALID) { - rb_raise(rb_path2class("JSON::GeneratorError"), - "source sequence is illegal/malformed utf-8"); - } - } - - while (source < sourceEnd) { - UTF32 ch = 0; - unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; - /* - * The cases all fall through. See "Note A" below. - */ - switch (extraBytesToRead) { - case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ - case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ - case 3: ch += *source++; ch <<= 6; - case 2: ch += *source++; ch <<= 6; - case 1: ch += *source++; ch <<= 6; - case 0: ch += *source++; - } - ch -= offsetsFromUTF8[extraBytesToRead]; - - if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ - /* UTF-16 surrogate values are illegal in UTF-32 */ - if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { -#if UNI_STRICT_CONVERSION - source -= (extraBytesToRead+1); /* return to the illegal value itself */ - rb_raise(rb_path2class("JSON::GeneratorError"), - "source sequence is illegal/malformed utf-8"); -#else - unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR); -#endif - } else { - /* normal case */ - if (ch >= 0x20 && ch <= 0x7f) { - switch (ch) { - case '\\': - fbuffer_append(buffer, "\\\\", 2); - break; - case '"': - fbuffer_append(buffer, "\\\"", 2); - break; - case '/': - if(script_safe) { - fbuffer_append(buffer, "\\/", 2); - break; - } - default: - fbuffer_append_char(buffer, (char)ch); - break; - } - } else { - switch (ch) { - case '\n': - fbuffer_append(buffer, "\\n", 2); - break; - case '\r': - fbuffer_append(buffer, "\\r", 2); - break; - case '\t': - fbuffer_append(buffer, "\\t", 2); - break; - case '\f': - fbuffer_append(buffer, "\\f", 2); - break; - case '\b': - fbuffer_append(buffer, "\\b", 2); - break; - default: - unicode_escape_to_buffer(buffer, buf, (UTF16) ch); - break; - } - } - } - } else if (ch > UNI_MAX_UTF16) { -#if UNI_STRICT_CONVERSION - source -= (extraBytesToRead+1); /* return to the start */ - rb_raise(rb_path2class("JSON::GeneratorError"), - "source sequence is illegal/malformed utf8"); -#else - unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR); -#endif - } else { - /* target is a character in range 0xFFFF - 0x10FFFF. */ - ch -= halfBase; - unicode_escape_to_buffer(buffer, buf, (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START)); - unicode_escape_to_buffer(buffer, buf, (UTF16)((ch & halfMask) + UNI_SUR_LOW_START)); - } - } RB_GC_GUARD(string); } @@ -202,98 +62,6 @@ static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char script_safe unsigned char c; char buf[6] = { '\\', 'u' }; int ascii_only = rb_enc_str_asciionly_p(string); - - if (!ascii_only) { - if (RB_ENCODING_GET_INLINED(string) != rb_utf8_encindex() || RB_ENC_CODERANGE(string) != RUBY_ENC_CODERANGE_VALID) { - rb_raise(rb_path2class("JSON::GeneratorError"), - "source sequence is illegal/malformed utf-8"); - } - } - - for (start = 0, end = 0; end < len;) { - p = ptr + end; - c = (unsigned char) *p; - if (c < 0x20) { - switch (c) { - case '\n': - escape = "\\n"; - escape_len = 2; - break; - case '\r': - escape = "\\r"; - escape_len = 2; - break; - case '\t': - escape = "\\t"; - escape_len = 2; - break; - case '\f': - escape = "\\f"; - escape_len = 2; - break; - case '\b': - escape = "\\b"; - escape_len = 2; - break; - default: - unicode_escape(buf, (UTF16) *p); - escape = buf; - escape_len = 6; - break; - } - } else { - switch (c) { - case '\\': - escape = "\\\\"; - escape_len = 2; - break; - case '"': - escape = "\\\""; - escape_len = 2; - break; - case '/': - if(script_safe) { - escape = "\\/"; - escape_len = 2; - break; - } - default: - { - unsigned short clen = 1; - if (!ascii_only) { - clen += trailingBytesForUTF8[c]; - if (end + clen > len) { - rb_raise(rb_path2class("JSON::GeneratorError"), - "partial character in source, but hit end"); - } - - if (script_safe && c == 0xE2) { - unsigned char c2 = (unsigned char) *(p+1); - unsigned char c3 = (unsigned char) *(p+2); - if (c2 == 0x80 && (c3 == 0xA8 || c3 == 0xA9)) { - fbuffer_append(buffer, ptr + start, end - start); - start = end = (end + clen); - if (c3 == 0xA8) { - fbuffer_append(buffer, "\\u2028", 6); - } else { - fbuffer_append(buffer, "\\u2029", 6); - } - continue; - } - } - } - end += clen; - } - continue; - break; - } - } - fbuffer_append(buffer, ptr + start, end - start); - fbuffer_append(buffer, escape, escape_len); - start = ++end; - escape = NULL; - } - fbuffer_append(buffer, ptr + start, end - start); } static char *fstrndup(const char *ptr, unsigned long len) { diff --git a/ext/json/generator/generator.h b/ext/json/generator/generator.h index 8dde7be827..95ce2479c6 100644 --- a/ext/json/generator/generator.h +++ b/ext/json/generator/generator.h @@ -22,30 +22,6 @@ #define option_given_p(opts, key) RTEST(rb_funcall(opts, i_key_p, 1, key)) -/* unicode definitions */ - -#define UNI_STRICT_CONVERSION 1 - -typedef unsigned long UTF32; /* at least 32 bits */ -typedef unsigned short UTF16; /* at least 16 bits */ -typedef unsigned char UTF8; /* typically 8 bits */ - -#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD -#define UNI_MAX_BMP (UTF32)0x0000FFFF -#define UNI_MAX_UTF16 (UTF32)0x0010FFFF -#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF -#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF - -#define UNI_SUR_HIGH_START (UTF32)0xD800 -#define UNI_SUR_HIGH_END (UTF32)0xDBFF -#define UNI_SUR_LOW_START (UTF32)0xDC00 -#define UNI_SUR_LOW_END (UTF32)0xDFFF - -static const int halfShift = 10; /* used for shifting by 10 bits */ - -static const UTF32 halfBase = 0x0010000UL; -static const UTF32 halfMask = 0x3FFUL; - static void unicode_escape(char *buf, UTF16 character); static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 character); static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char script_safe); diff --git a/ext/json/parser/parser.h b/ext/json/parser/parser.h index 651d40c074..d80f1b7303 100644 --- a/ext/json/parser/parser.h +++ b/ext/json/parser/parser.h @@ -19,18 +19,6 @@ #define option_given_p(opts, key) RTEST(rb_funcall(opts, i_key_p, 1, key)) -/* unicode */ - -typedef unsigned long UTF32; /* at least 32 bits */ -typedef unsigned short UTF16; /* at least 16 bits */ -typedef unsigned char UTF8; /* typically 8 bits */ - -#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD -#define UNI_SUR_HIGH_START (UTF32)0xD800 -#define UNI_SUR_HIGH_END (UTF32)0xDBFF -#define UNI_SUR_LOW_START (UTF32)0xDC00 -#define UNI_SUR_LOW_END (UTF32)0xDFFF - typedef struct JSON_ParserStruct { VALUE Vsource; char *source; |