summaryrefslogtreecommitdiff
path: root/ext
diff options
context:
space:
mode:
Diffstat (limited to 'ext')
-rw-r--r--ext/json/generator/generator.c215
-rw-r--r--ext/json/generator/generator.h1
2 files changed, 185 insertions, 31 deletions
diff --git a/ext/json/generator/generator.c b/ext/json/generator/generator.c
index 0af1592a70..b9946b3cf7 100644
--- a/ext/json/generator/generator.c
+++ b/ext/json/generator/generator.c
@@ -25,14 +25,13 @@ static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend;
* Everything else (should be UTF-8) is just passed through and
* appended to the result.
*/
-static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE in_string, bool out_ascii_only, bool out_script_safe)
+static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE in_string, bool out_script_safe)
{
const char *hexdig = "0123456789abcdef";
char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
const char *in_utf8_str = RSTRING_PTR(in_string);
unsigned long in_utf8_len = RSTRING_LEN(in_string);
- bool in_is_ascii_only = rb_enc_str_asciionly_p(in_string);
unsigned long beg = 0, pos;
@@ -42,46 +41,183 @@ static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE in_string, bool out_
bool should_escape;
/* UTF-8 decoding */
- if (in_is_ascii_only) {
- ch = in_utf8_str[pos];
- ch_len = 1;
- } else {
- short i;
- if ((in_utf8_str[pos] & 0x80) == 0x00) { ch_len = 1; ch = in_utf8_str[pos]; } /* leading 1 bit is 0b0 */
- else if ((in_utf8_str[pos] & 0xE0) == 0xC0) { ch_len = 2; ch = in_utf8_str[pos] & 0x1F; } /* leading 3 bits are 0b110 */
- else if ((in_utf8_str[pos] & 0xF0) == 0xE0) { ch_len = 3; ch = in_utf8_str[pos] & 0x0F; } /* leading 4 bits are 0b1110 */
- else if ((in_utf8_str[pos] & 0xF8) == 0xF0) { ch_len = 4; ch = in_utf8_str[pos] & 0x07; } /* leading 5 bits are 0b11110 */
- else
- rb_raise(rb_path2class("JSON::GeneratorError"),
- "source sequence is illegal/malformed utf-8");
- if ((pos+ch_len) > in_utf8_len)
- rb_raise(rb_path2class("JSON::GeneratorError"),
- "partial character in source, but hit end");
- for (i = 1; i < ch_len; i++) {
- if ((in_utf8_str[pos+i] & 0xC0) != 0x80) /* leading 2 bits should be 0b10 */
- rb_raise(rb_path2class("JSON::GeneratorError"),
- "source sequence is illegal/malformed utf-8");
- ch = (ch<<6) | (in_utf8_str[pos+i] & 0x3F);
+ short i;
+ if ((in_utf8_str[pos] & 0x80) == 0x00) { ch_len = 1; ch = in_utf8_str[pos]; } /* leading 1 bit is 0b0 */
+ else if ((in_utf8_str[pos] & 0xE0) == 0xC0) { ch_len = 2; ch = in_utf8_str[pos] & 0x1F; } /* leading 3 bits are 0b110 */
+ else if ((in_utf8_str[pos] & 0xF0) == 0xE0) { ch_len = 3; ch = in_utf8_str[pos] & 0x0F; } /* leading 4 bits are 0b1110 */
+ else if ((in_utf8_str[pos] & 0xF8) == 0xF0) { ch_len = 4; ch = in_utf8_str[pos] & 0x07; } /* leading 5 bits are 0b11110 */
+ else {
+ rb_raise(rb_path2class("JSON::GeneratorError"), "source sequence is illegal/malformed utf-8");
+ }
+
+ for (i = 1; i < ch_len; i++) {
+ ch = (ch<<6) | (in_utf8_str[pos+i] & 0x3F);
+ }
+
+ /* JSON policy */
+ should_escape =
+ (ch < 0x20) ||
+ (ch == '"') ||
+ (ch == '\\') ||
+ (out_script_safe && (ch == '/')) ||
+ (out_script_safe && (ch == 0x2028)) ||
+ (out_script_safe && (ch == 0x2029));
+
+ /* JSON encoding */
+ if (should_escape) {
+ if (pos > beg) {
+ fbuffer_append(out_buffer, &in_utf8_str[beg], pos - beg);
+ }
+
+ beg = pos + ch_len;
+ switch (ch) {
+ case '"': fbuffer_append(out_buffer, "\\\"", 2); break;
+ case '\\': fbuffer_append(out_buffer, "\\\\", 2); break;
+ case '/': fbuffer_append(out_buffer, "\\/", 2); break;
+ case '\b': fbuffer_append(out_buffer, "\\b", 2); break;
+ case '\f': fbuffer_append(out_buffer, "\\f", 2); break;
+ case '\n': fbuffer_append(out_buffer, "\\n", 2); break;
+ case '\r': fbuffer_append(out_buffer, "\\r", 2); break;
+ case '\t': fbuffer_append(out_buffer, "\\t", 2); break;
+ default:
+ if (ch <= 0xFFFF) {
+ scratch[2] = hexdig[ch >> 12];
+ scratch[3] = hexdig[(ch >> 8) & 0xf];
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
+ scratch[5] = hexdig[ch & 0xf];
+ fbuffer_append(out_buffer, scratch, 6);
+ } else {
+ uint16_t hi, lo;
+ ch -= 0x10000;
+ hi = 0xD800 + (uint16_t)(ch >> 10);
+ lo = 0xDC00 + (uint16_t)(ch & 0x3FF);
+
+ scratch[2] = hexdig[hi >> 12];
+ scratch[3] = hexdig[(hi >> 8) & 0xf];
+ scratch[4] = hexdig[(hi >> 4) & 0xf];
+ scratch[5] = hexdig[hi & 0xf];
+
+ scratch[8] = hexdig[lo >> 12];
+ scratch[9] = hexdig[(lo >> 8) & 0xf];
+ scratch[10] = hexdig[(lo >> 4) & 0xf];
+ scratch[11] = hexdig[lo & 0xf];
+
+ fbuffer_append(out_buffer, scratch, 12);
+ }
}
- if (ch > 0x10FFFF)
- rb_raise(rb_path2class("JSON::GeneratorError"),
- "source sequence is illegal/malformed utf-8");
}
+ pos += ch_len;
+ }
+
+ if (beg < in_utf8_len) {
+ fbuffer_append(out_buffer, &in_utf8_str[beg], in_utf8_len - beg);
+ }
+
+ RB_GC_GUARD(in_string);
+}
+
+static void convert_ASCII_to_JSON(FBuffer *out_buffer, VALUE str, bool out_script_safe)
+{
+ const char *hexdig = "0123456789abcdef";
+ char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
+
+ const char *ptr = RSTRING_PTR(str);
+ unsigned long len = RSTRING_LEN(str);
+
+ unsigned long beg = 0, pos;
+
+ for (pos = 0; pos < len;) {
+ unsigned char ch = ptr[pos];
+ bool should_escape;
+
/* JSON policy */
should_escape =
(ch < 0x20) ||
(ch == '"') ||
(ch == '\\') ||
- (out_ascii_only && (ch > 0x7F)) ||
+ (out_script_safe && (ch == '/'));
+
+ /* JSON encoding */
+ if (should_escape) {
+ if (pos > beg) {
+ fbuffer_append(out_buffer, &ptr[beg], pos - beg);
+ }
+
+ beg = pos + 1;
+ switch (ch) {
+ case '"': fbuffer_append(out_buffer, "\\\"", 2); break;
+ case '\\': fbuffer_append(out_buffer, "\\\\", 2); break;
+ case '/': fbuffer_append(out_buffer, "\\/", 2); break;
+ case '\b': fbuffer_append(out_buffer, "\\b", 2); break;
+ case '\f': fbuffer_append(out_buffer, "\\f", 2); break;
+ case '\n': fbuffer_append(out_buffer, "\\n", 2); break;
+ case '\r': fbuffer_append(out_buffer, "\\r", 2); break;
+ case '\t': fbuffer_append(out_buffer, "\\t", 2); break;
+ default:
+ scratch[2] = hexdig[ch >> 12];
+ scratch[3] = hexdig[(ch >> 8) & 0xf];
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
+ scratch[5] = hexdig[ch & 0xf];
+ fbuffer_append(out_buffer, scratch, 6);
+ }
+ }
+
+ pos++;
+ }
+
+ if (beg < len) {
+ fbuffer_append(out_buffer, &ptr[beg], len - beg);
+ }
+
+ RB_GC_GUARD(str);
+}
+
+static void convert_UTF8_to_ASCII_only_JSON(FBuffer *out_buffer, VALUE in_string, bool out_script_safe)
+{
+ const char *hexdig = "0123456789abcdef";
+ char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
+
+ const char *in_utf8_str = RSTRING_PTR(in_string);
+ unsigned long in_utf8_len = RSTRING_LEN(in_string);
+
+ unsigned long beg = 0, pos;
+
+ for (pos = 0; pos < in_utf8_len;) {
+ uint32_t ch;
+ short ch_len;
+ bool should_escape;
+
+ /* UTF-8 decoding */
+ short i;
+ if ((in_utf8_str[pos] & 0x80) == 0x00) { ch_len = 1; ch = in_utf8_str[pos]; } /* leading 1 bit is 0b0 */
+ else if ((in_utf8_str[pos] & 0xE0) == 0xC0) { ch_len = 2; ch = in_utf8_str[pos] & 0x1F; } /* leading 3 bits are 0b110 */
+ else if ((in_utf8_str[pos] & 0xF0) == 0xE0) { ch_len = 3; ch = in_utf8_str[pos] & 0x0F; } /* leading 4 bits are 0b1110 */
+ else if ((in_utf8_str[pos] & 0xF8) == 0xF0) { ch_len = 4; ch = in_utf8_str[pos] & 0x07; } /* leading 5 bits are 0b11110 */
+ else {
+ rb_raise(rb_path2class("JSON::GeneratorError"), "source sequence is illegal/malformed utf-8");
+ }
+
+ for (i = 1; i < ch_len; i++) {
+ ch = (ch<<6) | (in_utf8_str[pos+i] & 0x3F);
+ }
+
+ /* JSON policy */
+ should_escape =
+ (ch < 0x20) ||
+ (ch == '"') ||
+ (ch == '\\') ||
+ (ch > 0x7F) ||
(out_script_safe && (ch == '/')) ||
(out_script_safe && (ch == 0x2028)) ||
(out_script_safe && (ch == 0x2029));
/* JSON encoding */
if (should_escape) {
- if (pos > beg)
+ if (pos > beg) {
fbuffer_append(out_buffer, &in_utf8_str[beg], pos - beg);
+ }
+
beg = pos + ch_len;
switch (ch) {
case '"': fbuffer_append(out_buffer, "\\\"", 2); break;
@@ -122,8 +258,11 @@ static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE in_string, bool out_
pos += ch_len;
}
- if (beg < in_utf8_len)
+
+ if (beg < in_utf8_len) {
fbuffer_append(out_buffer, &in_utf8_str[beg], in_utf8_len - beg);
+ }
+
RB_GC_GUARD(in_string);
}
@@ -570,11 +709,27 @@ static int enc_utf8_compatible_p(int enc_idx)
static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
{
- fbuffer_append_char(buffer, '"');
if (!enc_utf8_compatible_p(RB_ENCODING_GET(obj))) {
obj = rb_str_export_to_enc(obj, rb_utf8_encoding());
}
- convert_UTF8_to_JSON(buffer, obj, state->ascii_only, state->script_safe);
+
+ fbuffer_append_char(buffer, '"');
+
+ switch(rb_enc_str_coderange(obj)) {
+ case ENC_CODERANGE_7BIT:
+ convert_ASCII_to_JSON(buffer, obj, state->script_safe);
+ break;
+ case ENC_CODERANGE_VALID:
+ if (RB_UNLIKELY(state->ascii_only)) {
+ convert_UTF8_to_ASCII_only_JSON(buffer, obj, state->script_safe);
+ } else {
+ convert_UTF8_to_JSON(buffer, obj, state->script_safe);
+ }
+ break;
+ default:
+ rb_raise(rb_path2class("JSON::GeneratorError"), "source sequence is illegal/malformed utf-8");
+ break;
+ }
fbuffer_append_char(buffer, '"');
}
diff --git a/ext/json/generator/generator.h b/ext/json/generator/generator.h
index 09bf68079c..0553277fa6 100644
--- a/ext/json/generator/generator.h
+++ b/ext/json/generator/generator.h
@@ -23,7 +23,6 @@ typedef unsigned char _Bool;
#endif
#endif
-static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE in_string, bool out_ascii_only, bool out_script_safe);
static char *fstrndup(const char *ptr, unsigned long len);
/* ruby api and some helpers */