diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/_parser.rl similarity index 100% rename from ext/json/ext/parser/parser.rl rename to ext/json/ext/parser/_parser.rl diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index 2906cfd1..7ddb2bbb 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -1,3067 +1,206 @@ -/* This file is automatically generated from parser.rl by using ragel */ -#line 1 "parser.rl" -#include "ruby.h" -#include "../fbuffer/fbuffer.h" - -static VALUE mJSON, eNestingError, Encoding_UTF_8; -static VALUE CNaN, CInfinity, CMinusInfinity; - -static ID i_json_creatable_p, i_json_create, i_create_id, - i_chr, i_deep_const_get, i_match, i_aset, i_aref, - i_leftshift, i_new, i_try_convert, i_uminus, i_encode; - -static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze, - sym_create_additions, sym_create_id, sym_object_class, sym_array_class, - sym_decimal_class, sym_match_string; - -static int binary_encindex; -static int utf8_encindex; - -#ifdef HAVE_RB_CATEGORY_WARN -# define json_deprecated(message) rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, message) -#else -# define json_deprecated(message) rb_warn(message) -#endif - -static const char deprecated_create_additions_warning[] = - "JSON.load implicit support for `create_additions: true` is deprecated " - "and will be removed in 3.0, use JSON.unsafe_load or explicitly " - "pass `create_additions: true`"; - -#ifndef HAVE_RB_HASH_BULK_INSERT -// For TruffleRuby -void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash) -{ - long index = 0; - while (index < count) { - VALUE name = pairs[index++]; - VALUE value = pairs[index++]; - rb_hash_aset(hash, name, value); - } - RB_GC_GUARD(hash); -} -#endif - -/* name cache */ - -#include -#include - -// Object names are likely to be repeated, and are frozen. -// As such we can re-use them if we keep a cache of the ones we've seen so far, -// and save much more expensive lookups into the global fstring table. -// This cache implementation is deliberately simple, as we're optimizing for compactness, -// to be able to fit safely on the stack. -// As such, binary search into a sorted array gives a good tradeoff between compactness and -// performance. -#define JSON_RVALUE_CACHE_CAPA 63 -typedef struct rvalue_cache_struct { - int length; - VALUE entries[JSON_RVALUE_CACHE_CAPA]; -} rvalue_cache; - -static rb_encoding *enc_utf8; - -#define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55 - -static inline VALUE build_interned_string(const char *str, const long length) -{ -# ifdef HAVE_RB_ENC_INTERNED_STR - return rb_enc_interned_str(str, length, enc_utf8); -# else - VALUE rstring = rb_utf8_str_new(str, length); - return rb_funcall(rb_str_freeze(rstring), i_uminus, 0); -# endif -} - -static inline VALUE build_symbol(const char *str, const long length) -{ - return rb_str_intern(build_interned_string(str, length)); -} - -static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring) -{ - MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index); - cache->length++; - cache->entries[index] = rstring; -} - -static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring) -{ - long rstring_length = RSTRING_LEN(rstring); - if (length == rstring_length) { - return memcmp(str, RSTRING_PTR(rstring), length); - } else { - return (int)(length - rstring_length); - } -} - -static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length) -{ - if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { - // Common names aren't likely to be very long. So we just don't - // cache names above an arbitrary threshold. - return Qfalse; - } - - if (RB_UNLIKELY(!isalpha(str[0]))) { - // Simple heuristic, if the first character isn't a letter, - // we're much less likely to see this string again. - // We mostly want to cache strings that are likely to be repeated. - return Qfalse; - } - - int low = 0; - int high = cache->length - 1; - int mid = 0; - int last_cmp = 0; - - while (low <= high) { - mid = (high + low) >> 1; - VALUE entry = cache->entries[mid]; - last_cmp = rstring_cache_cmp(str, length, entry); - - if (last_cmp == 0) { - return entry; - } else if (last_cmp > 0) { - low = mid + 1; - } else { - high = mid - 1; - } - } - - if (RB_UNLIKELY(memchr(str, '\\', length))) { - // We assume the overwhelming majority of names don't need to be escaped. - // But if they do, we have to fallback to the slow path. - return Qfalse; - } - - VALUE rstring = build_interned_string(str, length); - - if (cache->length < JSON_RVALUE_CACHE_CAPA) { - if (last_cmp > 0) { - mid += 1; - } - - rvalue_cache_insert_at(cache, mid, rstring); - } - return rstring; -} - -static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length) -{ - if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { - // Common names aren't likely to be very long. So we just don't - // cache names above an arbitrary threshold. - return Qfalse; - } - - if (RB_UNLIKELY(!isalpha(str[0]))) { - // Simple heuristic, if the first character isn't a letter, - // we're much less likely to see this string again. - // We mostly want to cache strings that are likely to be repeated. - return Qfalse; - } - - int low = 0; - int high = cache->length - 1; - int mid = 0; - int last_cmp = 0; - - while (low <= high) { - mid = (high + low) >> 1; - VALUE entry = cache->entries[mid]; - last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry)); - - if (last_cmp == 0) { - return entry; - } else if (last_cmp > 0) { - low = mid + 1; - } else { - high = mid - 1; - } - } - - if (RB_UNLIKELY(memchr(str, '\\', length))) { - // We assume the overwhelming majority of names don't need to be escaped. - // But if they do, we have to fallback to the slow path. - return Qfalse; - } - - VALUE rsymbol = build_symbol(str, length); - - if (cache->length < JSON_RVALUE_CACHE_CAPA) { - if (last_cmp > 0) { - mid += 1; - } - - rvalue_cache_insert_at(cache, mid, rsymbol); - } - return rsymbol; -} - -/* rvalue stack */ - -#define RVALUE_STACK_INITIAL_CAPA 128 - -enum rvalue_stack_type { - RVALUE_STACK_HEAP_ALLOCATED = 0, - RVALUE_STACK_STACK_ALLOCATED = 1, -}; - -typedef struct rvalue_stack_struct { - enum rvalue_stack_type type; - long capa; - long head; - VALUE *ptr; -} rvalue_stack; - -static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref); - -static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref) -{ - long required = stack->capa * 2; - - if (stack->type == RVALUE_STACK_STACK_ALLOCATED) { - stack = rvalue_stack_spill(stack, handle, stack_ref); - } else { - REALLOC_N(stack->ptr, VALUE, required); - stack->capa = required; - } - return stack; -} - -static void rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref) -{ - if (RB_UNLIKELY(stack->head >= stack->capa)) { - stack = rvalue_stack_grow(stack, handle, stack_ref); - } - stack->ptr[stack->head] = value; - stack->head++; -} - -static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count) -{ - return stack->ptr + (stack->head - count); -} - -static inline void rvalue_stack_pop(rvalue_stack *stack, long count) -{ - stack->head -= count; -} - -static void rvalue_stack_mark(void *ptr) -{ - rvalue_stack *stack = (rvalue_stack *)ptr; - long index; - for (index = 0; index < stack->head; index++) { - rb_gc_mark(stack->ptr[index]); - } -} - -static void rvalue_stack_free(void *ptr) -{ - rvalue_stack *stack = (rvalue_stack *)ptr; - if (stack) { - ruby_xfree(stack->ptr); - ruby_xfree(stack); - } -} - -static size_t rvalue_stack_memsize(const void *ptr) -{ - const rvalue_stack *stack = (const rvalue_stack *)ptr; - return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa; -} - -static const rb_data_type_t JSON_Parser_rvalue_stack_type = { - "JSON::Ext::Parser/rvalue_stack", - { - .dmark = rvalue_stack_mark, - .dfree = rvalue_stack_free, - .dsize = rvalue_stack_memsize, - }, - 0, 0, - RUBY_TYPED_FREE_IMMEDIATELY, -}; - -static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref) -{ - rvalue_stack *stack; - *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); - *stack_ref = stack; - MEMCPY(stack, old_stack, rvalue_stack, 1); - - stack->capa = old_stack->capa << 1; - stack->ptr = ALLOC_N(VALUE, stack->capa); - stack->type = RVALUE_STACK_HEAP_ALLOCATED; - MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head); - return stack; -} - -static void rvalue_stack_eagerly_release(VALUE handle) -{ - rvalue_stack *stack; - TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); - RTYPEDDATA_DATA(handle) = NULL; - rvalue_stack_free(stack); -} - -/* unicode */ - -static const signed char digit_values[256] = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, - -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1 -}; - -static uint32_t unescape_unicode(const unsigned char *p) -{ - const uint32_t replacement_char = 0xFFFD; - - signed char b; - uint32_t result = 0; - b = digit_values[p[0]]; - if (b < 0) return replacement_char; - result = (result << 4) | (unsigned char)b; - b = digit_values[p[1]]; - if (b < 0) return replacement_char; - result = (result << 4) | (unsigned char)b; - b = digit_values[p[2]]; - if (b < 0) return replacement_char; - result = (result << 4) | (unsigned char)b; - b = digit_values[p[3]]; - if (b < 0) return replacement_char; - result = (result << 4) | (unsigned char)b; - return result; -} - -static int convert_UTF32_to_UTF8(char *buf, uint32_t ch) -{ - int len = 1; - if (ch <= 0x7F) { - buf[0] = (char) ch; - } else if (ch <= 0x07FF) { - buf[0] = (char) ((ch >> 6) | 0xC0); - buf[1] = (char) ((ch & 0x3F) | 0x80); - len++; - } else if (ch <= 0xFFFF) { - buf[0] = (char) ((ch >> 12) | 0xE0); - buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80); - buf[2] = (char) ((ch & 0x3F) | 0x80); - len += 2; - } else if (ch <= 0x1fffff) { - buf[0] =(char) ((ch >> 18) | 0xF0); - buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80); - buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80); - buf[3] =(char) ((ch & 0x3F) | 0x80); - len += 3; - } else { - buf[0] = '?'; - } - return len; -} - -typedef struct JSON_ParserStruct { - VALUE create_id; - VALUE object_class; - VALUE array_class; - VALUE decimal_class; - VALUE match_string; - int max_nesting; - bool allow_nan; - bool allow_trailing_comma; - bool parsing_name; - bool symbolize_names; - bool freeze; - bool create_additions; - bool deprecated_create_additions; -} JSON_Parser; - -typedef struct JSON_ParserStateStruct { - JSON_Parser *json; - VALUE Vsource; - VALUE stack_handle; - char *source; - long len; - char *memo; - FBuffer fbuffer; - rvalue_stack *stack; - rvalue_cache name_cache; - int in_array; -} JSON_ParserState; - -#define GET_PARSER \ - JSON_Parser *json; \ - TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json) - -#define MinusInfinity "-Infinity" -#define EVIL 0x666 - -static const rb_data_type_t JSON_Parser_type; -static char *JSON_parse_string(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_object(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_value(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_number(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_array(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); - -#ifndef HAVE_STRNLEN -static size_t strnlen(const char *s, size_t maxlen) -{ - char *p; - return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen); -} -#endif - -#define PARSE_ERROR_FRAGMENT_LEN 32 -#ifdef RBIMPL_ATTR_NORETURN -RBIMPL_ATTR_NORETURN() -#endif -static void raise_parse_error(const char *format, const char *start) -{ - char buffer[PARSE_ERROR_FRAGMENT_LEN + 1]; - - size_t len = strnlen(start, PARSE_ERROR_FRAGMENT_LEN); - const char *ptr = start; - - if (len == PARSE_ERROR_FRAGMENT_LEN) { - MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN); - buffer[PARSE_ERROR_FRAGMENT_LEN] = '\0'; - ptr = buffer; - } - - rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr); -} - - - -#line 472 "parser.rl" - - - -#line 454 "parser.c" -enum {JSON_object_start = 1}; -enum {JSON_object_first_final = 32}; -enum {JSON_object_error = 0}; - -enum {JSON_object_en_main = 1}; - - -#line 512 "parser.rl" - - -#define PUSH(result) rvalue_stack_push(state->stack, result, &state->stack_handle, &state->stack) - -static char *JSON_parse_object(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) -{ - int cs = EVIL; - - if (json->max_nesting && current_nesting > json->max_nesting) { - rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); - } - - long stack_head = state->stack->head; - - -#line 478 "parser.c" - { - cs = JSON_object_start; - } - -#line 527 "parser.rl" - -#line 485 "parser.c" - { - short _widec; - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -case 1: - if ( (*p) == 123 ) - goto st2; - goto st0; -st0: -cs = 0; - goto _out; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 13: goto st2; - case 32: goto st2; - case 34: goto tr2; - case 47: goto st28; - case 125: goto tr4; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st2; - goto st0; -tr2: -#line 491 "parser.rl" - { - char *np; - json->parsing_name = true; - np = JSON_parse_string(state, json, p, pe, result); - json->parsing_name = false; - if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else { - PUSH(*result); - {p = (( np))-1;} - } - } - goto st3; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: -#line 530 "parser.c" - switch( (*p) ) { - case 13: goto st3; - case 32: goto st3; - case 47: goto st4; - case 58: goto st8; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st3; - goto st0; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - switch( (*p) ) { - case 42: goto st5; - case 47: goto st7; - } - goto st0; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( (*p) == 42 ) - goto st6; - goto st5; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - switch( (*p) ) { - case 42: goto st6; - case 47: goto st3; - } - goto st5; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - if ( (*p) == 10 ) - goto st3; - goto st7; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - switch( (*p) ) { - case 13: goto st8; - case 32: goto st8; - case 34: goto tr11; - case 45: goto tr11; - case 47: goto st24; - case 73: goto tr11; - case 78: goto tr11; - case 91: goto tr11; - case 102: goto tr11; - case 110: goto tr11; - case 116: goto tr11; - case 123: goto tr11; - } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr11; - } else if ( (*p) >= 9 ) - goto st8; - goto st0; -tr11: -#line 480 "parser.rl" - { - char *np = JSON_parse_value(state, json, p, pe, result, current_nesting); - if (np == NULL) { - p--; {p++; cs = 9; goto _out;} - } else { - {p = (( np))-1;} - } - } - goto st9; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: -#line 611 "parser.c" - _widec = (*p); - if ( (*p) < 13 ) { - if ( (*p) > 9 ) { - if ( 10 <= (*p) && (*p) <= 10 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 9 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 13 ) { - if ( (*p) < 44 ) { - if ( 32 <= (*p) && (*p) <= 32 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 44 ) { - if ( 47 <= (*p) && (*p) <= 47 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 125: goto tr4; - case 269: goto st10; - case 288: goto st10; - case 300: goto st11; - case 303: goto st16; - case 525: goto st9; - case 544: goto st9; - case 556: goto st2; - case 559: goto st20; - } - if ( _widec > 266 ) { - if ( 521 <= _widec && _widec <= 522 ) - goto st9; - } else if ( _widec >= 265 ) - goto st10; - goto st0; -tr4: -#line 502 "parser.rl" - { p--; {p++; cs = 32; goto _out;} } - goto st32; -st32: - if ( ++p == pe ) - goto _test_eof32; -case 32: -#line 679 "parser.c" - goto st0; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: - switch( (*p) ) { - case 13: goto st10; - case 32: goto st10; - case 44: goto st11; - case 47: goto st16; - case 125: goto tr4; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st10; - goto st0; -st11: - if ( ++p == pe ) - goto _test_eof11; -case 11: - switch( (*p) ) { - case 13: goto st11; - case 32: goto st11; - case 34: goto tr2; - case 47: goto st12; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st11; - goto st0; -st12: - if ( ++p == pe ) - goto _test_eof12; -case 12: - switch( (*p) ) { - case 42: goto st13; - case 47: goto st15; - } - goto st0; -st13: - if ( ++p == pe ) - goto _test_eof13; -case 13: - if ( (*p) == 42 ) - goto st14; - goto st13; -st14: - if ( ++p == pe ) - goto _test_eof14; -case 14: - switch( (*p) ) { - case 42: goto st14; - case 47: goto st11; - } - goto st13; -st15: - if ( ++p == pe ) - goto _test_eof15; -case 15: - if ( (*p) == 10 ) - goto st11; - goto st15; -st16: - if ( ++p == pe ) - goto _test_eof16; -case 16: - switch( (*p) ) { - case 42: goto st17; - case 47: goto st19; - } - goto st0; -st17: - if ( ++p == pe ) - goto _test_eof17; -case 17: - if ( (*p) == 42 ) - goto st18; - goto st17; -st18: - if ( ++p == pe ) - goto _test_eof18; -case 18: - switch( (*p) ) { - case 42: goto st18; - case 47: goto st10; - } - goto st17; -st19: - if ( ++p == pe ) - goto _test_eof19; -case 19: - if ( (*p) == 10 ) - goto st10; - goto st19; -st20: - if ( ++p == pe ) - goto _test_eof20; -case 20: - _widec = (*p); - if ( (*p) > 42 ) { - if ( 47 <= (*p) && (*p) <= 47 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 42 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 298: goto st17; - case 303: goto st19; - case 554: goto st21; - case 559: goto st23; - } - goto st0; -st21: - if ( ++p == pe ) - goto _test_eof21; -case 21: - _widec = (*p); - if ( (*p) < 42 ) { - if ( (*p) <= 41 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 42 ) { - if ( 43 <= (*p) ) - { _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 298: goto st18; - case 554: goto st22; - } - if ( _widec > 383 ) { - if ( 384 <= _widec && _widec <= 639 ) - goto st21; - } else if ( _widec >= 128 ) - goto st17; - goto st0; -st22: - if ( ++p == pe ) - goto _test_eof22; -case 22: - _widec = (*p); - if ( (*p) < 43 ) { - if ( (*p) > 41 ) { - if ( 42 <= (*p) && (*p) <= 42 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 46 ) { - if ( (*p) > 47 ) { - if ( 48 <= (*p) ) - { _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 47 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 298: goto st18; - case 303: goto st10; - case 554: goto st22; - case 559: goto st9; - } - if ( _widec > 383 ) { - if ( 384 <= _widec && _widec <= 639 ) - goto st21; - } else if ( _widec >= 128 ) - goto st17; - goto st0; -st23: - if ( ++p == pe ) - goto _test_eof23; -case 23: - _widec = (*p); - if ( (*p) < 10 ) { - if ( (*p) <= 9 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 10 ) { - if ( 11 <= (*p) ) - { _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 266: goto st10; - case 522: goto st9; - } - if ( _widec > 383 ) { - if ( 384 <= _widec && _widec <= 639 ) - goto st23; - } else if ( _widec >= 128 ) - goto st19; - goto st0; -st24: - if ( ++p == pe ) - goto _test_eof24; -case 24: - switch( (*p) ) { - case 42: goto st25; - case 47: goto st27; - } - goto st0; -st25: - if ( ++p == pe ) - goto _test_eof25; -case 25: - if ( (*p) == 42 ) - goto st26; - goto st25; -st26: - if ( ++p == pe ) - goto _test_eof26; -case 26: - switch( (*p) ) { - case 42: goto st26; - case 47: goto st8; - } - goto st25; -st27: - if ( ++p == pe ) - goto _test_eof27; -case 27: - if ( (*p) == 10 ) - goto st8; - goto st27; -st28: - if ( ++p == pe ) - goto _test_eof28; -case 28: - switch( (*p) ) { - case 42: goto st29; - case 47: goto st31; - } - goto st0; -st29: - if ( ++p == pe ) - goto _test_eof29; -case 29: - if ( (*p) == 42 ) - goto st30; - goto st29; -st30: - if ( ++p == pe ) - goto _test_eof30; -case 30: - switch( (*p) ) { - case 42: goto st30; - case 47: goto st2; - } - goto st29; -st31: - if ( ++p == pe ) - goto _test_eof31; -case 31: - if ( (*p) == 10 ) - goto st2; - goto st31; - } - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - _test_eof32: cs = 32; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - _test_eof11: cs = 11; goto _test_eof; - _test_eof12: cs = 12; goto _test_eof; - _test_eof13: cs = 13; goto _test_eof; - _test_eof14: cs = 14; goto _test_eof; - _test_eof15: cs = 15; goto _test_eof; - _test_eof16: cs = 16; goto _test_eof; - _test_eof17: cs = 17; goto _test_eof; - _test_eof18: cs = 18; goto _test_eof; - _test_eof19: cs = 19; goto _test_eof; - _test_eof20: cs = 20; goto _test_eof; - _test_eof21: cs = 21; goto _test_eof; - _test_eof22: cs = 22; goto _test_eof; - _test_eof23: cs = 23; goto _test_eof; - _test_eof24: cs = 24; goto _test_eof; - _test_eof25: cs = 25; goto _test_eof; - _test_eof26: cs = 26; goto _test_eof; - _test_eof27: cs = 27; goto _test_eof; - _test_eof28: cs = 28; goto _test_eof; - _test_eof29: cs = 29; goto _test_eof; - _test_eof30: cs = 30; goto _test_eof; - _test_eof31: cs = 31; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 528 "parser.rl" +#include +#include + +typedef struct { + const uint8_t *cursor; + const uint8_t *end; +} j2_parser_t; + +static inline void +j2_eat_whitespace(j2_parser_t *parser) { + while (parser->cursor < parser->end) { + switch (*parser->cursor) { + case ' ': + case '\t': + case '\n': + case '\r': + parser->cursor++; + break; + default: + return; + } + } +} + +static VALUE +j2_parse_element(j2_parser_t *parser) { + j2_eat_whitespace(parser); + if (parser->cursor >= parser->end) { + rb_raise(rb_eRuntimeError, "unexpected end of input"); + } + + switch (*parser->cursor) { + case 'n': + if ((parser->end - parser->cursor >= 4) && (memcmp(parser->cursor, "null", 4) == 0)) { + parser->cursor += 4; + return Qnil; + } - if (cs >= JSON_object_first_final) { - long count = state->stack->head - stack_head; + rb_raise(rb_eRuntimeError, "unexpected character"); + break; + case 't': + if ((parser->end - parser->cursor >= 4) && (memcmp(parser->cursor, "true", 4) == 0)) { + parser->cursor += 4; + return Qtrue; + } - if (RB_UNLIKELY(json->object_class)) { - VALUE object = rb_class_new_instance(0, 0, json->object_class); - long index = 0; - VALUE *items = rvalue_stack_peek(state->stack, count); - while (index < count) { - VALUE name = items[index++]; - VALUE value = items[index++]; - rb_funcall(object, i_aset, 2, name, value); + rb_raise(rb_eRuntimeError, "unexpected character"); + break; + case 'f': + if ((parser->end - parser->cursor >= 5) && (memcmp(parser->cursor, "false", 5) == 0)) { + parser->cursor += 5; + return Qfalse; } - *result = object; - } else { - VALUE hash; -#ifdef HAVE_RB_HASH_NEW_CAPA - hash = rb_hash_new_capa(count >> 1); -#else - hash = rb_hash_new(); -#endif - rb_hash_bulk_insert(count, rvalue_stack_peek(state->stack, count), hash); - *result = hash; - } - rvalue_stack_pop(state->stack, count); - if (RB_UNLIKELY(json->create_additions)) { - VALUE klassname; - if (json->object_class) { - klassname = rb_funcall(*result, i_aref, 1, json->create_id); - } else { - klassname = rb_hash_aref(*result, json->create_id); + rb_raise(rb_eRuntimeError, "unexpected character"); + break; + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { + // /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/ + const uint8_t *start = parser->cursor; + while ((parser->cursor < parser->end) && (*parser->cursor >= '0') && (*parser->cursor <= '9')) { + parser->cursor++; } - if (!NIL_P(klassname)) { - VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname); - if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) { - if (json->deprecated_create_additions) { - json_deprecated(deprecated_create_additions_warning); - } - *result = rb_funcall(klass, i_json_create, 1, *result); + + if ((parser->cursor < parser->end) && (*parser->cursor == '.')) { + parser->cursor++; + while ((parser->cursor < parser->end) && (*parser->cursor >= '0') && (*parser->cursor <= '9')) { + parser->cursor++; } } - } - return p + 1; - } else { - return NULL; - } -} - - -#line 1070 "parser.c" -enum {JSON_value_start = 1}; -enum {JSON_value_first_final = 29}; -enum {JSON_value_error = 0}; - -enum {JSON_value_en_main = 1}; - - -#line 661 "parser.rl" - -static char *JSON_parse_value(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) -{ - int cs = EVIL; - - -#line 1086 "parser.c" - { - cs = JSON_value_start; - } - -#line 668 "parser.rl" + if ((parser->cursor < parser->end) && ((*parser->cursor == 'e') || (*parser->cursor == 'E'))) { + parser->cursor++; + if ((parser->cursor < parser->end) && ((*parser->cursor == '+') || (*parser->cursor == '-'))) { + parser->cursor++; + } -#line 1093 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -st1: - if ( ++p == pe ) - goto _test_eof1; -case 1: - switch( (*p) ) { - case 13: goto st1; - case 32: goto st1; - case 34: goto tr2; - case 45: goto tr3; - case 47: goto st6; - case 73: goto st10; - case 78: goto st17; - case 91: goto tr7; - case 102: goto st19; - case 110: goto st23; - case 116: goto st26; - case 123: goto tr11; - } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr3; - } else if ( (*p) >= 9 ) - goto st1; - goto st0; -st0: -cs = 0; - goto _out; -tr2: -#line 606 "parser.rl" - { - char *np = JSON_parse_string(state, json, p, pe, result); - if (np == NULL) { - p--; - {p++; cs = 29; goto _out;} - } else { - {p = (( np))-1;} - } - } - goto st29; -tr3: -#line 616 "parser.rl" - { - char *np; - if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) { - if (json->allow_nan) { - *result = CMinusInfinity; - {p = (( p + 10))-1;} - p--; {p++; cs = 29; goto _out;} - } else { - raise_parse_error("unexpected token at '%s'", p); + while ((parser->cursor < parser->end) && (*parser->cursor >= '0') && (*parser->cursor <= '9')) { + parser->cursor++; + } } - } - np = JSON_parse_number(state, json, p, pe, result); - if (np != NULL) { - {p = (( np))-1;} - } - p--; {p++; cs = 29; goto _out;} - } - goto st29; -tr7: -#line 634 "parser.rl" - { - char *np; - state->in_array++; - np = JSON_parse_array(state, json, p, pe, result, current_nesting + 1); - state->in_array--; - if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;} - } - goto st29; -tr11: -#line 642 "parser.rl" - { - char *np; - np = JSON_parse_object(state, json, p, pe, result, current_nesting + 1); - if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;} - } - goto st29; -tr25: -#line 599 "parser.rl" - { - if (json->allow_nan) { - *result = CInfinity; - } else { - raise_parse_error("unexpected token at '%s'", p - 7); - } - } - goto st29; -tr27: -#line 592 "parser.rl" - { - if (json->allow_nan) { - *result = CNaN; - } else { - raise_parse_error("unexpected token at '%s'", p - 2); - } - } - goto st29; -tr31: -#line 586 "parser.rl" - { - *result = Qfalse; - } - goto st29; -tr34: -#line 583 "parser.rl" - { - *result = Qnil; - } - goto st29; -tr37: -#line 589 "parser.rl" - { - *result = Qtrue; - } - goto st29; -st29: - if ( ++p == pe ) - goto _test_eof29; -case 29: -#line 648 "parser.rl" - { p--; {p++; cs = 29; goto _out;} } -#line 1220 "parser.c" - switch( (*p) ) { - case 13: goto st29; - case 32: goto st29; - case 47: goto st2; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st29; - goto st0; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 42: goto st3; - case 47: goto st5; - } - goto st0; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: - if ( (*p) == 42 ) - goto st4; - goto st3; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - switch( (*p) ) { - case 42: goto st4; - case 47: goto st29; - } - goto st3; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( (*p) == 10 ) - goto st29; - goto st5; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - switch( (*p) ) { - case 42: goto st7; - case 47: goto st9; - } - goto st0; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - if ( (*p) == 42 ) - goto st8; - goto st7; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - switch( (*p) ) { - case 42: goto st8; - case 47: goto st1; - } - goto st7; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: - if ( (*p) == 10 ) - goto st1; - goto st9; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: - if ( (*p) == 110 ) - goto st11; - goto st0; -st11: - if ( ++p == pe ) - goto _test_eof11; -case 11: - if ( (*p) == 102 ) - goto st12; - goto st0; -st12: - if ( ++p == pe ) - goto _test_eof12; -case 12: - if ( (*p) == 105 ) - goto st13; - goto st0; -st13: - if ( ++p == pe ) - goto _test_eof13; -case 13: - if ( (*p) == 110 ) - goto st14; - goto st0; -st14: - if ( ++p == pe ) - goto _test_eof14; -case 14: - if ( (*p) == 105 ) - goto st15; - goto st0; -st15: - if ( ++p == pe ) - goto _test_eof15; -case 15: - if ( (*p) == 116 ) - goto st16; - goto st0; -st16: - if ( ++p == pe ) - goto _test_eof16; -case 16: - if ( (*p) == 121 ) - goto tr25; - goto st0; -st17: - if ( ++p == pe ) - goto _test_eof17; -case 17: - if ( (*p) == 97 ) - goto st18; - goto st0; -st18: - if ( ++p == pe ) - goto _test_eof18; -case 18: - if ( (*p) == 78 ) - goto tr27; - goto st0; -st19: - if ( ++p == pe ) - goto _test_eof19; -case 19: - if ( (*p) == 97 ) - goto st20; - goto st0; -st20: - if ( ++p == pe ) - goto _test_eof20; -case 20: - if ( (*p) == 108 ) - goto st21; - goto st0; -st21: - if ( ++p == pe ) - goto _test_eof21; -case 21: - if ( (*p) == 115 ) - goto st22; - goto st0; -st22: - if ( ++p == pe ) - goto _test_eof22; -case 22: - if ( (*p) == 101 ) - goto tr31; - goto st0; -st23: - if ( ++p == pe ) - goto _test_eof23; -case 23: - if ( (*p) == 117 ) - goto st24; - goto st0; -st24: - if ( ++p == pe ) - goto _test_eof24; -case 24: - if ( (*p) == 108 ) - goto st25; - goto st0; -st25: - if ( ++p == pe ) - goto _test_eof25; -case 25: - if ( (*p) == 108 ) - goto tr34; - goto st0; -st26: - if ( ++p == pe ) - goto _test_eof26; -case 26: - if ( (*p) == 114 ) - goto st27; - goto st0; -st27: - if ( ++p == pe ) - goto _test_eof27; -case 27: - if ( (*p) == 117 ) - goto st28; - goto st0; -st28: - if ( ++p == pe ) - goto _test_eof28; -case 28: - if ( (*p) == 101 ) - goto tr37; - goto st0; - } - _test_eof1: cs = 1; goto _test_eof; - _test_eof29: cs = 29; goto _test_eof; - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - _test_eof11: cs = 11; goto _test_eof; - _test_eof12: cs = 12; goto _test_eof; - _test_eof13: cs = 13; goto _test_eof; - _test_eof14: cs = 14; goto _test_eof; - _test_eof15: cs = 15; goto _test_eof; - _test_eof16: cs = 16; goto _test_eof; - _test_eof17: cs = 17; goto _test_eof; - _test_eof18: cs = 18; goto _test_eof; - _test_eof19: cs = 19; goto _test_eof; - _test_eof20: cs = 20; goto _test_eof; - _test_eof21: cs = 21; goto _test_eof; - _test_eof22: cs = 22; goto _test_eof; - _test_eof23: cs = 23; goto _test_eof; - _test_eof24: cs = 24; goto _test_eof; - _test_eof25: cs = 25; goto _test_eof; - _test_eof26: cs = 26; goto _test_eof; - _test_eof27: cs = 27; goto _test_eof; - _test_eof28: cs = 28; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 669 "parser.rl" - - if (json->freeze) { - OBJ_FREEZE(*result); - } - - if (cs >= JSON_value_first_final) { - PUSH(*result); - return p; - } else { - return NULL; - } -} - - -#line 1476 "parser.c" -enum {JSON_integer_start = 1}; -enum {JSON_integer_first_final = 3}; -enum {JSON_integer_error = 0}; - -enum {JSON_integer_en_main = 1}; - - -#line 690 "parser.rl" - - -#define MAX_FAST_INTEGER_SIZE 18 -static inline VALUE fast_parse_integer(char *p, char *pe) -{ - bool negative = false; - if (*p == '-') { - negative = true; - p++; - } - long long memo = 0; - while (p < pe) { - memo *= 10; - memo += *p - '0'; - p++; - } + return rb_cstr_to_inum((const char *) start, (int) (parser->cursor - start), 10); + } + case '"': { + // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"} + parser->cursor++; + const uint8_t *start = parser->cursor; + + while (parser->cursor < parser->end) { + if (*parser->cursor == '"') { + VALUE string = rb_enc_str_new((const char *) start, parser->cursor - start, rb_utf8_encoding()); + parser->cursor++; + return string; + } else if (*parser->cursor == '\\') { + // Parse escape sequence + parser->cursor++; + } - if (negative) { - memo = -memo; - } - return LL2NUM(memo); -} + parser->cursor++; + } -static char *JSON_decode_integer(JSON_ParserState *state, JSON_Parser *json, char *p, VALUE *result) -{ - long len = p - state->memo; - if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) { - *result = fast_parse_integer(state->memo, p); - } else { - fbuffer_clear(&state->fbuffer); - fbuffer_append(&state->fbuffer, state->memo, len); - fbuffer_append_char(&state->fbuffer, '\0'); - *result = rb_cstr2inum(FBUFFER_PTR(&state->fbuffer), 10); + rb_raise(rb_eRuntimeError, "unexpected end of input"); + break; } - return p + 1; -} - - -#line 1524 "parser.c" -enum {JSON_float_start = 1}; -enum {JSON_float_first_final = 6}; -enum {JSON_float_error = 0}; - -enum {JSON_float_en_main = 1}; - - -#line 742 "parser.rl" + case '[': { + VALUE array = rb_ary_new(); + parser->cursor++; + j2_eat_whitespace(parser); + if ((parser->cursor < parser->end) && (*parser->cursor == ']')) { + parser->cursor++; + return array; + } -static char *JSON_parse_number(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result) -{ - int cs = EVIL; - bool is_float = false; - - -#line 1541 "parser.c" - { - cs = JSON_float_start; - } - -#line 750 "parser.rl" - state->memo = p; - -#line 1549 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -case 1: - switch( (*p) ) { - case 45: goto st2; - case 48: goto st6; - } - if ( 49 <= (*p) && (*p) <= 57 ) - goto st10; - goto st0; -st0: -cs = 0; - goto _out; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - if ( (*p) == 48 ) - goto st6; - if ( 49 <= (*p) && (*p) <= 57 ) - goto st10; - goto st0; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - switch( (*p) ) { - case 45: goto st0; - case 46: goto tr8; - case 69: goto tr9; - case 101: goto tr9; - } - if ( 48 <= (*p) && (*p) <= 57 ) - goto st0; - goto tr7; -tr7: -#line 734 "parser.rl" - { p--; {p++; cs = 7; goto _out;} } - goto st7; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: -#line 1596 "parser.c" - goto st0; -tr8: -#line 735 "parser.rl" - { is_float = true; } - goto st3; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: -#line 1606 "parser.c" - if ( 48 <= (*p) && (*p) <= 57 ) - goto st8; - goto st0; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - switch( (*p) ) { - case 69: goto st4; - case 101: goto st4; - } - if ( (*p) > 46 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st8; - } else if ( (*p) >= 45 ) - goto st0; - goto tr7; -tr9: -#line 735 "parser.rl" - { is_float = true; } - goto st4; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: -#line 1632 "parser.c" - switch( (*p) ) { - case 43: goto st5; - case 45: goto st5; - } - if ( 48 <= (*p) && (*p) <= 57 ) - goto st9; - goto st0; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( 48 <= (*p) && (*p) <= 57 ) - goto st9; - goto st0; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: - switch( (*p) ) { - case 69: goto st0; - case 101: goto st0; - } - if ( (*p) > 46 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st9; - } else if ( (*p) >= 45 ) - goto st0; - goto tr7; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: - switch( (*p) ) { - case 45: goto st0; - case 46: goto tr8; - case 69: goto tr9; - case 101: goto tr9; - } - if ( 48 <= (*p) && (*p) <= 57 ) - goto st10; - goto tr7; - } - _test_eof2: cs = 2; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 752 "parser.rl" - - if (cs >= JSON_float_first_final) { - if (!is_float) { - return JSON_decode_integer(state, json, p, result); - } - VALUE mod = Qnil; - ID method_id = 0; - if (json->decimal_class) { - if (rb_respond_to(json->decimal_class, i_try_convert)) { - mod = json->decimal_class; - method_id = i_try_convert; - } else if (rb_respond_to(json->decimal_class, i_new)) { - mod = json->decimal_class; - method_id = i_new; - } else if (RB_TYPE_P(json->decimal_class, T_CLASS)) { - VALUE name = rb_class_name(json->decimal_class); - const char *name_cstr = RSTRING_PTR(name); - const char *last_colon = strrchr(name_cstr, ':'); - if (last_colon) { - const char *mod_path_end = last_colon - 1; - VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr); - mod = rb_path_to_class(mod_path); - - const char *method_name_beg = last_colon + 1; - long before_len = method_name_beg - name_cstr; - long len = RSTRING_LEN(name) - before_len; - VALUE method_name = rb_str_substr(name, before_len, len); - method_id = SYM2ID(rb_str_intern(method_name)); - } else { - mod = rb_mKernel; - method_id = SYM2ID(rb_str_intern(name)); + while (parser->cursor < parser->end) { + VALUE element = j2_parse_element(parser); + rb_ary_push(array, element); + + switch (*parser->cursor) { + case ',': + parser->cursor++; + break; + case ']': + parser->cursor++; + return array; + default: + rb_raise(rb_eRuntimeError, "expected ',' or ']' after array value"); } } - } - long len = p - state->memo; - fbuffer_clear(&state->fbuffer); - fbuffer_append(&state->fbuffer, state->memo, len); - fbuffer_append_char(&state->fbuffer, '\0'); - - if (method_id) { - VALUE text = rb_str_new2(FBUFFER_PTR(&state->fbuffer)); - *result = rb_funcallv(mod, method_id, 1, &text); - } else { - *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(&state->fbuffer), 1)); - } - - return p + 1; - } else { - return NULL; - } -} - - - -#line 1745 "parser.c" -enum {JSON_array_start = 1}; -enum {JSON_array_first_final = 22}; -enum {JSON_array_error = 0}; - -enum {JSON_array_en_main = 1}; - - -#line 832 "parser.rl" - - -static char *JSON_parse_array(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) -{ - int cs = EVIL; - - if (json->max_nesting && current_nesting > json->max_nesting) { - rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); - } - long stack_head = state->stack->head; - - -#line 1766 "parser.c" - { - cs = JSON_array_start; - } - -#line 844 "parser.rl" - -#line 1773 "parser.c" - { - short _widec; - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -case 1: - if ( (*p) == 91 ) - goto st2; - goto st0; -st0: -cs = 0; - goto _out; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 13: goto st2; - case 32: goto st2; - case 34: goto tr2; - case 45: goto tr2; - case 47: goto st18; - case 73: goto tr2; - case 78: goto tr2; - case 91: goto tr2; - case 93: goto tr4; - case 102: goto tr2; - case 110: goto tr2; - case 116: goto tr2; - case 123: goto tr2; - } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr2; - } else if ( (*p) >= 9 ) - goto st2; - goto st0; -tr2: -#line 812 "parser.rl" - { - VALUE v = Qnil; - char *np = JSON_parse_value(state, json, p, pe, &v, current_nesting); - if (np == NULL) { - p--; {p++; cs = 3; goto _out;} - } else { - {p = (( np))-1;} + rb_raise(rb_eRuntimeError, "unexpected end of input"); + break; } - } - goto st3; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: -#line 1828 "parser.c" - _widec = (*p); - if ( 44 <= (*p) && (*p) <= 44 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 13: goto st3; - case 32: goto st3; - case 47: goto st4; - case 93: goto tr4; - case 300: goto st8; - case 556: goto st13; - } - if ( 9 <= _widec && _widec <= 10 ) - goto st3; - goto st0; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - switch( (*p) ) { - case 42: goto st5; - case 47: goto st7; - } - goto st0; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( (*p) == 42 ) - goto st6; - goto st5; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - switch( (*p) ) { - case 42: goto st6; - case 47: goto st3; - } - goto st5; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - if ( (*p) == 10 ) - goto st3; - goto st7; -tr4: -#line 824 "parser.rl" - { p--; {p++; cs = 22; goto _out;} } - goto st22; -st22: - if ( ++p == pe ) - goto _test_eof22; -case 22: -#line 1887 "parser.c" - goto st0; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - switch( (*p) ) { - case 13: goto st8; - case 32: goto st8; - case 34: goto tr2; - case 45: goto tr2; - case 47: goto st9; - case 73: goto tr2; - case 78: goto tr2; - case 91: goto tr2; - case 102: goto tr2; - case 110: goto tr2; - case 116: goto tr2; - case 123: goto tr2; - } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr2; - } else if ( (*p) >= 9 ) - goto st8; - goto st0; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: - switch( (*p) ) { - case 42: goto st10; - case 47: goto st12; - } - goto st0; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: - if ( (*p) == 42 ) - goto st11; - goto st10; -st11: - if ( ++p == pe ) - goto _test_eof11; -case 11: - switch( (*p) ) { - case 42: goto st11; - case 47: goto st8; - } - goto st10; -st12: - if ( ++p == pe ) - goto _test_eof12; -case 12: - if ( (*p) == 10 ) - goto st8; - goto st12; -st13: - if ( ++p == pe ) - goto _test_eof13; -case 13: - _widec = (*p); - if ( (*p) < 13 ) { - if ( (*p) > 9 ) { - if ( 10 <= (*p) && (*p) <= 10 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 9 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 13 ) { - if ( (*p) > 32 ) { - if ( 47 <= (*p) && (*p) <= 47 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 32 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 34: goto tr2; - case 45: goto tr2; - case 73: goto tr2; - case 78: goto tr2; - case 91: goto tr2; - case 93: goto tr4; - case 102: goto tr2; - case 110: goto tr2; - case 116: goto tr2; - case 123: goto tr2; - case 269: goto st8; - case 288: goto st8; - case 303: goto st9; - case 525: goto st13; - case 544: goto st13; - case 559: goto st14; - } - if ( _widec < 265 ) { - if ( 48 <= _widec && _widec <= 57 ) - goto tr2; - } else if ( _widec > 266 ) { - if ( 521 <= _widec && _widec <= 522 ) - goto st13; - } else - goto st8; - goto st0; -st14: - if ( ++p == pe ) - goto _test_eof14; -case 14: - _widec = (*p); - if ( (*p) > 42 ) { - if ( 47 <= (*p) && (*p) <= 47 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 42 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 298: goto st10; - case 303: goto st12; - case 554: goto st15; - case 559: goto st17; - } - goto st0; -st15: - if ( ++p == pe ) - goto _test_eof15; -case 15: - _widec = (*p); - if ( (*p) < 42 ) { - if ( (*p) <= 41 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 42 ) { - if ( 43 <= (*p) ) - { _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 298: goto st11; - case 554: goto st16; - } - if ( _widec > 383 ) { - if ( 384 <= _widec && _widec <= 639 ) - goto st15; - } else if ( _widec >= 128 ) - goto st10; - goto st0; -st16: - if ( ++p == pe ) - goto _test_eof16; -case 16: - _widec = (*p); - if ( (*p) < 43 ) { - if ( (*p) > 41 ) { - if ( 42 <= (*p) && (*p) <= 42 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 46 ) { - if ( (*p) > 47 ) { - if ( 48 <= (*p) ) - { _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 47 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 298: goto st11; - case 303: goto st8; - case 554: goto st16; - case 559: goto st13; - } - if ( _widec > 383 ) { - if ( 384 <= _widec && _widec <= 639 ) - goto st15; - } else if ( _widec >= 128 ) - goto st10; - goto st0; -st17: - if ( ++p == pe ) - goto _test_eof17; -case 17: - _widec = (*p); - if ( (*p) < 10 ) { - if ( (*p) <= 9 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 10 ) { - if ( 11 <= (*p) ) - { _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 266: goto st8; - case 522: goto st13; - } - if ( _widec > 383 ) { - if ( 384 <= _widec && _widec <= 639 ) - goto st17; - } else if ( _widec >= 128 ) - goto st12; - goto st0; -st18: - if ( ++p == pe ) - goto _test_eof18; -case 18: - switch( (*p) ) { - case 42: goto st19; - case 47: goto st21; - } - goto st0; -st19: - if ( ++p == pe ) - goto _test_eof19; -case 19: - if ( (*p) == 42 ) - goto st20; - goto st19; -st20: - if ( ++p == pe ) - goto _test_eof20; -case 20: - switch( (*p) ) { - case 42: goto st20; - case 47: goto st2; - } - goto st19; -st21: - if ( ++p == pe ) - goto _test_eof21; -case 21: - if ( (*p) == 10 ) - goto st2; - goto st21; - } - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof22: cs = 22; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - _test_eof11: cs = 11; goto _test_eof; - _test_eof12: cs = 12; goto _test_eof; - _test_eof13: cs = 13; goto _test_eof; - _test_eof14: cs = 14; goto _test_eof; - _test_eof15: cs = 15; goto _test_eof; - _test_eof16: cs = 16; goto _test_eof; - _test_eof17: cs = 17; goto _test_eof; - _test_eof18: cs = 18; goto _test_eof; - _test_eof19: cs = 19; goto _test_eof; - _test_eof20: cs = 20; goto _test_eof; - _test_eof21: cs = 21; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 845 "parser.rl" + case '{': { + parser->cursor++; + j2_eat_whitespace(parser); - if(cs >= JSON_array_first_final) { - long count = state->stack->head - stack_head; - - if (RB_UNLIKELY(json->array_class)) { - VALUE array = rb_class_new_instance(0, 0, json->array_class); - VALUE *items = rvalue_stack_peek(state->stack, count); - long index; - for (index = 0; index < count; index++) { - rb_funcall(array, i_leftshift, 1, items[index]); + if ((parser->cursor < parser->end) && (*parser->cursor == '}')) { + parser->cursor++; + return rb_hash_new(); } - *result = array; - } else { - VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(state->stack, count)); - *result = array; - } - rvalue_stack_pop(state->stack, count); - - return p + 1; - } else { - raise_parse_error("unexpected token at '%s'", p); - return NULL; - } -} - -static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize) -{ - if (symbolize) { - intern = true; - } - VALUE result; -# ifdef HAVE_RB_ENC_INTERNED_STR - if (intern) { - result = rb_enc_interned_str(start, (long)(end - start), enc_utf8); - } else { - result = rb_utf8_str_new(start, (long)(end - start)); - } -# else - result = rb_utf8_str_new(start, (long)(end - start)); - if (intern) { - result = rb_funcall(rb_str_freeze(result), i_uminus, 0); - } -# endif - if (symbolize) { - result = rb_str_intern(result); - } - - return result; -} - -static VALUE json_string_fastpath(JSON_ParserState *state, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) -{ - size_t bufferSize = stringEnd - string; - - if (is_name && state->in_array) { - VALUE cached_key; - if (RB_UNLIKELY(symbolize)) { - cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize); - } else { - cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize); - } - - if (RB_LIKELY(cached_key)) { - return cached_key; - } - } - - return build_string(string, stringEnd, intern, symbolize); -} - -static VALUE json_string_unescape(JSON_ParserState *state, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) -{ - size_t bufferSize = stringEnd - string; - char *p = string, *pe = string, *unescape, *bufferStart, *buffer; - int unescape_len; - char buf[4]; - - if (is_name && state->in_array) { - VALUE cached_key; - if (RB_UNLIKELY(symbolize)) { - cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize); - } else { - cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize); - } - - if (RB_LIKELY(cached_key)) { - return cached_key; - } - } - - pe = memchr(p, '\\', bufferSize); - if (RB_UNLIKELY(pe == NULL)) { - return build_string(string, stringEnd, intern, symbolize); - } + VALUE elements = rb_ary_new(); + while (parser->cursor < parser->end) { + j2_eat_whitespace(parser); + if (*parser->cursor != '"') { + rb_raise(rb_eRuntimeError, "expected object key"); + } - VALUE result = rb_str_buf_new(bufferSize); - rb_enc_associate_index(result, utf8_encindex); - buffer = bufferStart = RSTRING_PTR(result); + VALUE key = j2_parse_element(parser); + j2_eat_whitespace(parser); - while (pe < stringEnd) { - if (*pe == '\\') { - unescape = (char *) "?"; - unescape_len = 1; - if (pe > p) { - MEMCPY(buffer, p, char, pe - p); - buffer += pe - p; - } - switch (*++pe) { - case 'n': - unescape = (char *) "\n"; - break; - case 'r': - unescape = (char *) "\r"; - break; - case 't': - unescape = (char *) "\t"; - break; - case '"': - unescape = (char *) "\""; - break; - case '\\': - unescape = (char *) "\\"; - break; - case 'b': - unescape = (char *) "\b"; - break; - case 'f': - unescape = (char *) "\f"; - break; - case 'u': - if (pe > stringEnd - 4) { - raise_parse_error("incomplete unicode character escape sequence at '%s'", p); - } else { - uint32_t ch = unescape_unicode((unsigned char *) ++pe); - pe += 3; - /* To handle values above U+FFFF, we take a sequence of - * \uXXXX escapes in the U+D800..U+DBFF then - * U+DC00..U+DFFF ranges, take the low 10 bits from each - * to make a 20-bit number, then add 0x10000 to get the - * final codepoint. - * - * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling - * Surrogate Pairs in UTF-16", and 23.6 "Surrogates - * Area". - */ - if ((ch & 0xFC00) == 0xD800) { - pe++; - if (pe > stringEnd - 6) { - raise_parse_error("incomplete surrogate pair at '%s'", p); - } - if (pe[0] == '\\' && pe[1] == 'u') { - uint32_t sur = unescape_unicode((unsigned char *) pe + 2); - ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) - | (sur & 0x3FF)); - pe += 5; - } else { - unescape = (char *) "?"; - break; - } - } - unescape_len = convert_UTF32_to_UTF8(buf, ch); - unescape = buf; + if ((parser->cursor >= parser->end) || (*parser->cursor != ':')) { + rb_raise(rb_eRuntimeError, "expected ':' after object key"); + } + parser->cursor++; + + VALUE value = j2_parse_element(parser); + VALUE pair[2] = { key, value }; + rb_ary_cat(elements, pair, 2); + + j2_eat_whitespace(parser); + switch (*parser->cursor) { + case ',': + parser->cursor++; + break; + case '}': { + parser->cursor++; + VALUE value = rb_hash_new_capa(RARRAY_LEN(elements)); + rb_hash_bulk_insert(RARRAY_LEN(elements), RARRAY_CONST_PTR(elements), value); + return value; } - break; - default: - p = pe; - continue; - } - MEMCPY(buffer, unescape, char, unescape_len); - buffer += unescape_len; - p = ++pe; - } else { - pe++; - } - } - - if (pe > p) { - MEMCPY(buffer, p, char, pe - p); - buffer += pe - p; - } - rb_str_set_len(result, buffer - bufferStart); - - if (symbolize) { - result = rb_str_intern(result); - } else if (intern) { - result = rb_funcall(rb_str_freeze(result), i_uminus, 0); - } - - return result; -} - - -#line 2410 "parser.c" -enum {JSON_string_start = 1}; -enum {JSON_string_first_final = 9}; -enum {JSON_string_error = 0}; - -enum {JSON_string_en_main = 1}; - - -#line 1068 "parser.rl" - - -static int -match_i(VALUE regexp, VALUE klass, VALUE memo) -{ - if (regexp == Qundef) return ST_STOP; - if (RTEST(rb_funcall(klass, i_json_creatable_p, 0)) && - RTEST(rb_funcall(regexp, i_match, 1, rb_ary_entry(memo, 0)))) { - rb_ary_push(memo, klass); - return ST_STOP; - } - return ST_CONTINUE; -} - -static char *JSON_parse_string(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result) -{ - int cs = EVIL; - VALUE match_string; - - -#line 2439 "parser.c" - { - cs = JSON_string_start; - } - -#line 1088 "parser.rl" - state->memo = p; - -#line 2447 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -case 1: - if ( (*p) == 34 ) - goto st2; - goto st0; -st0: -cs = 0; - goto _out; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 34: goto tr2; - case 92: goto st3; - } - if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 ) - goto st0; - goto st2; -tr2: -#line 1050 "parser.rl" - { - *result = json_string_fastpath(state, state->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); - {p = (( p + 1))-1;} - p--; - {p++; cs = 9; goto _out;} - } -#line 1043 "parser.rl" - { - *result = json_string_unescape(state, state->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); - {p = (( p + 1))-1;} - p--; - {p++; cs = 9; goto _out;} - } - goto st9; -tr6: -#line 1043 "parser.rl" - { - *result = json_string_unescape(state, state->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); - {p = (( p + 1))-1;} - p--; - {p++; cs = 9; goto _out;} - } - goto st9; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: -#line 2500 "parser.c" - goto st0; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: - if ( (*p) == 117 ) - goto st5; - if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 ) - goto st0; - goto st4; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - switch( (*p) ) { - case 34: goto tr6; - case 92: goto st3; - } - if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 ) - goto st0; - goto st4; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st6; - } else if ( (*p) > 70 ) { - if ( 97 <= (*p) && (*p) <= 102 ) - goto st6; - } else - goto st6; - goto st0; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st7; - } else if ( (*p) > 70 ) { - if ( 97 <= (*p) && (*p) <= 102 ) - goto st7; - } else - goto st7; - goto st0; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st8; - } else if ( (*p) > 70 ) { - if ( 97 <= (*p) && (*p) <= 102 ) - goto st8; - } else - goto st8; - goto st0; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st4; - } else if ( (*p) > 70 ) { - if ( 97 <= (*p) && (*p) <= 102 ) - goto st4; - } else - goto st4; - goto st0; - } - _test_eof2: cs = 2; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 1090 "parser.rl" - - if (json->create_additions && RTEST(match_string = json->match_string)) { - VALUE klass; - VALUE memo = rb_ary_new2(2); - rb_ary_push(memo, *result); - rb_hash_foreach(match_string, match_i, memo); - klass = rb_ary_entry(memo, 1); - if (RTEST(klass)) { - *result = rb_funcall(klass, i_json_create, 1, *result); - } - } - - if (cs >= JSON_string_first_final) { - return p + 1; - } else { - return NULL; - } -} - -/* - * Document-class: JSON::Ext::Parser - * - * This is the JSON parser implemented as a C extension. It can be configured - * to be used by setting - * - * JSON.parser = JSON::Ext::Parser - * - * with the method parser= in JSON. - * - */ - -static VALUE convert_encoding(VALUE source) -{ - int encindex = RB_ENCODING_GET(source); - - if (RB_LIKELY(encindex == utf8_encindex)) { - return source; - } - - if (encindex == binary_encindex) { - // For historical reason, we silently reinterpret binary strings as UTF-8 - return rb_enc_associate_index(rb_str_dup(source), utf8_encindex); - } - - return rb_funcall(source, i_encode, 1, Encoding_UTF_8); -} - -static int configure_parser_i(VALUE key, VALUE val, VALUE data) -{ - JSON_Parser *json = (JSON_Parser *)data; - - if (key == sym_max_nesting) { json->max_nesting = RTEST(val) ? FIX2INT(val) : 0; } - else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); } - else if (key == sym_allow_trailing_comma) { json->allow_trailing_comma = RTEST(val); } - else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); } - else if (key == sym_freeze) { json->freeze = RTEST(val); } - else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; } - else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_match_string) { json->match_string = RTEST(val) ? val : Qfalse; } - else if (key == sym_create_additions) { - if (NIL_P(val)) { - json->create_additions = true; - json->deprecated_create_additions = true; - } else { - json->create_additions = RTEST(val); - json->deprecated_create_additions = false; - } - } - - return ST_CONTINUE; -} - -static void parser_init(JSON_Parser *json, VALUE opts) -{ - json->max_nesting = 100; - - if (!NIL_P(opts)) { - Check_Type(opts, T_HASH); - if (RHASH_SIZE(opts) > 0) { - // We assume in most cases few keys are set so it's faster to go over - // the provided keys than to check all possible keys. - rb_hash_foreach(opts, configure_parser_i, (VALUE)json); - - if (json->symbolize_names && json->create_additions) { - rb_raise(rb_eArgError, - "options :symbolize_names and :create_additions cannot be " - " used in conjunction"); + default: + rb_raise(rb_eRuntimeError, "expected ',' or '}' after object value"); + } } - if (json->create_additions && !json->create_id) { - json->create_id = rb_funcall(mJSON, i_create_id, 0); - } + rb_raise(rb_eRuntimeError, "unexpected end of input"); + break; } - + default: + rb_raise(rb_eRuntimeError, "unexpected character"); + break; } -} - -/* - * call-seq: new(opts => {}) - * - * Creates a new JSON::Ext::ParserConfig instance. - * - * It will be configured by the _opts_ hash. _opts_ can have the following - * keys: - * - * _opts_ can have the following keys: - * * *max_nesting*: The maximum depth of nesting allowed in the parsed data - * structures. Disable depth checking with :max_nesting => false|nil|0, it - * defaults to 100. - * * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in - * defiance of RFC 4627 to be parsed by the Parser. This option defaults to - * false. - * * *symbolize_names*: If set to true, returns symbols for the names - * (keys) in a JSON object. Otherwise strings are returned, which is - * also the default. It's not possible to use this option in - * conjunction with the *create_additions* option. - * * *create_additions*: If set to false, the Parser doesn't create - * additions even if a matching class and create_id was found. This option - * defaults to false. - * * *object_class*: Defaults to Hash. If another type is provided, it will be used - * instead of Hash to represent JSON objects. The type must respond to - * +new+ without arguments, and return an object that respond to +[]=+. - * * *array_class*: Defaults to Array If another type is provided, it will be used - * instead of Hash to represent JSON arrays. The type must respond to - * +new+ without arguments, and return an object that respond to +<<+. - * * *decimal_class*: Specifies which class to use instead of the default - * (Float) when parsing decimal numbers. This class must accept a single - * string argument in its constructor. - */ -static VALUE cParserConfig_initialize(VALUE self, VALUE opts) -{ - GET_PARSER; - parser_init(json, opts); - return self; + rb_raise(rb_eRuntimeError, "unexpected character"); } +static VALUE +j2_parse(VALUE self, VALUE value) { + Check_Type(value, T_STRING); -#line 2729 "parser.c" -enum {JSON_start = 1}; -enum {JSON_first_final = 10}; -enum {JSON_error = 0}; - -enum {JSON_en_main = 1}; - - -#line 1244 "parser.rl" - - -static VALUE cParser_parse_safe(VALUE vstate) -{ - JSON_ParserState *state = (JSON_ParserState *)vstate; - VALUE result = Qnil; - char *p, *pe; - int cs = EVIL; - JSON_Parser *json = state->json; - - -#line 2749 "parser.c" - { - cs = JSON_start; - } - -#line 1255 "parser.rl" - p = state->source; - pe = p + state->len; - -#line 2758 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -st1: - if ( ++p == pe ) - goto _test_eof1; -case 1: - switch( (*p) ) { - case 13: goto st1; - case 32: goto st1; - case 34: goto tr2; - case 45: goto tr2; - case 47: goto st6; - case 73: goto tr2; - case 78: goto tr2; - case 91: goto tr2; - case 102: goto tr2; - case 110: goto tr2; - case 116: goto tr2; - case 123: goto tr2; - } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr2; - } else if ( (*p) >= 9 ) - goto st1; - goto st0; -st0: -cs = 0; - goto _out; -tr2: -#line 1236 "parser.rl" - { - char *np = JSON_parse_value(state, json, p, pe, &result, 0); - if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} - } - goto st10; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: -#line 2802 "parser.c" - switch( (*p) ) { - case 13: goto st10; - case 32: goto st10; - case 47: goto st2; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st10; - goto st0; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 42: goto st3; - case 47: goto st5; - } - goto st0; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: - if ( (*p) == 42 ) - goto st4; - goto st3; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - switch( (*p) ) { - case 42: goto st4; - case 47: goto st10; - } - goto st3; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( (*p) == 10 ) - goto st10; - goto st5; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - switch( (*p) ) { - case 42: goto st7; - case 47: goto st9; - } - goto st0; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - if ( (*p) == 42 ) - goto st8; - goto st7; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - switch( (*p) ) { - case 42: goto st8; - case 47: goto st1; - } - goto st7; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: - if ( (*p) == 10 ) - goto st1; - goto st9; - } - _test_eof1: cs = 1; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 1258 "parser.rl" - - if (state->stack_handle) { - rvalue_stack_eagerly_release(state->stack_handle); - } - - if (cs >= JSON_first_final && p == pe) { - return result; - } else { - raise_parse_error("unexpected token at '%s'", p); - return Qnil; - } -} - -static VALUE cParser_parse(JSON_Parser *json, VALUE Vsource) -{ - Vsource = convert_encoding(StringValue(Vsource)); - StringValue(Vsource); - - VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; - rvalue_stack stack = { - .type = RVALUE_STACK_STACK_ALLOCATED, - .ptr = rvalue_stack_buffer, - .capa = RVALUE_STACK_INITIAL_CAPA, - }; - - JSON_ParserState _state = { - .json = json, - .len = RSTRING_LEN(Vsource), - .source = RSTRING_PTR(Vsource), - .Vsource = Vsource, - .stack = &stack, + const uint8_t *start = (const uint8_t *) RSTRING_PTR(value); + j2_parser_t parser = { + .cursor = start, + .end = start + RSTRING_LEN(value) }; - JSON_ParserState *state = &_state; - - char stack_buffer[FBUFFER_STACK_SIZE]; - fbuffer_stack_init(&state->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); - - int interupted; - VALUE result = rb_protect(cParser_parse_safe, (VALUE)state, &interupted); - - fbuffer_free(&state->fbuffer); - if (interupted) { - rb_jump_tag(interupted); - } - - return result; -} - -/* - * call-seq: parse(source) - * - * Parses the current JSON text _source_ and returns the complete data - * structure as a result. - * It raises JSON::ParserError if fail to parse. - */ -static VALUE cParserConfig_parse(VALUE self, VALUE Vsource) -{ - GET_PARSER; - return cParser_parse(json, Vsource); -} - -static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts) -{ - Vsource = convert_encoding(StringValue(Vsource)); - StringValue(Vsource); - - JSON_Parser _parser = {0}; - JSON_Parser *json = &_parser; - parser_init(json, opts); - - return cParser_parse(json, Vsource); -} - -static void JSON_mark(void *ptr) -{ - JSON_Parser *json = ptr; - rb_gc_mark(json->create_id); - rb_gc_mark(json->object_class); - rb_gc_mark(json->array_class); - rb_gc_mark(json->decimal_class); - rb_gc_mark(json->match_string); -} - -static void JSON_free(void *ptr) -{ - JSON_Parser *json = ptr; - ruby_xfree(json); -} - -static size_t JSON_memsize(const void *ptr) -{ - return sizeof(JSON_Parser); -} -static const rb_data_type_t JSON_Parser_type = { - "JSON/Parser", - {JSON_mark, JSON_free, JSON_memsize,}, - 0, 0, - RUBY_TYPED_FREE_IMMEDIATELY, -}; - -static VALUE cJSON_parser_s_allocate(VALUE klass) -{ - JSON_Parser *json; - return TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json); + return j2_parse_element(&parser); } -void Init_parser(void) -{ -#ifdef HAVE_RB_EXT_RACTOR_SAFE - rb_ext_ractor_safe(true); -#endif - -#undef rb_intern - rb_require("json/common"); - mJSON = rb_define_module("JSON"); - VALUE mExt = rb_define_module_under(mJSON, "Ext"); - VALUE cParserConfig = rb_define_class_under(mExt, "ParserConfig", rb_cObject); - eNestingError = rb_path2class("JSON::NestingError"); - rb_gc_register_mark_object(eNestingError); - rb_define_alloc_func(cParserConfig, cJSON_parser_s_allocate); - rb_define_method(cParserConfig, "initialize", cParserConfig_initialize, 1); - rb_define_method(cParserConfig, "parse", cParserConfig_parse, 1); - - VALUE cParser = rb_define_class_under(mExt, "Parser", rb_cObject); - rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2); - - CNaN = rb_const_get(mJSON, rb_intern("NaN")); - rb_gc_register_mark_object(CNaN); - - CInfinity = rb_const_get(mJSON, rb_intern("Infinity")); - rb_gc_register_mark_object(CInfinity); - - CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity")); - rb_gc_register_mark_object(CMinusInfinity); - - rb_global_variable(&Encoding_UTF_8); - Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8")); - - sym_max_nesting = ID2SYM(rb_intern("max_nesting")); - sym_allow_nan = ID2SYM(rb_intern("allow_nan")); - sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma")); - sym_symbolize_names = ID2SYM(rb_intern("symbolize_names")); - sym_freeze = ID2SYM(rb_intern("freeze")); - sym_create_additions = ID2SYM(rb_intern("create_additions")); - sym_create_id = ID2SYM(rb_intern("create_id")); - sym_object_class = ID2SYM(rb_intern("object_class")); - sym_array_class = ID2SYM(rb_intern("array_class")); - sym_decimal_class = ID2SYM(rb_intern("decimal_class")); - sym_match_string = ID2SYM(rb_intern("match_string")); - - i_create_id = rb_intern("create_id"); - i_json_creatable_p = rb_intern("json_creatable?"); - i_json_create = rb_intern("json_create"); - i_chr = rb_intern("chr"); - i_match = rb_intern("match"); - i_deep_const_get = rb_intern("deep_const_get"); - i_aset = rb_intern("[]="); - i_aref = rb_intern("[]"); - i_leftshift = rb_intern("<<"); - i_new = rb_intern("new"); - i_try_convert = rb_intern("try_convert"); - i_uminus = rb_intern("-@"); - i_encode = rb_intern("encode"); - - binary_encindex = rb_ascii8bit_encindex(); - utf8_encindex = rb_utf8_encindex(); - enc_utf8 = rb_utf8_encoding(); +void +Init_json2(void) { + VALUE rb_cJSON2 = rb_define_module("JSON2"); + rb_define_singleton_method(rb_cJSON2, "parse", j2_parse, 1); } - -/* - * Local variables: - * mode: c - * c-file-style: ruby - * indent-tabs-mode: nil - * End: - */