/*
 * Lookup table indexed by a byte value: the ASCII hex digits '0'-'9',
 * 'A'-'F' and 'a'-'f' map to 0..15, every other entry listed is -1.
 * NOTE(review): the element type is plain char, whose signedness is
 * implementation-defined; the -1 markers and the `b < 0` tests in
 * unescape_unicode() need a signed type -- consider signed char.
 */
5 static const char digit_values[256] = {
6 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
7 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
8 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1,
9 -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1,
10 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
11 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
12 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
13 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
14 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
15 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
16 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
17 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
18 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
19 -1, -1, -1, -1, -1, -1, -1
/*
 * Interprets the four bytes p[0]..p[3] as hexadecimal digits (looked up in
 * digit_values) and shifts them into result, most significant digit first.
 * Returns UNI_REPLACEMENT_CHAR as soon as one byte has no digit value.
 * NOTE(review): the `b < 0` tests only fire if b and the table elements
 * have a signed type; digit_values is plain char -- confirm on targets
 * where char is unsigned.
 */
22 static UTF32 unescape_unicode(const unsigned char *p)
26 b = digit_values[p[0]];
27 if (b < 0) return UNI_REPLACEMENT_CHAR;
28 result = (result << 4) | b;
/*
 * For the remaining digits the value is merged before it is validated;
 * the early return does not use result, so the outcome is the same.
 */
29 b = digit_values[p[1]];
30 result = (result << 4) | b;
31 if (b < 0) return UNI_REPLACEMENT_CHAR;
32 b = digit_values[p[2]];
33 result = (result << 4) | b;
34 if (b < 0) return UNI_REPLACEMENT_CHAR;
35 b = digit_values[p[3]];
36 result = (result << 4) | b;
37 if (b < 0) return UNI_REPLACEMENT_CHAR;
/*
 * Writes the UTF-8 encoding of code point ch into buf.  The branches below
 * emit 2 bytes for ch <= 0x07FF, 3 bytes for ch <= 0xFFFF and 4 bytes for
 * ch <= 0x1fffff, so buf must have room for at least 4 bytes.
 * json_string_unescape() uses the return value as the number of bytes to
 * append.
 */
41 static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
/* Two bytes: lead byte tagged 0xC0, one continuation byte tagged 0x80. */
46 } else if (ch <= 0x07FF) {
47 buf[0] = (char) ((ch >> 6) | 0xC0);
48 buf[1] = (char) ((ch & 0x3F) | 0x80);
/* Three bytes: lead byte tagged 0xE0, two continuation bytes. */
50 } else if (ch <= 0xFFFF) {
51 buf[0] = (char) ((ch >> 12) | 0xE0);
52 buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80);
53 buf[2] = (char) ((ch & 0x3F) | 0x80);
/* Four bytes: lead byte tagged 0xF0, three continuation bytes. */
55 } else if (ch <= 0x1fffff) {
56 buf[0] =(char) ((ch >> 18) | 0xF0);
57 buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80);
58 buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80);
59 buf[3] =(char) ((ch & 0x3F) | 0x80);
/*
 * Encoding objects and the method IDs :encoding / :encode; they are looked
 * up in the init code and used by convert_encoding() when Ruby provides
 * encoding support.
 */
67 #ifdef HAVE_RUBY_ENCODING_H
68 static VALUE CEncoding_ASCII_8BIT, CEncoding_UTF_8, CEncoding_UTF_16BE,
69 CEncoding_UTF_16LE, CEncoding_UTF_32BE, CEncoding_UTF_32LE;
70 static ID i_encoding, i_encode;
/* JSON module, JSON::Ext, the Parser class and the two exception classes. */
75 static VALUE mJSON, mExt, cParser, eParserError, eNestingError;
/* Values of the constants JSON::NaN, JSON::Infinity and JSON::MinusInfinity. */
76 static VALUE CNaN, CInfinity, CMinusInfinity;
/* Interned method names and option keys, filled in by the init code. */
78 static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
79 i_chr, i_max_nesting, i_allow_nan, i_symbolize_names, i_quirks_mode,
80 i_object_class, i_array_class, i_key_p, i_deep_const_get, i_match,
81 i_match_string, i_aset, i_leftshift;
# Ignorable input: C-style block comments, //-style line comments and ws.
89 c_comment = '/*' ( any* - (any* '*/' any* ) ) '*/';
90 cpp_comment = '//' cr_neg* cr;
91 comment = c_comment | cpp_comment;
92 ignore = ws | comment;
94 value_separator = ',';
# Literal spellings of the non-standard infinity values.
99 VInfinity = 'Infinity';
100 VMinusInfinity = '-Infinity';
# First characters that may start a value, an object name and a number.
101 begin_value = [nft\"\-\[\{NI] | digit;
107 begin_name = begin_string;
108 begin_number = digit | '-';
/*
 * Parse the value of the current pair; a NULL return aborts the machine,
 * otherwise the value is stored under last_name (directly for a Hash, via
 * []= when a custom object_class is configured).
 */
119 char *np = JSON_parse_value(json, fpc, pe, &v);
123 if (NIL_P(json->object_class)) {
124 rb_hash_aset(*result, last_name, v);
126 rb_funcall(*result, i_aset, 2, last_name, v);
/*
 * parsing_name is raised only while the key string is parsed, so that
 * JSON_parse_string() can tell names from ordinary string values.
 */
134 json->parsing_name = 1;
135 np = JSON_parse_string(json, fpc, pe, &last_name);
136 json->parsing_name = 0;
137 if (np == NULL) { fhold; fbreak; } else fexec np;
140 action exit { fhold; fbreak; }
142 pair = ignore* begin_name >parse_name ignore* name_separator ignore* begin_value >parse_value;
143 next_pair = ignore* value_separator pair;
147 (pair (next_pair)*)? ignore*
/*
 * Parses a JSON object from the input p..pe and stores the resulting
 * container in *result.  Raises eNestingError when max_nesting is non-zero
 * and current_nesting exceeds it.
 */
152 static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result)
155 VALUE last_name = Qnil;
156 VALUE object_class = json->object_class;
158 if (json->max_nesting && json->current_nesting > json->max_nesting) {
159 rb_raise(eNestingError, "nesting of %d is too deep", json->current_nesting);
/* A plain Hash unless a custom object_class was configured. */
162 *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class);
/* The object machine ended in an accepting state. */
167 if (cs >= JSON_object_first_final) {
/*
 * With create_additions enabled: if the entry stored under create_id names
 * a class that answers true to json_creatable?, the result is replaced by
 * klass.json_create(result).
 * NOTE(review): rb_hash_aref is applied to *result even when it is an
 * instance of a custom object_class -- confirm that class is always a Hash.
 */
168 if (json->create_additions) {
169 VALUE klassname = rb_hash_aref(*result, json->create_id);
170 if (!NIL_P(klassname)) {
171 VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname);
172 if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) {
173 *result = rb_funcall(klass, i_json_create, 1, *result);
/*
 * Non-standard literal (presumably NaN -- confirm): rejected with
 * eParserError unless allow_nan is set; p - 2 is where the error text
 * starts.
 */
200 if (json->allow_nan) {
203 rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p - 2);
206 action parse_infinity {
/*
 * NOTE(review): 'Infinity' has 8 characters; compared with the p - 2 used
 * for the 3-character literal above, p - 8 looks one byte too far back --
 * confirm (p - 7?).
 */
207 if (json->allow_nan) {
210 rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p - 8);
213 action parse_string {
/* NULL from the sub-parser aborts this machine, otherwise resume at np. */
214 char *np = JSON_parse_string(json, fpc, pe, result);
215 if (np == NULL) { fhold; fbreak; } else fexec np;
218 action parse_number {
/*
 * "-Infinity" is checked first (9 characters compared); it yields
 * CMinusInfinity only when allow_nan is set.  Otherwise the float parser
 * is tried before the integer parser.
 */
220 if(pe > fpc + 9 - json->quirks_mode && !strncmp(MinusInfinity, fpc, 9)) {
221 if (json->allow_nan) {
222 *result = CMinusInfinity;
226 rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p);
229 np = JSON_parse_float(json, fpc, pe, result);
230 if (np != NULL) fexec np;
231 np = JSON_parse_integer(json, fpc, pe, result);
232 if (np != NULL) fexec np;
/* The nesting depth is raised only for the duration of the nested parse. */
238 json->current_nesting++;
239 np = JSON_parse_array(json, fpc, pe, result);
240 json->current_nesting--;
241 if (np == NULL) { fhold; fbreak; } else fexec np;
244 action parse_object {
/* Same depth bookkeeping as for arrays. */
246 json->current_nesting++;
247 np = JSON_parse_object(json, fpc, pe, result);
248 json->current_nesting--;
249 if (np == NULL) { fhold; fbreak; } else fexec np;
252 action exit { fhold; fbreak; }
256 Vfalse @parse_false |
259 VInfinity @parse_infinity |
260 begin_number >parse_number |
261 begin_string >parse_string |
262 begin_array >parse_array |
263 begin_object >parse_object
/*
 * Parses a single JSON value from p..pe into *result.  Callers treat a NULL
 * return as failure and otherwise continue scanning at the returned pointer.
 */
267 static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result)
/* The value machine ended in an accepting state. */
274 if (cs >= JSON_value_first_final) {
# Integer: optional minus sign, then 0 or a digit sequence without a
# leading zero; the first following non-digit triggers the exit action.
282 machine JSON_integer;
286 action exit { fhold; fbreak; }
288 main := '-'? ('0' | [1-9][0-9]*) (^[0-9]? @exit);
/*
 * Parses an integer literal.  The number parser calls it after
 * JSON_parse_float() and treats a NULL return as "not an integer".
 */
291 static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result)
/*
 * On acceptance the text from json->memo up to p is copied into a Ruby
 * string and converted with rb_Integer (memo is presumably the start of the
 * literal -- confirm).
 */
299 if (cs >= JSON_integer_first_final) {
300 long len = p - json->memo;
301 *result = rb_Integer(rb_str_new(json->memo, len));
# Float: an integer part followed by a fraction with optional exponent, or
# by an exponent alone; the exit action fires on the first character that
# cannot continue the number.
314 action exit { fhold; fbreak; }
317 (('0' | [1-9][0-9]*) '.' [0-9]+ ([Ee] [+\-]?[0-9]+)?)
318 | (('0' | [1-9][0-9]*) ([Ee] [+\-]?[0-9]+))
319 ) (^[0-9Ee.\-]? @exit );
/*
 * Parses a floating point literal.  The number parser tries it before
 * JSON_parse_integer() and treats a NULL return as "not a float".
 */
322 static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result)
/*
 * On acceptance the text from json->memo up to p is copied into a Ruby
 * string and converted with rb_Float.
 */
330 if (cs >= JSON_float_first_final) {
331 long len = p - json->memo;
332 *result = rb_Float(rb_str_new(json->memo, len));
/*
 * Parse one element and append it: rb_ary_push for a plain Array, the <<
 * method when a custom array_class is configured.
 */
348 char *np = JSON_parse_value(json, fpc, pe, &v);
352 if (NIL_P(json->array_class)) {
353 rb_ary_push(*result, v);
355 rb_funcall(*result, i_leftshift, 1, v);
361 action exit { fhold; fbreak; }
363 next_element = value_separator ignore* begin_value >parse_value;
365 main := begin_array ignore*
366 ((begin_value >parse_value ignore*)
367 (ignore* next_element ignore*)*)?
/*
 * Parses a JSON array from the input p..pe and stores the resulting
 * container in *result.  Raises eNestingError when max_nesting is non-zero
 * and current_nesting exceeds it.
 */
371 static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result)
374 VALUE array_class = json->array_class;
376 if (json->max_nesting && json->current_nesting > json->max_nesting) {
377 rb_raise(eNestingError, "nesting of %d is too deep", json->current_nesting);
/* A plain Array unless a custom array_class was configured. */
379 *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class);
/* The array machine ended in an accepting state. */
384 if(cs >= JSON_array_first_final) {
/* The message carries the source line of this call and the text at p. */
387 rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p);
/*
 * Appends the text between string and stringEnd to result, replacing escape
 * sequences by the characters they stand for.  Returns Qnil when a high
 * surrogate is not followed by enough input for a second \u escape.
 */
392 static VALUE json_string_unescape(VALUE result, char *string, char *stringEnd)
/* p marks the start of the pending literal run, pe is the scan position. */
394 char *p = string, *pe = string, *unescape;
397 while (pe < stringEnd) {
/* Default replacement text, overwritten by the cases below. */
399 unescape = (char *) "?";
/* Flush the literal run that precedes the escape. */
401 if (pe > p) rb_str_buf_cat(result, p, pe - p);
404 unescape = (char *) "\n";
407 unescape = (char *) "\r";
410 unescape = (char *) "\t";
413 unescape = (char *) "\"";
416 unescape = (char *) "\\";
419 unescape = (char *) "\b";
422 unescape = (char *) "\f";
/* A \u escape needs four hex digits before stringEnd. */
425 if (pe > stringEnd - 4) {
429 UTF32 ch = unescape_unicode((unsigned char *) ++pe);
/*
 * ch is in the high-surrogate range (UNI_SUR_HIGH_START is presumably
 * 0xD800 -- confirm): a second \uXXXX escape is needed to form the pair.
 */
431 if (UNI_SUR_HIGH_START == (ch & 0xFC00)) {
433 if (pe > stringEnd - 6) return Qnil;
434 if (pe[0] == '\\' && pe[1] == 'u') {
435 UTF32 sur = unescape_unicode((unsigned char *) pe + 2);
/* Start of the recombination of the surrogate pair into one code point. */
436 ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
440 unescape = (char *) "?";
/* The code point is emitted as UTF-8; the return value is its byte length. */
444 unescape_len = convert_UTF32_to_UTF8(buf, ch);
452 rb_str_buf_cat(result, unescape, unescape_len);
/* Append the pending literal run. */
458 rb_str_buf_cat(result, p, pe - p);
468 action parse_string {
/*
 * Unescape the text from json->memo + 1 up to p (memo + 1 presumably skips
 * the opening quote -- confirm); a nil result is handled as a failure.
 */
469 *result = json_string_unescape(*result, json->memo + 1, p);
470 if (NIL_P(*result)) {
479 action exit { fhold; fbreak; }
481 main := '"' ((^([\"\\] | 0..0x1f) | '\\'[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | '\\'^([\"\\/bfnrtu]|0..0x1f))* %parse_string) '"' @exit;
/*
 * rb_hash_foreach callback used by JSON_parse_string() on the match_string
 * hash (regexp => klass).  memo[0] is the string being tested; klass is
 * pushed onto memo when it answers true to json_creatable? and regexp
 * matches that string.  Iteration stops on an undefined key.
 */
485 match_i(VALUE regexp, VALUE klass, VALUE memo)
487 if (regexp == Qundef) return ST_STOP;
488 if (RTEST(rb_funcall(klass, i_json_creatable_p, 0)) &&
489 RTEST(rb_funcall(regexp, i_match, 1, rb_ary_entry(memo, 0)))) {
490 rb_ary_push(memo, klass);
/*
 * Parses a JSON string from p..pe into *result.  Callers treat a NULL
 * return as failure and otherwise continue at the returned pointer.
 */
496 static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result)
/* Start from an empty string buffer. */
501 *result = rb_str_buf_new(0);
/*
 * With create_additions and a match_string hash: memo starts as [string],
 * match_i() appends a matching class, and memo[1] (if any) converts the
 * string through klass.json_create.
 */
506 if (json->create_additions && RTEST(match_string = json->match_string)) {
508 VALUE memo = rb_ary_new2(2);
509 rb_ary_push(memo, *result);
510 rb_hash_foreach(match_string, match_i, memo);
511 klass = rb_ary_entry(memo, 1);
513 *result = rb_funcall(klass, i_json_create, 1, *result);
/* Object names become symbols when symbolize_names is set. */
517 if (json->symbolize_names && json->parsing_name) {
518 *result = rb_str_intern(*result);
520 if (cs >= JSON_string_first_final) {
528 * Document-class: JSON::Ext::Parser
530 * This is the JSON parser implemented as a C extension. It can be configured
531 * to be used by setting
533 * JSON.parser = JSON::Ext::Parser
535 * with the method parser= in JSON.
/*
 * Returns source as UTF-8.  Raises eParserError for input that is too short
 * (the message asks for at least two octets).  For binary input the source
 * encoding is guessed from the positions of zero bytes among the first
 * four bytes.
 */
539 static VALUE convert_encoding(VALUE source)
541 char *ptr = RSTRING_PTR(source);
542 long len = RSTRING_LEN(source);
544 rb_raise(eParserError, "A JSON text must at least contain two octets!");
546 #ifdef HAVE_RUBY_ENCODING_H
548 VALUE encoding = rb_funcall(source, i_encoding, 0);
/*
 * ASCII-8BIT input: 00 00 00 xx -> UTF-32BE, 00 xx 00 xx -> UTF-16BE,
 * xx 00 00 00 -> UTF-32LE, xx 00 xx 00 -> UTF-16LE; the order of the tests
 * matters because the 32-bit patterns also satisfy the 16-bit ones.
 */
549 if (encoding == CEncoding_ASCII_8BIT) {
550 if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) {
551 source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_32BE);
552 } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) {
553 source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_16BE);
554 } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) {
555 source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_32LE);
556 } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) {
557 source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_16LE);
/* No pattern matched: work on a copy of the source. */
559 source = rb_str_dup(source);
/* Transcode to UTF-8 through String#encode. */
563 source = rb_funcall(source, i_encode, 1, CEncoding_UTF_8);
/* Same zero-byte detection, converting through JSON.iconv instead. */
567 if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) {
568 source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32be"), source);
569 } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) {
570 source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16be"), source);
571 } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) {
572 source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32le"), source);
573 } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) {
574 source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16le"), source);
581 * call-seq: new(source, opts => {})
583 * Creates a new JSON::Ext::Parser instance for the string _source_.
587 * It will be configured by the _opts_ hash. _opts_ can have the following
590 * _opts_ can have the following keys:
591 * * *max_nesting*: The maximum depth of nesting allowed in the parsed data
592 * structures. Disable depth checking with :max_nesting => false|nil|0, it
594 * * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in
595 * defiance of RFC 4627 to be parsed by the Parser. This option defaults to
597 * * *symbolize_names*: If set to true, returns symbols for the names
598 * (keys) in a JSON object. Otherwise strings are returned, which is also
600 * * *create_additions*: If set to false, the Parser doesn't create
601 * additions even if a matching class and create_id was found. This option
603 * * *object_class*: Defaults to Hash
604 * * *array_class*: Defaults to Array
605 * * *quirks_mode*: Enables quirks_mode for parser, that is for example
606 * parsing single JSON values instead of documents is possible.
/*
 * Parser#initialize(source, opts = nil): copies the options into the
 * JSON_Parser struct and stores the source string.  Raises TypeError for an
 * already initialized instance and ArgumentError when opts cannot be used
 * as a hash.
 */
609 static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
615 rb_raise(rb_eTypeError, "already initialized instance");
/* "11": one mandatory argument (source) and one optional (opts). */
617 rb_scan_args(argc, argv, "11", &source, &opts);
619 opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash");
621 rb_raise(rb_eArgError, "opts needs to be like a hash");
/* max_nesting: a truthy value must be a Fixnum; 0 and 19 are the fallbacks. */
623 VALUE tmp = ID2SYM(i_max_nesting);
624 if (option_given_p(opts, tmp)) {
625 VALUE max_nesting = rb_hash_aref(opts, tmp);
626 if (RTEST(max_nesting)) {
627 Check_Type(max_nesting, T_FIXNUM);
628 json->max_nesting = FIX2INT(max_nesting);
630 json->max_nesting = 0;
633 json->max_nesting = 19;
/* Boolean options are normalised to 0/1. */
635 tmp = ID2SYM(i_allow_nan);
636 if (option_given_p(opts, tmp)) {
637 json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
641 tmp = ID2SYM(i_symbolize_names);
642 if (option_given_p(opts, tmp)) {
643 json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
645 json->symbolize_names = 0;
647 tmp = ID2SYM(i_quirks_mode);
648 if (option_given_p(opts, tmp)) {
649 VALUE quirks_mode = rb_hash_aref(opts, tmp);
650 json->quirks_mode = RTEST(quirks_mode) ? 1 : 0;
652 json->quirks_mode = 0;
654 tmp = ID2SYM(i_create_additions);
655 if (option_given_p(opts, tmp)) {
656 json->create_additions = RTEST(rb_hash_aref(opts, tmp));
658 json->create_additions = 0;
/* create_id falls back to JSON.create_id. */
660 tmp = ID2SYM(i_create_id);
661 if (option_given_p(opts, tmp)) {
662 json->create_id = rb_hash_aref(opts, tmp);
664 json->create_id = rb_funcall(mJSON, i_create_id, 0);
/* Qnil for object_class / array_class selects plain Hash / Array. */
666 tmp = ID2SYM(i_object_class);
667 if (option_given_p(opts, tmp)) {
668 json->object_class = rb_hash_aref(opts, tmp);
670 json->object_class = Qnil;
672 tmp = ID2SYM(i_array_class);
673 if (option_given_p(opts, tmp)) {
674 json->array_class = rb_hash_aref(opts, tmp);
676 json->array_class = Qnil;
678 tmp = ID2SYM(i_match_string);
679 if (option_given_p(opts, tmp)) {
680 VALUE match_string = rb_hash_aref(opts, tmp);
681 json->match_string = RTEST(match_string) ? match_string : Qnil;
683 json->match_string = Qnil;
/*
 * Second group of defaults (presumably the branch taken when no opts hash
 * was given -- confirm).
 * NOTE(review): create_additions defaults to 0 above when the key is absent
 * from opts but to 1 here -- confirm the differing defaults are intended.
 */
687 json->max_nesting = 19;
689 json->create_additions = 1;
690 json->create_id = rb_funcall(mJSON, i_create_id, 0);
691 json->object_class = Qnil;
692 json->array_class = Qnil;
/* Outside quirks mode the source is coerced to a String and converted. */
694 if (!json->quirks_mode) {
695 source = convert_encoding(StringValue(source));
697 json->current_nesting = 0;
698 json->len = RSTRING_LEN(source);
/* NOTE(review): the second ';' on the next line is a stray empty statement. */
699 json->source = RSTRING_PTR(source);;
/* Vsource holds the String object itself; JSON_mark() marks it for the GC. */
700 json->Vsource = source;
711 action parse_object {
/* Top-level container: the nesting depth starts at 1. */
713 json->current_nesting = 1;
714 np = JSON_parse_object(json, fpc, pe, &result);
715 if (np == NULL) { fhold; fbreak; } else fexec np;
/* Same for a top-level array. */
720 json->current_nesting = 1;
721 np = JSON_parse_array(json, fpc, pe, &result);
722 if (np == NULL) { fhold; fbreak; } else fexec np;
726 begin_object >parse_object |
727 begin_array >parse_array
/*
 * Strict parse of the stored source; cParser_parse() calls it when
 * quirks_mode is off.  The main machine only starts on an object or array.
 */
731 static VALUE cParser_parse_strict(VALUE self)
/* Success requires an accepting state and fully consumed input. */
743 if (cs >= JSON_first_final && p == pe) {
746 rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p);
753 machine JSON_quirks_mode;
/* Any JSON value is accepted at the top level; NULL aborts the machine. */
760 char *np = JSON_parse_value(json, fpc, pe, &result);
761 if (np == NULL) { fhold; fbreak; } else fexec np;
765 begin_value >parse_value
/*
 * Quirks-mode parse of the stored source; cParser_parse() calls it when
 * quirks_mode is on.  The machine starts on any value, not only containers.
 */
769 static VALUE cParser_parse_quirks_mode(VALUE self)
/* Success requires an accepting state and fully consumed input. */
781 if (cs >= JSON_quirks_mode_first_final && p == pe) {
784 rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p);
792 * Parses the current JSON text _source_ and returns the complete data
793 * structure as a result.
/* Parser#parse: dispatches on the quirks_mode flag set in initialize. */
795 static VALUE cParser_parse(VALUE self)
799 if (json->quirks_mode) {
800 return cParser_parse_quirks_mode(self);
802 return cParser_parse_strict(self);
/* Allocates a JSON_Parser with ALLOC and clears all of its bytes. */
807 static JSON_Parser *JSON_allocate()
809 JSON_Parser *json = ALLOC(JSON_Parser);
810 MEMZERO(json, JSON_Parser, 1);
/*
 * GC mark function registered in cJSON_parser_s_allocate(): marks every
 * VALUE field of the parser struct.  rb_gc_mark_maybe is used, so fields
 * that do not hold a live object (yet) are tolerated.
 */
814 static void JSON_mark(JSON_Parser *json)
816 rb_gc_mark_maybe(json->Vsource);
817 rb_gc_mark_maybe(json->create_id);
818 rb_gc_mark_maybe(json->object_class);
819 rb_gc_mark_maybe(json->array_class);
820 rb_gc_mark_maybe(json->match_string);
/* Free function registered for the wrapped struct in cJSON_parser_s_allocate(). */
823 static void JSON_free(JSON_Parser *json)
/*
 * Allocator for the Parser class: wraps a fresh JSON_Parser in a Ruby
 * object of klass, with JSON_mark and JSON_free as its GC callbacks.
 */
828 static VALUE cJSON_parser_s_allocate(VALUE klass)
830 JSON_Parser *json = JSON_allocate();
831 return Data_Wrap_Struct(klass, JSON_mark, JSON_free, json);
837 * Returns a copy of the current _source_ string, that was used to construct
/* Parser#source: returns a duplicate of the stored source string. */
840 static VALUE cParser_source(VALUE self)
843 return rb_str_dup(json->Vsource);
847 * call-seq: quirks_mode?()
849 * Returns true if this parser is in quirks_mode, false otherwise.
/* Parser#quirks_mode?: maps the integer flag to Qtrue / Qfalse. */
851 static VALUE cParser_quirks_mode_p(VALUE self)
854 return json->quirks_mode ? Qtrue : Qfalse;
/*
 * Extension setup: json/common is loaded first because the exception
 * classes and constants looked up below come from the JSON module.
 */
860 rb_require("json/common");
861 mJSON = rb_define_module("JSON");
862 mExt = rb_define_module_under(mJSON, "Ext");
863 cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
864 eParserError = rb_path2class("JSON::ParserError");
865 eNestingError = rb_path2class("JSON::NestingError");
/* Public interface of JSON::Ext::Parser; initialize takes a variable argument count. */
866 rb_define_alloc_func(cParser, cJSON_parser_s_allocate);
867 rb_define_method(cParser, "initialize", cParser_initialize, -1);
868 rb_define_method(cParser, "parse", cParser_parse, 0);
869 rb_define_method(cParser, "source", cParser_source, 0);
870 rb_define_method(cParser, "quirks_mode?", cParser_quirks_mode_p, 0);
/* Cache the special float constants defined on the JSON module. */
872 CNaN = rb_const_get(mJSON, rb_intern("NaN"));
873 CInfinity = rb_const_get(mJSON, rb_intern("Infinity"));
874 CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
/* Intern method names and option keys once. */
876 i_json_creatable_p = rb_intern("json_creatable?");
877 i_json_create = rb_intern("json_create");
878 i_create_id = rb_intern("create_id");
879 i_create_additions = rb_intern("create_additions");
880 i_chr = rb_intern("chr");
881 i_max_nesting = rb_intern("max_nesting");
882 i_allow_nan = rb_intern("allow_nan");
883 i_symbolize_names = rb_intern("symbolize_names");
884 i_quirks_mode = rb_intern("quirks_mode");
885 i_object_class = rb_intern("object_class");
886 i_array_class = rb_intern("array_class");
887 i_match = rb_intern("match");
888 i_match_string = rb_intern("match_string");
889 i_key_p = rb_intern("key?");
890 i_deep_const_get = rb_intern("deep_const_get");
891 i_aset = rb_intern("[]=");
892 i_leftshift = rb_intern("<<");
/* With encoding support: resolve the Encoding objects through Encoding.find. */
893 #ifdef HAVE_RUBY_ENCODING_H
894 CEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8"));
895 CEncoding_UTF_16BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16be"));
896 CEncoding_UTF_16LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16le"));
897 CEncoding_UTF_32BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32be"));
898 CEncoding_UTF_32LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32le"));
899 CEncoding_ASCII_8BIT = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("ascii-8bit"));
900 i_encoding = rb_intern("encoding");
901 i_encode = rb_intern("encode");
/* ID of the iconv method that convert_encoding() calls on the JSON module. */
903 i_iconv = rb_intern("iconv");
911 * indent-tabs-mode: nil