3 #ifdef HAVE_RUBY_ENCODING_H
4 static VALUE CEncoding_UTF_8;
5 static ID i_encoding, i_encode;
8 static VALUE mJSON, mExt, mGenerator, cState, mGeneratorMethods, mObject,
9 mHash, mArray, mFixnum, mBignum, mFloat, mString, mString_Extend,
10 mTrueClass, mFalseClass, mNilClass, eGeneratorError,
11 eNestingError, CRegexp_MULTILINE, CJSON_SAFE_STATE_PROTOTYPE,
12 i_SAFE_STATE_PROTOTYPE;
14 static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before,
15 i_object_nl, i_array_nl, i_max_nesting, i_allow_nan, i_ascii_only,
16 i_quirks_mode, i_pack, i_unpack, i_create_id, i_extend, i_key_p,
17 i_aref, i_send, i_respond_to_p, i_match, i_keys, i_depth, i_dup;
20 * Copyright 2001-2004 Unicode, Inc.
24 * This source code is provided as is by Unicode, Inc. No claims are
25 * made as to fitness for any particular purpose. No warranties of any
26 * kind are expressed or implied. The recipient agrees to determine
27 * applicability of information provided. If this file has been
28 * purchased on magnetic or optical media from Unicode, Inc., the
29 * sole remedy for any claim will be exchange of defective media
30 * within 90 days of receipt.
32 * Limitations on Rights to Redistribute This Code
34 * Unicode, Inc. hereby grants the right to freely use the information
35 * supplied in this file in the creation of products supporting the
36 * Unicode Standard, and to make copies of this file in any form
37 * for internal or external distribution as long as this notice
42 * Index into the table below with the first byte of a UTF-8 sequence to
43 * get the number of trailing bytes that are supposed to follow it.
44 * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
45 * left as-is for anyone who may want to do such conversion, which was
46 * allowed in earlier algorithms.
48 static const char trailingBytesForUTF8[256] = {
49 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
50 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
51 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
52 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
53 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
54 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
55 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
56 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
60 * Magic values subtracted from a buffer value during UTF8 conversion.
61 * This table contains as many values as there might be trailing bytes
62 * in a UTF-8 sequence.
64 static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
65 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
68 * Utility routine to tell whether a sequence of bytes is legal UTF-8.
69 * This must be called with the length pre-determined by the first byte.
70 * If not calling this from ConvertUTF8to*, then the length can be set by:
71 * length = trailingBytesForUTF8[*source]+1;
72 * and the sequence is illegal right away if there aren't that many bytes
74 * If presented with a length > 4, this returns 0. The Unicode
75 * definition of UTF-8 goes up to 4-byte sequences.
77 static unsigned char isLegalUTF8(const UTF8 *source, unsigned long length)
80 const UTF8 *srcptr = source+length;
83 /* Everything else falls through when "1"... */
84 case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
85 case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
86 case 2: if ((a = (*--srcptr)) > 0xBF) return 0;
89 /* no fall-through in this inner switch */
90 case 0xE0: if (a < 0xA0) return 0; break;
91 case 0xED: if (a > 0x9F) return 0; break;
92 case 0xF0: if (a < 0x90) return 0; break;
93 case 0xF4: if (a > 0x8F) return 0; break;
94 default: if (a < 0x80) return 0;
97 case 1: if (*source >= 0x80 && *source < 0xC2) return 0;
99 if (*source > 0xF4) return 0;
103 /* Escapes the UTF16 character and stores the result in the buffer buf. */
104 static void unicode_escape(char *buf, UTF16 character)
106 const char *digits = "0123456789abcdef";
108 buf[2] = digits[character >> 12];
109 buf[3] = digits[(character >> 8) & 0xf];
110 buf[4] = digits[(character >> 4) & 0xf];
111 buf[5] = digits[character & 0xf];
114 /* Escapes the UTF16 character and stores the result in the buffer buf, then
115 * the buffer buf is appended to the FBuffer buffer. */
116 static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16
119 unicode_escape(buf, character);
120 fbuffer_append(buffer, buf, 6);
123 /* Converts string to a JSON string in FBuffer buffer, where all but the ASCII
124 * and control characters are JSON escaped. */
125 static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string)
127 const UTF8 *source = (UTF8 *) RSTRING_PTR(string);
128 const UTF8 *sourceEnd = source + RSTRING_LEN(string);
129 char buf[6] = { '\\', 'u' };
131 while (source < sourceEnd) {
133 unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
134 if (source + extraBytesToRead >= sourceEnd) {
135 rb_raise(rb_path2class("JSON::GeneratorError"),
136 "partial character in source, but hit end");
138 if (!isLegalUTF8(source, extraBytesToRead+1)) {
139 rb_raise(rb_path2class("JSON::GeneratorError"),
140 "source sequence is illegal/malformed utf-8");
143 * The cases all fall through. See "Note A" below.
145 switch (extraBytesToRead) {
146 case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
147 case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
148 case 3: ch += *source++; ch <<= 6;
149 case 2: ch += *source++; ch <<= 6;
150 case 1: ch += *source++; ch <<= 6;
151 case 0: ch += *source++;
153 ch -= offsetsFromUTF8[extraBytesToRead];
155 if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
156 /* UTF-16 surrogate values are illegal in UTF-32 */
157 if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
158 #if UNI_STRICT_CONVERSION
159 source -= (extraBytesToRead+1); /* return to the illegal value itself */
160 rb_raise(rb_path2class("JSON::GeneratorError"),
161 "source sequence is illegal/malformed utf-8");
163 unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR);
167 if (ch >= 0x20 && ch <= 0x7f) {
170 fbuffer_append(buffer, "\\\\", 2);
173 fbuffer_append(buffer, "\\\"", 2);
176 fbuffer_append_char(buffer, (char)ch);
182 fbuffer_append(buffer, "\\n", 2);
185 fbuffer_append(buffer, "\\r", 2);
188 fbuffer_append(buffer, "\\t", 2);
191 fbuffer_append(buffer, "\\f", 2);
194 fbuffer_append(buffer, "\\b", 2);
197 unicode_escape_to_buffer(buffer, buf, (UTF16) ch);
202 } else if (ch > UNI_MAX_UTF16) {
203 #if UNI_STRICT_CONVERSION
204 source -= (extraBytesToRead+1); /* return to the start */
205 rb_raise(rb_path2class("JSON::GeneratorError"),
206 "source sequence is illegal/malformed utf8");
208 unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR);
211 /* target is a character in range 0xFFFF - 0x10FFFF. */
213 unicode_escape_to_buffer(buffer, buf, (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START));
214 unicode_escape_to_buffer(buffer, buf, (UTF16)((ch & halfMask) + UNI_SUR_LOW_START));
219 /* Converts string to a JSON string in FBuffer buffer, where only the
220 * characters required by the JSON standard are JSON escaped. The remaining
221 * characters (should be UTF8) are just passed through and appended to the
223 static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string)
225 const char *ptr = RSTRING_PTR(string), *p;
226 unsigned long len = RSTRING_LEN(string), start = 0, end = 0;
227 const char *escape = NULL;
230 char buf[6] = { '\\', 'u' };
232 for (start = 0, end = 0; end < len;) {
234 c = (unsigned char) *p;
258 unicode_escape(buf, (UTF16) *p);
279 fbuffer_append(buffer, ptr + start, end - start);
280 fbuffer_append(buffer, escape, escape_len);
284 fbuffer_append(buffer, ptr + start, end - start);
287 static char *fstrndup(const char *ptr, unsigned long len) {
289 if (len <= 0) return NULL;
290 result = ALLOC_N(char, len);
291 memccpy(result, ptr, 0, len);
295 /* fbuffer implementation */
297 static FBuffer *fbuffer_alloc()
299 FBuffer *fb = ALLOC(FBuffer);
300 memset((void *) fb, 0, sizeof(FBuffer));
301 fb->initial_length = FBUFFER_INITIAL_LENGTH;
305 static FBuffer *fbuffer_alloc_with_length(unsigned long initial_length)
308 assert(initial_length > 0);
310 memset((void *) fb, 0, sizeof(FBuffer));
311 fb->initial_length = initial_length;
315 static void fbuffer_free(FBuffer *fb)
317 if (fb->ptr) ruby_xfree(fb->ptr);
321 static void fbuffer_clear(FBuffer *fb)
326 static void fbuffer_inc_capa(FBuffer *fb, unsigned long requested)
328 unsigned long required;
331 fb->ptr = ALLOC_N(char, fb->initial_length);
332 fb->capa = fb->initial_length;
335 for (required = fb->capa; requested > required - fb->len; required <<= 1);
337 if (required > fb->capa) {
338 REALLOC_N(fb->ptr, char, required);
343 static void fbuffer_append(FBuffer *fb, const char *newstr, unsigned long len)
346 fbuffer_inc_capa(fb, len);
347 MEMCPY(fb->ptr + fb->len, newstr, char, len);
352 static void fbuffer_append_str(FBuffer *fb, VALUE str)
354 const char *newstr = StringValuePtr(str);
355 unsigned long len = RSTRING_LEN(str);
359 fbuffer_append(fb, newstr, len);
362 static void fbuffer_append_char(FBuffer *fb, char newchr)
364 fbuffer_inc_capa(fb, 1);
365 *(fb->ptr + fb->len) = newchr;
/* Reverses in place the characters in the inclusive range [start, end]. */
static void freverse(char *start, char *end)
{
    for (; start < end; start++, end--) {
        char swapped = *start;
        *start = *end;
        *end = swapped;
    }
}

/*
 * Writes the decimal representation of number into buf and returns the number
 * of characters written. The output is NOT NUL-terminated; buf must have room
 * for every digit plus an optional leading '-'.
 */
static long fltoa(long number, char *buf)
{
    static const char digits[] = "0123456789";
    char *tmp = buf;
    /* Take the magnitude in unsigned arithmetic: negating LONG_MIN as a
     * signed long overflows (undefined behaviour), whereas the unsigned
     * subtraction is well defined. */
    unsigned long magnitude = number < 0
        ? 0UL - (unsigned long) number
        : (unsigned long) number;

    /* Digits are produced least significant first and reversed afterwards. */
    do {
        *tmp++ = digits[magnitude % 10];
    } while (magnitude /= 10);
    if (number < 0) *tmp++ = '-';
    freverse(buf, tmp - 1);
    return (long) (tmp - buf);
}
391 static void fbuffer_append_long(FBuffer *fb, long number)
394 unsigned long len = fltoa(number, buf);
395 fbuffer_append(fb, buf, len);
398 static FBuffer *fbuffer_dup(FBuffer *fb)
400 unsigned long len = fb->len;
404 result = fbuffer_alloc_with_length(len);
405 fbuffer_append(result, FBUFFER_PAIR(fb));
407 result = fbuffer_alloc();
413 * Document-module: JSON::Ext::Generator
415 * This is the JSON generator implemented as a C extension. It can be
416 * configured to be used by setting
418 * JSON.generator = JSON::Ext::Generator
420 * with the method generator= in JSON.
425 * call-seq: to_json(state = nil)
427 * Returns a JSON string containing a JSON object, that is generated from
428 * this Hash instance.
429 * _state_ is a JSON::State object, that can also be used to configure the
430 * produced JSON string output further.
432 static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
434 GENERATE_JSON(object);
438 * call-seq: to_json(state = nil)
440 * Returns a JSON string containing a JSON array, that is generated from
441 * this Array instance.
442 * _state_ is a JSON::State object, that can also be used to configure the
443 * produced JSON string output further.
445 static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) {
446 GENERATE_JSON(array);
450 * call-seq: to_json(*)
452 * Returns a JSON string representation for this Integer number.
454 static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self)
456 GENERATE_JSON(fixnum);
460 * call-seq: to_json(*)
462 * Returns a JSON string representation for this Integer number.
464 static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self)
466 GENERATE_JSON(bignum);
470 * call-seq: to_json(*)
472 * Returns a JSON string representation for this Float number.
474 static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self)
476 GENERATE_JSON(float);
480 * call-seq: String.included(modul)
482 * Extends _modul_ with the String::Extend module.
484 static VALUE mString_included_s(VALUE self, VALUE modul) {
485 VALUE result = rb_funcall(modul, i_extend, 1, mString_Extend);
490 * call-seq: to_json(*)
492 * This string should be encoded with UTF-8. A call to this method
493 * returns a JSON string encoded with UTF16 big endian characters as
496 static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
498 GENERATE_JSON(string);
502 * call-seq: to_json_raw_object()
504 * This method creates a raw object hash, that can be nested into
505 * other data structures and will be generated as a raw string. This
506 * method should be used, if you want to convert raw strings to JSON
507 * instead of UTF-8 strings, e. g. binary data.
509 static VALUE mString_to_json_raw_object(VALUE self)
512 VALUE result = rb_hash_new();
513 rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self)));
514 ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*"));
515 rb_hash_aset(result, rb_str_new2("raw"), ary);
520 * call-seq: to_json_raw(*args)
522 * This method creates a JSON text from the result of a call to
523 * to_json_raw_object of this String.
525 static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self)
527 VALUE obj = mString_to_json_raw_object(self);
528 Check_Type(obj, T_HASH);
529 return mHash_to_json(argc, argv, obj);
533 * call-seq: json_create(o)
535 * Raw Strings are JSON Objects (the raw bytes are stored in an array for the
536 * key "raw"). The Ruby String can be created by this module method.
538 static VALUE mString_Extend_json_create(VALUE self, VALUE o)
541 Check_Type(o, T_HASH);
542 ary = rb_hash_aref(o, rb_str_new2("raw"));
543 return rb_funcall(ary, i_pack, 1, rb_str_new2("C*"));
547 * call-seq: to_json(*)
549 * Returns a JSON string for true: 'true'.
551 static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self)
557 * call-seq: to_json(*)
559 * Returns a JSON string for false: 'false'.
561 static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self)
563 GENERATE_JSON(false);
567 * call-seq: to_json(*)
569 * Returns a JSON string for nil: 'null'.
571 static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self)
577 * call-seq: to_json(*)
579 * Converts this object to a string (calling #to_s), converts
580 * it to a JSON string, and returns the result. This is a fallback, if no
581 * special method #to_json was defined for some object.
583 static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self)
586 VALUE string = rb_funcall(self, i_to_s, 0);
587 rb_scan_args(argc, argv, "01", &state);
588 Check_Type(string, T_STRING);
589 state = cState_from_state_s(cState, state);
590 return cState_partial_generate(state, string);
593 static void State_free(JSON_Generator_State *state)
595 if (state->indent) ruby_xfree(state->indent);
596 if (state->space) ruby_xfree(state->space);
597 if (state->space_before) ruby_xfree(state->space_before);
598 if (state->object_nl) ruby_xfree(state->object_nl);
599 if (state->array_nl) ruby_xfree(state->array_nl);
600 if (state->array_delim) fbuffer_free(state->array_delim);
601 if (state->object_delim) fbuffer_free(state->object_delim);
602 if (state->object_delim2) fbuffer_free(state->object_delim2);
606 static JSON_Generator_State *State_allocate()
608 JSON_Generator_State *state = ALLOC(JSON_Generator_State);
609 MEMZERO(state, JSON_Generator_State, 1);
613 static VALUE cState_s_allocate(VALUE klass)
615 JSON_Generator_State *state = State_allocate();
616 return Data_Wrap_Struct(klass, NULL, State_free, state);
620 * call-seq: configure(opts)
622 * Configure this State instance with the Hash _opts_, and return
625 static VALUE cState_configure(VALUE self, VALUE opts)
629 tmp = rb_convert_type(opts, T_HASH, "Hash", "to_hash");
630 if (NIL_P(tmp)) tmp = rb_convert_type(opts, T_HASH, "Hash", "to_h");
632 rb_raise(rb_eArgError, "opts has to be hash like or convertable into a hash");
635 tmp = rb_hash_aref(opts, ID2SYM(i_indent));
638 Check_Type(tmp, T_STRING);
639 len = RSTRING_LEN(tmp);
640 state->indent = fstrndup(RSTRING_PTR(tmp), len);
641 state->indent_len = len;
643 tmp = rb_hash_aref(opts, ID2SYM(i_space));
646 Check_Type(tmp, T_STRING);
647 len = RSTRING_LEN(tmp);
648 state->space = fstrndup(RSTRING_PTR(tmp), len);
649 state->space_len = len;
651 tmp = rb_hash_aref(opts, ID2SYM(i_space_before));
654 Check_Type(tmp, T_STRING);
655 len = RSTRING_LEN(tmp);
656 state->space_before = fstrndup(RSTRING_PTR(tmp), len);
657 state->space_before_len = len;
659 tmp = rb_hash_aref(opts, ID2SYM(i_array_nl));
662 Check_Type(tmp, T_STRING);
663 len = RSTRING_LEN(tmp);
664 state->array_nl = fstrndup(RSTRING_PTR(tmp), len);
665 state->array_nl_len = len;
667 tmp = rb_hash_aref(opts, ID2SYM(i_object_nl));
670 Check_Type(tmp, T_STRING);
671 len = RSTRING_LEN(tmp);
672 state->object_nl = fstrndup(RSTRING_PTR(tmp), len);
673 state->object_nl_len = len;
675 tmp = ID2SYM(i_max_nesting);
676 state->max_nesting = 19;
677 if (option_given_p(opts, tmp)) {
678 VALUE max_nesting = rb_hash_aref(opts, tmp);
679 if (RTEST(max_nesting)) {
680 Check_Type(max_nesting, T_FIXNUM);
681 state->max_nesting = FIX2LONG(max_nesting);
683 state->max_nesting = 0;
686 tmp = ID2SYM(i_depth);
688 if (option_given_p(opts, tmp)) {
689 VALUE depth = rb_hash_aref(opts, tmp);
691 Check_Type(depth, T_FIXNUM);
692 state->depth = FIX2LONG(depth);
697 tmp = rb_hash_aref(opts, ID2SYM(i_allow_nan));
698 state->allow_nan = RTEST(tmp);
699 tmp = rb_hash_aref(opts, ID2SYM(i_ascii_only));
700 state->ascii_only = RTEST(tmp);
701 tmp = rb_hash_aref(opts, ID2SYM(i_quirks_mode));
702 state->quirks_mode = RTEST(tmp);
709 * Returns the configuration instance variables as a hash, that can be
710 * passed to the configure method.
712 static VALUE cState_to_h(VALUE self)
714 VALUE result = rb_hash_new();
716 rb_hash_aset(result, ID2SYM(i_indent), rb_str_new(state->indent, state->indent_len));
717 rb_hash_aset(result, ID2SYM(i_space), rb_str_new(state->space, state->space_len));
718 rb_hash_aset(result, ID2SYM(i_space_before), rb_str_new(state->space_before, state->space_before_len));
719 rb_hash_aset(result, ID2SYM(i_object_nl), rb_str_new(state->object_nl, state->object_nl_len));
720 rb_hash_aset(result, ID2SYM(i_array_nl), rb_str_new(state->array_nl, state->array_nl_len));
721 rb_hash_aset(result, ID2SYM(i_allow_nan), state->allow_nan ? Qtrue : Qfalse);
722 rb_hash_aset(result, ID2SYM(i_ascii_only), state->ascii_only ? Qtrue : Qfalse);
723 rb_hash_aset(result, ID2SYM(i_quirks_mode), state->quirks_mode ? Qtrue : Qfalse);
724 rb_hash_aset(result, ID2SYM(i_max_nesting), LONG2FIX(state->max_nesting));
725 rb_hash_aset(result, ID2SYM(i_depth), LONG2FIX(state->depth));
732 * Return the value returned by method +name+.
734 static VALUE cState_aref(VALUE self, VALUE name)
737 if (RTEST(rb_funcall(self, i_respond_to_p, 1, name))) {
738 return rb_funcall(self, i_send, 1, name);
744 static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
746 char *object_nl = state->object_nl;
747 long object_nl_len = state->object_nl_len;
748 char *indent = state->indent;
749 long indent_len = state->indent_len;
750 long max_nesting = state->max_nesting;
751 char *delim = FBUFFER_PTR(state->object_delim);
752 long delim_len = FBUFFER_LEN(state->object_delim);
753 char *delim2 = FBUFFER_PTR(state->object_delim2);
754 long delim2_len = FBUFFER_LEN(state->object_delim2);
755 long depth = ++state->depth;
757 VALUE key, key_to_s, keys;
758 if (max_nesting != 0 && depth > max_nesting) {
759 fbuffer_free(buffer);
760 rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
762 fbuffer_append_char(buffer, '{');
763 keys = rb_funcall(obj, i_keys, 0);
764 for(i = 0; i < RARRAY_LEN(keys); i++) {
765 if (i > 0) fbuffer_append(buffer, delim, delim_len);
767 fbuffer_append(buffer, object_nl, object_nl_len);
770 for (j = 0; j < depth; j++) {
771 fbuffer_append(buffer, indent, indent_len);
774 key = rb_ary_entry(keys, i);
775 key_to_s = rb_funcall(key, i_to_s, 0);
776 Check_Type(key_to_s, T_STRING);
777 generate_json(buffer, Vstate, state, key_to_s);
778 fbuffer_append(buffer, delim2, delim2_len);
779 generate_json(buffer, Vstate, state, rb_hash_aref(obj, key));
781 depth = --state->depth;
783 fbuffer_append(buffer, object_nl, object_nl_len);
785 for (j = 0; j < depth; j++) {
786 fbuffer_append(buffer, indent, indent_len);
790 fbuffer_append_char(buffer, '}');
793 static void generate_json_array(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
795 char *array_nl = state->array_nl;
796 long array_nl_len = state->array_nl_len;
797 char *indent = state->indent;
798 long indent_len = state->indent_len;
799 long max_nesting = state->max_nesting;
800 char *delim = FBUFFER_PTR(state->array_delim);
801 long delim_len = FBUFFER_LEN(state->array_delim);
802 long depth = ++state->depth;
804 if (max_nesting != 0 && depth > max_nesting) {
805 fbuffer_free(buffer);
806 rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
808 fbuffer_append_char(buffer, '[');
809 if (array_nl) fbuffer_append(buffer, array_nl, array_nl_len);
810 for(i = 0; i < RARRAY_LEN(obj); i++) {
811 if (i > 0) fbuffer_append(buffer, delim, delim_len);
813 for (j = 0; j < depth; j++) {
814 fbuffer_append(buffer, indent, indent_len);
817 generate_json(buffer, Vstate, state, rb_ary_entry(obj, i));
819 state->depth = --depth;
821 fbuffer_append(buffer, array_nl, array_nl_len);
823 for (j = 0; j < depth; j++) {
824 fbuffer_append(buffer, indent, indent_len);
828 fbuffer_append_char(buffer, ']');
831 static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
833 fbuffer_append_char(buffer, '"');
834 #ifdef HAVE_RUBY_ENCODING_H
835 obj = rb_funcall(obj, i_encode, 1, CEncoding_UTF_8);
837 if (state->ascii_only) {
838 convert_UTF8_to_JSON_ASCII(buffer, obj);
840 convert_UTF8_to_JSON(buffer, obj);
842 fbuffer_append_char(buffer, '"');
845 static void generate_json_null(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
847 fbuffer_append(buffer, "null", 4);
850 static void generate_json_false(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
852 fbuffer_append(buffer, "false", 5);
855 static void generate_json_true(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
857 fbuffer_append(buffer, "true", 4);
860 static void generate_json_fixnum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
862 fbuffer_append_long(buffer, FIX2LONG(obj));
865 static void generate_json_bignum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
867 VALUE tmp = rb_funcall(obj, i_to_s, 0);
868 fbuffer_append_str(buffer, tmp);
871 static void generate_json_float(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
873 double value = RFLOAT_VALUE(obj);
874 char allow_nan = state->allow_nan;
875 VALUE tmp = rb_funcall(obj, i_to_s, 0);
878 fbuffer_free(buffer);
879 rb_raise(eGeneratorError, "%u: %s not allowed in JSON", __LINE__, StringValueCStr(tmp));
880 } else if (isnan(value)) {
881 fbuffer_free(buffer);
882 rb_raise(eGeneratorError, "%u: %s not allowed in JSON", __LINE__, StringValueCStr(tmp));
885 fbuffer_append_str(buffer, tmp);
888 static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
891 VALUE klass = CLASS_OF(obj);
892 if (klass == rb_cHash) {
893 generate_json_object(buffer, Vstate, state, obj);
894 } else if (klass == rb_cArray) {
895 generate_json_array(buffer, Vstate, state, obj);
896 } else if (klass == rb_cString) {
897 generate_json_string(buffer, Vstate, state, obj);
898 } else if (obj == Qnil) {
899 generate_json_null(buffer, Vstate, state, obj);
900 } else if (obj == Qfalse) {
901 generate_json_false(buffer, Vstate, state, obj);
902 } else if (obj == Qtrue) {
903 generate_json_true(buffer, Vstate, state, obj);
904 } else if (klass == rb_cFixnum) {
905 generate_json_fixnum(buffer, Vstate, state, obj);
906 } else if (klass == rb_cBignum) {
907 generate_json_bignum(buffer, Vstate, state, obj);
908 } else if (klass == rb_cFloat) {
909 generate_json_float(buffer, Vstate, state, obj);
910 } else if (rb_respond_to(obj, i_to_json)) {
911 tmp = rb_funcall(obj, i_to_json, 1, Vstate);
912 Check_Type(tmp, T_STRING);
913 fbuffer_append_str(buffer, tmp);
915 tmp = rb_funcall(obj, i_to_s, 0);
916 Check_Type(tmp, T_STRING);
917 generate_json(buffer, Vstate, state, tmp);
921 static FBuffer *cState_prepare_buffer(VALUE self)
923 FBuffer *buffer = fbuffer_alloc();
926 if (state->object_delim) {
927 fbuffer_clear(state->object_delim);
929 state->object_delim = fbuffer_alloc_with_length(16);
931 fbuffer_append_char(state->object_delim, ',');
932 if (state->object_delim2) {
933 fbuffer_clear(state->object_delim2);
935 state->object_delim2 = fbuffer_alloc_with_length(16);
937 fbuffer_append_char(state->object_delim2, ':');
938 if (state->space) fbuffer_append(state->object_delim2, state->space, state->space_len);
940 if (state->array_delim) {
941 fbuffer_clear(state->array_delim);
943 state->array_delim = fbuffer_alloc_with_length(16);
945 fbuffer_append_char(state->array_delim, ',');
946 if (state->array_nl) fbuffer_append(state->array_delim, state->array_nl, state->array_nl_len);
950 static VALUE fbuffer_to_s(FBuffer *fb)
952 VALUE result = rb_str_new(FBUFFER_PAIR(fb));
958 static VALUE cState_partial_generate(VALUE self, VALUE obj)
960 FBuffer *buffer = cState_prepare_buffer(self);
962 generate_json(buffer, self, state, obj);
963 return fbuffer_to_s(buffer);
967 * call-seq: generate(obj)
969 * Generates a valid JSON document from object +obj+ and returns the
970 * result. If no valid JSON document can be created this method raises a
971 * GeneratorError exception.
973 static VALUE cState_generate(VALUE self, VALUE obj)
975 VALUE result = cState_partial_generate(self, obj);
978 if (!state->quirks_mode) {
979 args[0] = rb_str_new2("\\A\\s*(?:\\[.*\\]|\\{.*\\})\\s*\\Z");
980 args[1] = CRegexp_MULTILINE;
981 re = rb_class_new_instance(2, args, rb_cRegexp);
982 if (NIL_P(rb_funcall(re, i_match, 1, result))) {
983 rb_raise(eGeneratorError, "only generation of JSON objects or arrays allowed");
990 * call-seq: new(opts = {})
992 * Instantiates a new State object, configured by _opts_.
994 * _opts_ can have the following keys:
996 * * *indent*: a string used to indent levels (default: ''),
997 * * *space*: a string that is put after a : or , delimiter (default: ''),
998 * * *space_before*: a string that is put before a : pair delimiter (default: ''),
999 * * *object_nl*: a string that is put at the end of a JSON object (default: ''),
1000 * * *array_nl*: a string that is put at the end of a JSON array (default: ''),
1001 * * *allow_nan*: true if NaN, Infinity, and -Infinity should be
1002 * generated, otherwise an exception is thrown, if these values are
1003 * encountered. This option defaults to false.
1004 * * *quirks_mode*: Enables quirks_mode for the generator, that is for example
1005 * generating single JSON values instead of documents is possible.
1007 static VALUE cState_initialize(int argc, VALUE *argv, VALUE self)
1011 state->max_nesting = 19;
1012 rb_scan_args(argc, argv, "01", &opts);
1013 if (!NIL_P(opts)) cState_configure(self, opts);
1018 * call-seq: initialize_copy(orig)
1020 * Initializes this object from orig if it is to be duplicated/cloned and returns
1023 static VALUE cState_init_copy(VALUE obj, VALUE orig)
1025 JSON_Generator_State *objState, *origState;
1027 Data_Get_Struct(obj, JSON_Generator_State, objState);
1028 Data_Get_Struct(orig, JSON_Generator_State, origState);
1029 if (!objState) rb_raise(rb_eArgError, "unallocated JSON::State");
1031 MEMCPY(objState, origState, JSON_Generator_State, 1);
1032 objState->indent = fstrndup(origState->indent, origState->indent_len);
1033 objState->space = fstrndup(origState->space, origState->space_len);
1034 objState->space_before = fstrndup(origState->space_before, origState->space_before_len);
1035 objState->object_nl = fstrndup(origState->object_nl, origState->object_nl_len);
1036 objState->array_nl = fstrndup(origState->array_nl, origState->array_nl_len);
1037 if (origState->array_delim) objState->array_delim = fbuffer_dup(origState->array_delim);
1038 if (origState->object_delim) objState->object_delim = fbuffer_dup(origState->object_delim);
1039 if (origState->object_delim2) objState->object_delim2 = fbuffer_dup(origState->object_delim2);
1044 * call-seq: from_state(opts)
1046 * Creates a State object from _opts_, which ought to be Hash to create a
1047 * new State instance configured by _opts_, something else to create an
1048 * unconfigured instance. If _opts_ is a State object, it is just returned.
1050 static VALUE cState_from_state_s(VALUE self, VALUE opts)
1052 if (rb_obj_is_kind_of(opts, self)) {
1054 } else if (rb_obj_is_kind_of(opts, rb_cHash)) {
1055 return rb_funcall(self, i_new, 1, opts);
1057 if (NIL_P(CJSON_SAFE_STATE_PROTOTYPE)) {
1058 CJSON_SAFE_STATE_PROTOTYPE = rb_const_get(mJSON, i_SAFE_STATE_PROTOTYPE);
1060 return rb_funcall(CJSON_SAFE_STATE_PROTOTYPE, i_dup, 0);
1065 * call-seq: indent()
1067 * This string is used to indent levels in the JSON text.
1069 static VALUE cState_indent(VALUE self)
1072 return state->indent ? rb_str_new2(state->indent) : rb_str_new2("");
1076 * call-seq: indent=(indent)
1078 * This string is used to indent levels in the JSON text.
1080 static VALUE cState_indent_set(VALUE self, VALUE indent)
1084 Check_Type(indent, T_STRING);
1085 len = RSTRING_LEN(indent);
1087 if (state->indent) {
1088 ruby_xfree(state->indent);
1089 state->indent = NULL;
1090 state->indent_len = 0;
1093 if (state->indent) ruby_xfree(state->indent);
1094 state->indent = strdup(RSTRING_PTR(indent));
1095 state->indent_len = len;
1103 * This string is used to insert a space between the tokens in a JSON
1106 static VALUE cState_space(VALUE self)
1109 return state->space ? rb_str_new2(state->space) : rb_str_new2("");
1113 * call-seq: space=(space)
1115 * This string is used to insert a space between the tokens in a JSON
1118 static VALUE cState_space_set(VALUE self, VALUE space)
1122 Check_Type(space, T_STRING);
1123 len = RSTRING_LEN(space);
1126 ruby_xfree(state->space);
1127 state->space = NULL;
1128 state->space_len = 0;
1131 if (state->space) ruby_xfree(state->space);
1132 state->space = strdup(RSTRING_PTR(space));
1133 state->space_len = len;
1139 * call-seq: space_before()
1141 * This string is used to insert a space before the ':' in JSON objects.
1143 static VALUE cState_space_before(VALUE self)
1146 return state->space_before ? rb_str_new2(state->space_before) : rb_str_new2("");
1150 * call-seq: space_before=(space_before)
1152 * This string is used to insert a space before the ':' in JSON objects.
1154 static VALUE cState_space_before_set(VALUE self, VALUE space_before)
1158 Check_Type(space_before, T_STRING);
1159 len = RSTRING_LEN(space_before);
1161 if (state->space_before) {
1162 ruby_xfree(state->space_before);
1163 state->space_before = NULL;
1164 state->space_before_len = 0;
1167 if (state->space_before) ruby_xfree(state->space_before);
1168 state->space_before = strdup(RSTRING_PTR(space_before));
1169 state->space_before_len = len;
1175 * call-seq: object_nl()
1177 * This string is put at the end of a line that holds a JSON object (or
1180 static VALUE cState_object_nl(VALUE self)
1183 return state->object_nl ? rb_str_new2(state->object_nl) : rb_str_new2("");
1187 * call-seq: object_nl=(object_nl)
1189 * This string is put at the end of a line that holds a JSON object (or
1192 static VALUE cState_object_nl_set(VALUE self, VALUE object_nl)
1196 Check_Type(object_nl, T_STRING);
1197 len = RSTRING_LEN(object_nl);
1199 if (state->object_nl) {
1200 ruby_xfree(state->object_nl);
1201 state->object_nl = NULL;
1204 if (state->object_nl) ruby_xfree(state->object_nl);
1205 state->object_nl = strdup(RSTRING_PTR(object_nl));
1206 state->object_nl_len = len;
1212 * call-seq: array_nl()
1214 * This string is put at the end of a line that holds a JSON array.
1216 static VALUE cState_array_nl(VALUE self)
1219 return state->array_nl ? rb_str_new2(state->array_nl) : rb_str_new2("");
1223 * call-seq: array_nl=(array_nl)
1225 * This string is put at the end of a line that holds a JSON array.
1227 static VALUE cState_array_nl_set(VALUE self, VALUE array_nl)
1231 Check_Type(array_nl, T_STRING);
1232 len = RSTRING_LEN(array_nl);
1234 if (state->array_nl) {
1235 ruby_xfree(state->array_nl);
1236 state->array_nl = NULL;
1239 if (state->array_nl) ruby_xfree(state->array_nl);
1240 state->array_nl = strdup(RSTRING_PTR(array_nl));
1241 state->array_nl_len = len;
1248 * call-seq: check_circular?
1250 * Returns true, if circular data structures should be checked,
1251 * otherwise returns false.
1253 static VALUE cState_check_circular_p(VALUE self)
1256 return state->max_nesting ? Qtrue : Qfalse;
1260 * call-seq: max_nesting
1262 * This integer returns the maximum level of data structure nesting in
1263 * the generated JSON, max_nesting = 0 if no maximum is checked.
1265 static VALUE cState_max_nesting(VALUE self)
1268 return LONG2FIX(state->max_nesting);
1272 * call-seq: max_nesting=(depth)
1274 * This sets the maximum level of data structure nesting in the generated JSON
1275 * to the integer depth, max_nesting = 0 if no maximum should be checked.
1277 static VALUE cState_max_nesting_set(VALUE self, VALUE depth)
1280 Check_Type(depth, T_FIXNUM);
1281 return state->max_nesting = FIX2LONG(depth);
1285 * call-seq: allow_nan?
1287 * Returns true, if NaN, Infinity, and -Infinity should be generated, otherwise
1290 static VALUE cState_allow_nan_p(VALUE self)
1293 return state->allow_nan ? Qtrue : Qfalse;
1297 * call-seq: ascii_only?
1299 * Returns true, if NaN, Infinity, and -Infinity should be generated, otherwise
1302 static VALUE cState_ascii_only_p(VALUE self)
1305 return state->ascii_only ? Qtrue : Qfalse;
1309 * call-seq: quirks_mode?
1311 * Returns true, if quirks mode is enabled. Otherwise returns false.
1313 static VALUE cState_quirks_mode_p(VALUE self)
1316 return state->quirks_mode ? Qtrue : Qfalse;
1320 * call-seq: quirks_mode=(enable)
1322 * If set to true, enables the quirks_mode mode.
1324 static VALUE cState_quirks_mode_set(VALUE self, VALUE enable)
1327 state->quirks_mode = RTEST(enable);
1334 * This integer returns the current depth of data structure nesting.
1336 static VALUE cState_depth(VALUE self)
1339 return LONG2FIX(state->depth);
1343 * call-seq: depth=(depth)
1345 * This sets the maximum level of data structure nesting in the generated JSON
1346 * to the integer depth, max_nesting = 0 if no maximum should be checked.
1348 static VALUE cState_depth_set(VALUE self, VALUE depth)
1351 Check_Type(depth, T_FIXNUM);
1352 return state->depth = FIX2LONG(depth);
1358 void Init_generator()
1360 rb_require("json/common");
1362 mJSON = rb_define_module("JSON");
1363 mExt = rb_define_module_under(mJSON, "Ext");
1364 mGenerator = rb_define_module_under(mExt, "Generator");
1366 eGeneratorError = rb_path2class("JSON::GeneratorError");
1367 eNestingError = rb_path2class("JSON::NestingError");
1369 cState = rb_define_class_under(mGenerator, "State", rb_cObject);
1370 rb_define_alloc_func(cState, cState_s_allocate);
1371 rb_define_singleton_method(cState, "from_state", cState_from_state_s, 1);
1372 rb_define_method(cState, "initialize", cState_initialize, -1);
1373 rb_define_method(cState, "initialize_copy", cState_init_copy, 1);
1374 rb_define_method(cState, "indent", cState_indent, 0);
1375 rb_define_method(cState, "indent=", cState_indent_set, 1);
1376 rb_define_method(cState, "space", cState_space, 0);
1377 rb_define_method(cState, "space=", cState_space_set, 1);
1378 rb_define_method(cState, "space_before", cState_space_before, 0);
1379 rb_define_method(cState, "space_before=", cState_space_before_set, 1);
1380 rb_define_method(cState, "object_nl", cState_object_nl, 0);
1381 rb_define_method(cState, "object_nl=", cState_object_nl_set, 1);
1382 rb_define_method(cState, "array_nl", cState_array_nl, 0);
1383 rb_define_method(cState, "array_nl=", cState_array_nl_set, 1);
1384 rb_define_method(cState, "max_nesting", cState_max_nesting, 0);
1385 rb_define_method(cState, "max_nesting=", cState_max_nesting_set, 1);
1386 rb_define_method(cState, "check_circular?", cState_check_circular_p, 0);
1387 rb_define_method(cState, "allow_nan?", cState_allow_nan_p, 0);
1388 rb_define_method(cState, "ascii_only?", cState_ascii_only_p, 0);
1389 rb_define_method(cState, "quirks_mode?", cState_quirks_mode_p, 0);
1390 rb_define_method(cState, "quirks_mode", cState_quirks_mode_p, 0);
1391 rb_define_method(cState, "quirks_mode=", cState_quirks_mode_set, 1);
1392 rb_define_method(cState, "depth", cState_depth, 0);
1393 rb_define_method(cState, "depth=", cState_depth_set, 1);
1394 rb_define_method(cState, "configure", cState_configure, 1);
1395 rb_define_alias(cState, "merge", "configure");
1396 rb_define_method(cState, "to_h", cState_to_h, 0);
1397 rb_define_method(cState, "[]", cState_aref, 1);
1398 rb_define_method(cState, "generate", cState_generate, 1);
1400 mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods");
1401 mObject = rb_define_module_under(mGeneratorMethods, "Object");
1402 rb_define_method(mObject, "to_json", mObject_to_json, -1);
1403 mHash = rb_define_module_under(mGeneratorMethods, "Hash");
1404 rb_define_method(mHash, "to_json", mHash_to_json, -1);
1405 mArray = rb_define_module_under(mGeneratorMethods, "Array");
1406 rb_define_method(mArray, "to_json", mArray_to_json, -1);
1407 mFixnum = rb_define_module_under(mGeneratorMethods, "Fixnum");
1408 rb_define_method(mFixnum, "to_json", mFixnum_to_json, -1);
1409 mBignum = rb_define_module_under(mGeneratorMethods, "Bignum");
1410 rb_define_method(mBignum, "to_json", mBignum_to_json, -1);
1411 mFloat = rb_define_module_under(mGeneratorMethods, "Float");
1412 rb_define_method(mFloat, "to_json", mFloat_to_json, -1);
1413 mString = rb_define_module_under(mGeneratorMethods, "String");
1414 rb_define_singleton_method(mString, "included", mString_included_s, 1);
1415 rb_define_method(mString, "to_json", mString_to_json, -1);
1416 rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1);
1417 rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0);
1418 mString_Extend = rb_define_module_under(mString, "Extend");
1419 rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1);
1420 mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass");
1421 rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1);
1422 mFalseClass = rb_define_module_under(mGeneratorMethods, "FalseClass");
1423 rb_define_method(mFalseClass, "to_json", mFalseClass_to_json, -1);
1424 mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass");
1425 rb_define_method(mNilClass, "to_json", mNilClass_to_json, -1);
1427 CRegexp_MULTILINE = rb_const_get(rb_cRegexp, rb_intern("MULTILINE"));
1428 i_to_s = rb_intern("to_s");
1429 i_to_json = rb_intern("to_json");
1430 i_new = rb_intern("new");
1431 i_indent = rb_intern("indent");
1432 i_space = rb_intern("space");
1433 i_space_before = rb_intern("space_before");
1434 i_object_nl = rb_intern("object_nl");
1435 i_array_nl = rb_intern("array_nl");
1436 i_max_nesting = rb_intern("max_nesting");
1437 i_allow_nan = rb_intern("allow_nan");
1438 i_ascii_only = rb_intern("ascii_only");
1439 i_quirks_mode = rb_intern("quirks_mode");
1440 i_depth = rb_intern("depth");
1441 i_pack = rb_intern("pack");
1442 i_unpack = rb_intern("unpack");
1443 i_create_id = rb_intern("create_id");
1444 i_extend = rb_intern("extend");
1445 i_key_p = rb_intern("key?");
1446 i_aref = rb_intern("[]");
1447 i_send = rb_intern("__send__");
1448 i_respond_to_p = rb_intern("respond_to?");
1449 i_match = rb_intern("match");
1450 i_keys = rb_intern("keys");
1451 i_dup = rb_intern("dup");
1452 #ifdef HAVE_RUBY_ENCODING_H
1453 CEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8"));
1454 i_encoding = rb_intern("encoding");
1455 i_encode = rb_intern("encode");
1457 i_SAFE_STATE_PROTOTYPE = rb_intern("SAFE_STATE_PROTOTYPE");
1458 CJSON_SAFE_STATE_PROTOTYPE = Qnil;